/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2016 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "langhooks.h"
#include "opts.h"
#include "params.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "cortex-a57-fma-steering.h"
#include "target-globals.h"
#include "common/common-target.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};
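
/* Editorial illustration (not part of the original sources): typical memory
   operands and the category they are assumed to fall into, given the
   descriptions above; the exact forms accepted also depend on the access
   mode.
     [x0, #16]           base + immediate            -> ADDRESS_REG_IMM
     [x0], #16           base with writeback         -> ADDRESS_REG_WB
     [x0, x1, lsl #3]    base + scaled register      -> ADDRESS_REG_REG
     [x0, w1, uxtw #2]   base + zero-extended index  -> ADDRESS_REG_UXTW
     [x0, w1, sxtw #2]   base + sign-extended index  -> ADDRESS_REG_SXTW
     [x0, #:lo12:sym]    LO_SUM relocation           -> ADDRESS_LO_SUM
     literal-pool load                               -> ADDRESS_SYMBOLIC  */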

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
						 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_pcrelative_literal_loads;

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};
#undef AARCH64_FUSION_PAIR

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};
#undef AARCH64_EXTRA_TUNING_OPTION

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
    {
      0, /* hi  */
      0, /* si  */
      0, /* di  */
      0, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_sextend  */
  0, /* register_zextend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_sextend  */
  0, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
    {
      0, /* hi  */
      0, /* si  */
      0, /* di  */
      2, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  1, /* register_offset  */
  1, /* register_sextend  */
  2, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  1, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_sextend  */
  1, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_sextend  */
  0, /* register_zextend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table vulcan_addrcost_table =
{
    {
      0, /* hi  */
      0, /* si  */
      0, /* di  */
      2, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  2, /* register_offset  */
  3, /* register_sextend  */
  3, /* register_zextend  */
  0, /* imm_offset  */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost exynosm1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual, 4 and 9).  */
  9, /* GP2FP  */
  9, /* FP2GP  */
  1 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost qdf24xx_regmove_cost =
{
  2, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  6, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost vulcan_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of int<->fp moves for spilling.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  4 /* FP2FP  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_stmt_cost  */
  2, /* vec_permute_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes for Cortex-A57.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_stmt_cost  */
  3, /* vec_permute_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

static const struct cpu_vector_cost exynosm1_vector_cost =
{
  1, /* scalar_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_stmt_cost  */
  3, /* vec_permute_cost  */
  3, /* vec_to_scalar_cost  */
  3, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes for X-Gene 1.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_stmt_cost  */
  2, /* vec_permute_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes for Vulcan.  */
static const struct cpu_vector_cost vulcan_vector_cost =
{
  6, /* scalar_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  6, /* vec_stmt_cost  */
  3, /* vec_permute_cost  */
  6, /* vec_to_scalar_cost  */
  5, /* scalar_to_vec_cost  */
  8, /* vec_align_load_cost  */
  8, /* vec_unalign_load_cost  */
  4, /* vec_unalign_store_cost  */
  4, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  2,  /* Predictable.  */
  2   /* Unpredictable.  */
};

/* Branch costs for Cortex-A57.  */
static const struct cpu_branch_cost cortexa57_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Branch costs for Vulcan.  */
static const struct cpu_branch_cost vulcan_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Generic approximation modes.  */
static const cpu_approx_modes generic_approx_modes =
{
  AARCH64_APPROX_NONE,  /* division  */
  AARCH64_APPROX_NONE,  /* sqrt  */
  AARCH64_APPROX_NONE   /* recip_sqrt  */
};

/* Approximation modes for Exynos M1.  */
static const cpu_approx_modes exynosm1_approx_modes =
{
  AARCH64_APPROX_NONE,  /* division  */
  AARCH64_APPROX_ALL,   /* sqrt  */
  AARCH64_APPROX_ALL    /* recip_sqrt  */
};

/* Approximation modes for X-Gene 1.  */
static const cpu_approx_modes xgene1_approx_modes =
{
  AARCH64_APPROX_NONE,  /* division  */
  AARCH64_APPROX_NONE,  /* sqrt  */
  AARCH64_APPROX_ALL    /* recip_sqrt  */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF,  /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa35_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  1, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags.  */
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa73_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  2, /* issue_rate.  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params exynosm1_tunings =
{
  &exynosm1_extra_costs,
  &exynosm1_addrcost_table,
  &exynosm1_regmove_cost,
  &exynosm1_vector_cost,
  &generic_branch_cost,
  &exynosm1_approx_modes,
  4,  /* memmov_cost  */
  3,  /* issue_rate  */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
  4,  /* function_align.  */
  4,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  48, /* max_case_values.  */
  64, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF,  /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF,  /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params qdf24xx_tunings =
{
  &qdf24xx_extra_costs,
  &qdf24xx_addrcost_table,
  &qdf24xx_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost  */
  4, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  64, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params vulcan_tunings =
{
  &vulcan_extra_costs,
  &vulcan_addrcost_table,
  &vulcan_regmove_cost,
  &vulcan_vector_cost,
  &vulcan_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  4, /* issue_rate.  */
  AARCH64_FUSE_NOTHING, /* fusible_ops.  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  3,  /* int_reassoc_width.  */
  2,  /* fp_reassoc_width.  */
  2,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF,  /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { NULL, NULL }
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH,				\
  all_architectures[AARCH64_ARCH_##ARCH].architecture_version,		\
  FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
    AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};


/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
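
/* Editorial worked example (not part of the original sources): the
   enumerators above are laid out so that each condition sits next to its
   logical inverse, which is why AARCH64_INVERSE_CONDITION_CODE only has to
   flip the low bit, e.g.
     AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) == AARCH64_NE
     AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) == AARCH64_LT  */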

/* Generate code to enable conditional branches in functions over 1 MiB.  */
const char *
aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
			const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
			       CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = targetm.strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);
  return "";
}
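
/* Editorial illustration (assumed usage, not taken from the original
   sources): the caller is expected to pass the branch mnemonic with its
   condition already inverted, so that an out-of-range "cbz x0, .Lfar" can
   be emitted as
       cbnz  x0, .Ltmp
       b     .Lfar
     .Ltmp:
   keeping the conditional branch itself within its limited range while the
   unconditional branch reaches the far destination.  */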

void
aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
{
  const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
  if (TARGET_GENERAL_REGS_ONLY)
    error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
  else
    error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
}

/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
   The register allocator chooses ALL_REGS if FP_REGS and GENERAL_REGS have
   the same cost even if ALL_REGS has a much larger cost.  ALL_REGS is also
   used if the cost of both FP_REGS and GENERAL_REGS is lower than the memory
   cost (in this case the best class is the lowest cost one).  Using ALL_REGS
   irrespective of its cost results in bad allocations with many redundant
   int<->FP moves which are expensive on various cores.
   To avoid this we don't allow ALL_REGS as the allocno class, but force a
   decision between FP_REGS and GENERAL_REGS.  We use the allocno class if it
   isn't ALL_REGS.  Similarly, use the best class if it isn't ALL_REGS.
   Otherwise set the allocno class depending on the mode.
   The result of this is that it is no longer inefficient to have a higher
   memory move cost than the register move cost.  */

static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
					 reg_class_t best_class)
{
  enum machine_mode mode;

  if (allocno_class != ALL_REGS)
    return allocno_class;

  if (best_class != ALL_REGS)
    return best_class;

  mode = PSEUDO_REGNO_MODE (regno);
  return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
}

static unsigned int
aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
			     enum machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && (AARCH64_VALID_SIMD_QREG_MODE (mode)
	  || AARCH64_VALID_SIMD_DREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
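
/* Editorial example (not part of the original sources): with 128-bit vector
   registers UNITS_PER_VREG is 16, so a 32-byte OImode value held in FP
   registers occupies (32 + 16 - 1) / 16 == 2 registers, while the same value
   in general registers would need (32 + 8 - 1) / 8 == 4 X-registers.  */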

/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
				     machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
	return mode;
      else
	return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}

/* Return true if calls to DECL should be treated as
   long-calls (ie called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (ie called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if calls to symbol-ref SYM should not go through
   plt stubs.  */

bool
aarch64_is_noplt_call_p (rtx sym)
{
  const_tree decl = SYMBOL_REF_DECL (sym);

  if (flag_pic
      && decl
      && (!flag_plt
	  || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
      && !targetm.binds_local_p (decl))
    return true;

  return false;
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
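
/* Editorial worked example (not part of the original sources): in DImode,
   MULT_IMM == 4 and EXTRACT_IMM == 34 satisfy the test above because
   34 & ~7 == 32 (a power of two, the width of the value being extended),
   34 & 7 == 2 (the shift amount, at most 4), and 4 == 1 << 2.  The extract
   therefore describes a 32-bit value extended and shifted left by 2, i.e.
   the (extended register, LSL #2) operand form.  */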

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:
   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add   dest, x1, :lo_12:foo
                                     nop

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr
                                     nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp
   mrs  tp, tpidr_el0
   add  dest, dest, tp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs tp, tpidr_el0
   add t0, tp, #:tprel_hi12:imm, lsl #12
   add t0, t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (dest, imm));
      return;

    case SYMBOL_SMALL_GOT_28K:
      {
	machine_mode mode = GET_MODE (dest);
	rtx gp_rtx = pic_offset_table_rtx;
	rtx insn;
	rtx mem;

	/* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
	   here before rtl expand.  Tree IVOPT will generate rtl pattern to
	   decide rtx costs, in which case pic_offset_table_rtx is not
	   initialized.  For that case no need to generate the first adrp
	   instruction as the final cost for global variable access is
	   one instruction.  */
	if (gp_rtx != NULL)
	  {
	    /* -fpic for -mcmodel=small allows a 32K GOT table size (but we
	       are using the page base as GOT base, the first page may be
	       wasted, in the worst scenario, there is only 28K space for
	       GOT).

	       The generated instruction sequence for accessing a global
	       variable is:

		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]

	       Only one instruction is needed.  But we must initialize
	       pic_offset_table_rtx properly.  We generate an initializing
	       insn for every global access, and allow CSE to remove all
	       redundant ones.

	       The final instruction sequence will look like the following
	       for multiple global variable accesses.

		 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_

		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
		 ...  */

	    rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
	    crtl->uses_pic_offset_table = 1;
	    emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));

	    if (mode != GET_MODE (gp_rtx))
	      gp_rtx = simplify_gen_subreg (mode, gp_rtx, GET_MODE (gp_rtx), 0);
	  }

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
	    else
	      insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);

	    mem = XVECEXP (SET_SRC (insn), 0, 0);
	  }
	else
	  {
	    gcc_assert (mode == Pmode);

	    insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
	    mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
	  }

	/* The operand is expected to be MEM.  Whenever the related insn
	   pattern changes, the above code which calculates mem should be
	   updated.  */
	gcc_assert (GET_CODE (mem) == MEM);
	MEM_READONLY_P (mem) = 1;
	MEM_NOTRAP_P (mem) = 1;
	emit_insn (insn);
	return;
      }

    case SYMBOL_SMALL_GOT_4G:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */

	rtx insn;
	rtx mem;
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
	    else
	      insn = gen_ldr_got_small_si (dest, tmp_reg, imm);

	    mem = XVECEXP (SET_SRC (insn), 0, 0);
	  }
	else
	  {
	    gcc_assert (mode == Pmode);

	    insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
	    mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
	  }

	gcc_assert (GET_CODE (mem) == MEM);
	MEM_READONLY_P (mem) = 1;
	MEM_NOTRAP_P (mem) = 1;
	emit_insn (insn);
	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx_insn *insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	machine_mode mode = GET_MODE (dest);
	rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
	rtx tp;

	gcc_assert (mode == Pmode || mode == ptr_mode);

	/* In ILP32, the got entry is always of SImode size.  Unlike
	   small GOT, the dest is fixed at reg 0.  */
	if (TARGET_ILP32)
	  emit_insn (gen_tlsdesc_small_si (imm));
	else
	  emit_insn (gen_tlsdesc_small_di (imm));
	tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TLSIE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different tlsie_small
	   patterns here (two patterns for ILP32).  */
	machine_mode mode = GET_MODE (dest);
	rtx tmp_reg = gen_reg_rtx (mode);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_small_di (tmp_reg, imm));
	    else
	      {
		emit_insn (gen_tlsie_small_si (tmp_reg, imm));
		tp = gen_lowpart (mode, tp);
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
	  }

	emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TLSLE12:
    case SYMBOL_TLSLE24:
    case SYMBOL_TLSLE32:
    case SYMBOL_TLSLE48:
      {
	machine_mode mode = GET_MODE (dest);
	rtx tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	switch (type)
	  {
	  case SYMBOL_TLSLE12:
	    emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
		       (dest, tp, imm));
	    break;
	  case SYMBOL_TLSLE24:
	    emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
		       (dest, tp, imm));
	    break;
	  case SYMBOL_TLSLE32:
	    emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
		       (dest, imm));
	    emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
		       (dest, dest, tp));
	    break;
	  case SYMBOL_TLSLE48:
	    emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
		       (dest, imm));
	    emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
		       (dest, dest, tp));
	    break;
	  default:
	    gcc_unreachable ();
	  }

	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    case SYMBOL_TINY_TLSIE:
      {
	machine_mode mode = GET_MODE (dest);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
	    else
	      {
		tp = gen_lowpart (mode, tp);
		emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
	  }

	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    default:
      gcc_unreachable ();
    }
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}

/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  src_lo = gen_lowpart (word_mode, src);
	  src_hi = gen_highpart (word_mode, src);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
	    }
	  return;
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  dst_lo = gen_lowpart (word_mode, dst);
	  dst_hi = gen_highpart (word_mode, dst);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
	    }
	  return;
	}
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}
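
/* Editorial example (not part of the original sources): splitting the TImode
   copy (x0,x1) <- (x1,x2), the low destination x0 does not overlap the high
   source x2, so the low halves are moved first; for (x1,x2) <- (x0,x1) the
   low destination x1 overlaps the high source x1, so the high halves are
   moved first to avoid clobbering a value that is still needed.  */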
1598
1599bool
1600aarch64_split_128bit_move_p (rtx dst, rtx src)
1601{
1602 return (! REG_P (src)
1603 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
1604}
1605
8b033a8a
SN
1606/* Split a complex SIMD combine. */
1607
1608void
1609aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
1610{
ef4bddc2
RS
1611 machine_mode src_mode = GET_MODE (src1);
1612 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
1613
1614 gcc_assert (VECTOR_MODE_P (dst_mode));
1615
1616 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
1617 {
1618 rtx (*gen) (rtx, rtx, rtx);
1619
1620 switch (src_mode)
1621 {
1622 case V8QImode:
1623 gen = gen_aarch64_simd_combinev8qi;
1624 break;
1625 case V4HImode:
1626 gen = gen_aarch64_simd_combinev4hi;
1627 break;
1628 case V2SImode:
1629 gen = gen_aarch64_simd_combinev2si;
1630 break;
7c369485
AL
1631 case V4HFmode:
1632 gen = gen_aarch64_simd_combinev4hf;
1633 break;
8b033a8a
SN
1634 case V2SFmode:
1635 gen = gen_aarch64_simd_combinev2sf;
1636 break;
1637 case DImode:
1638 gen = gen_aarch64_simd_combinedi;
1639 break;
1640 case DFmode:
1641 gen = gen_aarch64_simd_combinedf;
1642 break;
1643 default:
1644 gcc_unreachable ();
1645 }
1646
1647 emit_insn (gen (dst, src1, src2));
1648 return;
1649 }
1650}
1651
fd4842cd
SN
1652/* Split a complex SIMD move. */
1653
1654void
1655aarch64_split_simd_move (rtx dst, rtx src)
1656{
ef4bddc2
RS
1657 machine_mode src_mode = GET_MODE (src);
1658 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
1659
1660 gcc_assert (VECTOR_MODE_P (dst_mode));
1661
1662 if (REG_P (dst) && REG_P (src))
1663 {
c59b7e28
SN
1664 rtx (*gen) (rtx, rtx);
1665
fd4842cd
SN
1666 gcc_assert (VECTOR_MODE_P (src_mode));
1667
1668 switch (src_mode)
1669 {
1670 case V16QImode:
c59b7e28 1671 gen = gen_aarch64_split_simd_movv16qi;
fd4842cd
SN
1672 break;
1673 case V8HImode:
c59b7e28 1674 gen = gen_aarch64_split_simd_movv8hi;
fd4842cd
SN
1675 break;
1676 case V4SImode:
c59b7e28 1677 gen = gen_aarch64_split_simd_movv4si;
fd4842cd
SN
1678 break;
1679 case V2DImode:
c59b7e28 1680 gen = gen_aarch64_split_simd_movv2di;
fd4842cd 1681 break;
71a11456
AL
1682 case V8HFmode:
1683 gen = gen_aarch64_split_simd_movv8hf;
1684 break;
fd4842cd 1685 case V4SFmode:
c59b7e28 1686 gen = gen_aarch64_split_simd_movv4sf;
fd4842cd
SN
1687 break;
1688 case V2DFmode:
c59b7e28 1689 gen = gen_aarch64_split_simd_movv2df;
fd4842cd
SN
1690 break;
1691 default:
1692 gcc_unreachable ();
1693 }
c59b7e28
SN
1694
1695 emit_insn (gen (dst, src));
fd4842cd
SN
1696 return;
1697 }
1698}
1699
ef22810a
RH
1700bool
1701aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
1702 machine_mode ymode, rtx y)
1703{
1704 rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
1705 gcc_assert (r != NULL);
1706 return rtx_equal_p (x, r);
1707}
1708
1709
43e9d192 1710static rtx
ef4bddc2 1711aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
1712{
1713 if (can_create_pseudo_p ())
e18b4a81 1714 return force_reg (mode, value);
43e9d192
IB
1715 else
1716 {
1717 x = aarch64_emit_move (x, value);
1718 return x;
1719 }
1720}
1721
1722
1723static rtx
ef4bddc2 1724aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
43e9d192 1725{
9c023bf0 1726 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
43e9d192
IB
1727 {
1728 rtx high;
1729 /* Load the full offset into a register. This
1730 might be improvable in the future. */
1731 high = GEN_INT (offset);
1732 offset = 0;
e18b4a81
YZ
1733 high = aarch64_force_temporary (mode, temp, high);
1734 reg = aarch64_force_temporary (mode, temp,
1735 gen_rtx_PLUS (mode, high, reg));
43e9d192
IB
1736 }
1737 return plus_constant (mode, reg, offset);
1738}
1739
82614948
RR
1740static int
1741aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
1742 machine_mode mode)
43e9d192 1743{
43e9d192 1744 int i;
9a4865db
WD
1745 unsigned HOST_WIDE_INT val, val2, mask;
1746 int one_match, zero_match;
1747 int num_insns;
43e9d192 1748
9a4865db
WD
1749 val = INTVAL (imm);
1750
1751 if (aarch64_move_imm (val, mode))
43e9d192 1752 {
82614948 1753 if (generate)
f7df4a84 1754 emit_insn (gen_rtx_SET (dest, imm));
9a4865db 1755 return 1;
43e9d192
IB
1756 }
1757
9a4865db 1758 if ((val >> 32) == 0 || mode == SImode)
43e9d192 1759 {
82614948
RR
1760 if (generate)
1761 {
9a4865db
WD
1762 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
1763 if (mode == SImode)
1764 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1765 GEN_INT ((val >> 16) & 0xffff)));
1766 else
1767 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
1768 GEN_INT ((val >> 16) & 0xffff)));
82614948 1769 }
9a4865db 1770 return 2;
43e9d192
IB
1771 }
1772
1773 /* Remaining cases are all for DImode. */
1774
43e9d192 1775 mask = 0xffff;
9a4865db
WD
1776 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
1777 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
1778 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
1779 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 1780
62c8d76c 1781 if (zero_match != 2 && one_match != 2)
43e9d192 1782 {
62c8d76c
WD
1783 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
1784 For a 64-bit bitmask try whether changing 16 bits to all ones or
1785 zeroes creates a valid bitmask. To check any repeated bitmask,
1786 try using 16 bits from the other 32-bit half of val. */
43e9d192 1787
62c8d76c 1788 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 1789 {
62c8d76c
WD
1790 val2 = val & ~mask;
1791 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1792 break;
1793 val2 = val | mask;
1794 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1795 break;
1796 val2 = val2 & ~mask;
1797 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
1798 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1799 break;
43e9d192 1800 }
62c8d76c 1801 if (i != 64)
43e9d192 1802 {
62c8d76c 1803 if (generate)
43e9d192 1804 {
62c8d76c
WD
1805 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
1806 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 1807 GEN_INT ((val >> i) & 0xffff)));
43e9d192 1808 }
1312b1ba 1809 return 2;
43e9d192
IB
1810 }
1811 }
1812
9a4865db
WD
1813 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
1814 are emitted by the initial mov. If one_match > zero_match, skip set bits,
1815 otherwise skip zero bits. */
2c274197 1816
9a4865db 1817 num_insns = 1;
43e9d192 1818 mask = 0xffff;
9a4865db
WD
1819 val2 = one_match > zero_match ? ~val : val;
1820 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
1821
1822 if (generate)
1823 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
1824 ? (val | ~(mask << i))
1825 : (val & (mask << i)))));
1826 for (i += 16; i < 64; i += 16)
43e9d192 1827 {
9a4865db
WD
1828 if ((val2 & (mask << i)) == 0)
1829 continue;
1830 if (generate)
1831 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1832 GEN_INT ((val >> i) & 0xffff)));
1833 num_insns ++;
82614948
RR
1834 }
1835
1836 return num_insns;
1837}
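/* Worked example for the immediate expansion above (illustrative only):
   the DImode constant 0x1234cafe0000beef is neither a single MOVZ/MOVN/
   bitmask case nor a 32-bit value, and no 16-bit tweak yields a valid
   bitmask immediate, so the final loop emits roughly

	mov	x0, 0xbeef
	movk	x0, 0xcafe, lsl 32
	movk	x0, 0x1234, lsl 48

   i.e. three instructions, with the all-zero bits 16-31 skipped.  */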
1838
1839
1840void
1841aarch64_expand_mov_immediate (rtx dest, rtx imm)
1842{
1843 machine_mode mode = GET_MODE (dest);
1844
1845 gcc_assert (mode == SImode || mode == DImode);
1846
1847 /* Check what type of symbol it is. */
1848 if (GET_CODE (imm) == SYMBOL_REF
1849 || GET_CODE (imm) == LABEL_REF
1850 || GET_CODE (imm) == CONST)
1851 {
1852 rtx mem, base, offset;
1853 enum aarch64_symbol_type sty;
1854
1855 /* If we have (const (plus symbol offset)), separate out the offset
1856 before we start classifying the symbol. */
1857 split_const (imm, &base, &offset);
1858
a6e0bfa7 1859 sty = aarch64_classify_symbol (base, offset);
82614948
RR
1860 switch (sty)
1861 {
1862 case SYMBOL_FORCE_TO_MEM:
1863 if (offset != const0_rtx
1864 && targetm.cannot_force_const_mem (mode, imm))
1865 {
1866 gcc_assert (can_create_pseudo_p ());
1867 base = aarch64_force_temporary (mode, dest, base);
1868 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1869 aarch64_emit_move (dest, base);
1870 return;
1871 }
b4f50fd4 1872
82614948
RR
1873 mem = force_const_mem (ptr_mode, imm);
1874 gcc_assert (mem);
b4f50fd4
RR
1875
1876 /* If we aren't generating PC relative literals, then
1877 we need to expand the literal pool access carefully.
1878 This is something that needs to be done in a number
1879 of places, so could well live as a separate function. */
9ee6540a 1880 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
1881 {
1882 gcc_assert (can_create_pseudo_p ());
1883 base = gen_reg_rtx (ptr_mode);
1884 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
1885 mem = gen_rtx_MEM (ptr_mode, base);
1886 }
1887
82614948
RR
1888 if (mode != ptr_mode)
1889 mem = gen_rtx_ZERO_EXTEND (mode, mem);
b4f50fd4 1890
f7df4a84 1891 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 1892
82614948
RR
1893 return;
1894
1895 case SYMBOL_SMALL_TLSGD:
1896 case SYMBOL_SMALL_TLSDESC:
79496620 1897 case SYMBOL_SMALL_TLSIE:
1b1e81f8 1898 case SYMBOL_SMALL_GOT_28K:
6642bdb4 1899 case SYMBOL_SMALL_GOT_4G:
82614948 1900 case SYMBOL_TINY_GOT:
5ae7caad 1901 case SYMBOL_TINY_TLSIE:
82614948
RR
1902 if (offset != const0_rtx)
1903 {
1904 gcc_assert (can_create_pseudo_p ());
1905 base = aarch64_force_temporary (mode, dest, base);
1906 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1907 aarch64_emit_move (dest, base);
1908 return;
1909 }
1910 /* FALLTHRU */
1911
82614948
RR
1912 case SYMBOL_SMALL_ABSOLUTE:
1913 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 1914 case SYMBOL_TLSLE12:
d18ba284 1915 case SYMBOL_TLSLE24:
cbf5629e
JW
1916 case SYMBOL_TLSLE32:
1917 case SYMBOL_TLSLE48:
82614948
RR
1918 aarch64_load_symref_appropriately (dest, imm, sty);
1919 return;
1920
1921 default:
1922 gcc_unreachable ();
1923 }
1924 }
1925
1926 if (!CONST_INT_P (imm))
1927 {
1928 if (GET_CODE (imm) == HIGH)
f7df4a84 1929 emit_insn (gen_rtx_SET (dest, imm));
82614948
RR
1930 else
1931 {
1932 rtx mem = force_const_mem (mode, imm);
1933 gcc_assert (mem);
f7df4a84 1934 emit_insn (gen_rtx_SET (dest, mem));
43e9d192 1935 }
82614948
RR
1936
1937 return;
43e9d192 1938 }
82614948
RR
1939
1940 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
43e9d192
IB
1941}
1942
1943static bool
fee9ba42
JW
1944aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1945 tree exp ATTRIBUTE_UNUSED)
43e9d192 1946{
fee9ba42 1947 /* Currently, always true. */
43e9d192
IB
1948 return true;
1949}
1950
1951/* Implement TARGET_PASS_BY_REFERENCE. */
1952
1953static bool
1954aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 1955 machine_mode mode,
43e9d192
IB
1956 const_tree type,
1957 bool named ATTRIBUTE_UNUSED)
1958{
1959 HOST_WIDE_INT size;
ef4bddc2 1960 machine_mode dummymode;
43e9d192
IB
1961 int nregs;
1962
1963 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1964 size = (mode == BLKmode && type)
1965 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1966
aadc1c43
MHD
1967 /* Aggregates are passed by reference based on their size. */
1968 if (type && AGGREGATE_TYPE_P (type))
43e9d192 1969 {
aadc1c43 1970 size = int_size_in_bytes (type);
43e9d192
IB
1971 }
1972
1973 /* Variable-sized arguments are always passed by reference. */
1974 if (size < 0)
1975 return true;
1976
1977 /* Can this be a candidate to be passed in fp/simd register(s)? */
1978 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1979 &dummymode, &nregs,
1980 NULL))
1981 return false;
1982
1983 /* Arguments which are variable sized or larger than 2 registers are
1984 passed by reference unless they are a homogeneous floating-point
1985 aggregate. */
1986 return size > 2 * UNITS_PER_WORD;
1987}
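/* Illustrative consequences of the rules above: a plain 24-byte structure
   (three words) is passed by reference, while a 32-byte homogeneous
   floating-point aggregate such as struct { double a, b, c, d; } is still
   a candidate for the FP/SIMD registers and is therefore passed by value.  */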
1988
1989/* Return TRUE if VALTYPE is padded to its least significant bits. */
1990static bool
1991aarch64_return_in_msb (const_tree valtype)
1992{
ef4bddc2 1993 machine_mode dummy_mode;
43e9d192
IB
1994 int dummy_int;
1995
1996 /* Never happens in little-endian mode. */
1997 if (!BYTES_BIG_ENDIAN)
1998 return false;
1999
2000 /* Only composite types smaller than or equal to 16 bytes can
2001 potentially be returned in registers. */
2002 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
2003 || int_size_in_bytes (valtype) <= 0
2004 || int_size_in_bytes (valtype) > 16)
2005 return false;
2006
2007 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
2008 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
2009 is always passed/returned in the least significant bits of fp/simd
2010 register(s). */
2011 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
2012 &dummy_mode, &dummy_int, NULL))
2013 return false;
2014
2015 return true;
2016}
2017
2018/* Implement TARGET_FUNCTION_VALUE.
2019 Define how to find the value returned by a function. */
2020
2021static rtx
2022aarch64_function_value (const_tree type, const_tree func,
2023 bool outgoing ATTRIBUTE_UNUSED)
2024{
ef4bddc2 2025 machine_mode mode;
43e9d192
IB
2026 int unsignedp;
2027 int count;
ef4bddc2 2028 machine_mode ag_mode;
43e9d192
IB
2029
2030 mode = TYPE_MODE (type);
2031 if (INTEGRAL_TYPE_P (type))
2032 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
2033
2034 if (aarch64_return_in_msb (type))
2035 {
2036 HOST_WIDE_INT size = int_size_in_bytes (type);
2037
2038 if (size % UNITS_PER_WORD != 0)
2039 {
2040 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2041 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2042 }
2043 }
2044
2045 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
2046 &ag_mode, &count, NULL))
2047 {
2048 if (!aarch64_composite_type_p (type, mode))
2049 {
2050 gcc_assert (count == 1 && mode == ag_mode);
2051 return gen_rtx_REG (mode, V0_REGNUM);
2052 }
2053 else
2054 {
2055 int i;
2056 rtx par;
2057
2058 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
2059 for (i = 0; i < count; i++)
2060 {
2061 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
2062 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2063 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
2064 XVECEXP (par, 0, i) = tmp;
2065 }
2066 return par;
2067 }
2068 }
2069 else
2070 return gen_rtx_REG (mode, R0_REGNUM);
2071}
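/* Illustrative examples of the return-value rules above: a function
   returning struct { double a, b; } yields a two-element PARALLEL of V0
   and V1 in DFmode at byte offsets 0 and 8, whereas a 16-byte structure
   of two 64-bit integers comes back in the general registers x0/x1.  */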
2072
2073/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
2074 Return true if REGNO is the number of a hard register in which the values
2075 of called function may come back. */
2076
2077static bool
2078aarch64_function_value_regno_p (const unsigned int regno)
2079{
2080 /* At most 16 bytes can be returned in the general registers. Examples
2081 of 16-byte return values are: 128-bit integers and 16-byte small
2082 structures (excluding homogeneous floating-point aggregates). */
2083 if (regno == R0_REGNUM || regno == R1_REGNUM)
2084 return true;
2085
2086 /* Up to four fp/simd registers can return a function value, e.g. a
2087 homogeneous floating-point aggregate having four members. */
2088 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 2089 return TARGET_FLOAT;
43e9d192
IB
2090
2091 return false;
2092}
2093
2094/* Implement TARGET_RETURN_IN_MEMORY.
2095
2096 If the type T of the result of a function is such that
2097 void func (T arg)
2098 would require that arg be passed as a value in a register (or set of
2099 registers) according to the parameter passing rules, then the result
2100 is returned in the same registers as would be used for such an
2101 argument. */
2102
2103static bool
2104aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
2105{
2106 HOST_WIDE_INT size;
ef4bddc2 2107 machine_mode ag_mode;
43e9d192
IB
2108 int count;
2109
2110 if (!AGGREGATE_TYPE_P (type)
2111 && TREE_CODE (type) != COMPLEX_TYPE
2112 && TREE_CODE (type) != VECTOR_TYPE)
2113 /* Simple scalar types are always returned in registers. */
2114 return false;
2115
2116 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
2117 type,
2118 &ag_mode,
2119 &count,
2120 NULL))
2121 return false;
2122
2123 /* Types larger than 2 registers are returned in memory. */
2124 size = int_size_in_bytes (type);
2125 return (size < 0 || size > 2 * UNITS_PER_WORD);
2126}
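/* For example, a 32-byte structure of integers (larger than two registers)
   is returned in memory, its address being passed by the caller in x8,
   while a homogeneous aggregate of four doubles, also 32 bytes, is still
   returned in V0-V3 because the FP/SIMD candidate check above fires first
   (illustrative only).  */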
2127
2128static bool
ef4bddc2 2129aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2130 const_tree type, int *nregs)
2131{
2132 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2133 return aarch64_vfp_is_call_or_return_candidate (mode,
2134 type,
2135 &pcum->aapcs_vfp_rmode,
2136 nregs,
2137 NULL);
2138}
2139
2140/* Given MODE and TYPE of a function argument, return the alignment in
2141 bits. The idea is to suppress any stronger alignment requested by
2142 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
2143 This is a helper function for local use only. */
2144
2145static unsigned int
ef4bddc2 2146aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192 2147{
75d6cc81
AL
2148 if (!type)
2149 return GET_MODE_ALIGNMENT (mode);
2150 if (integer_zerop (TYPE_SIZE (type)))
2151 return 0;
43e9d192 2152
75d6cc81
AL
2153 gcc_assert (TYPE_MODE (type) == mode);
2154
2155 if (!AGGREGATE_TYPE_P (type))
2156 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
2157
2158 if (TREE_CODE (type) == ARRAY_TYPE)
2159 return TYPE_ALIGN (TREE_TYPE (type));
2160
2161 unsigned int alignment = 0;
2162
2163 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2164 alignment = std::max (alignment, DECL_ALIGN (field));
43e9d192
IB
2165
2166 return alignment;
2167}
2168
2169/* Lay out a function argument according to the AAPCS64 rules. The rule
2170 numbers refer to the rule numbers in the AAPCS64. */
2171
2172static void
ef4bddc2 2173aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2174 const_tree type,
2175 bool named ATTRIBUTE_UNUSED)
2176{
2177 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2178 int ncrn, nvrn, nregs;
2179 bool allocate_ncrn, allocate_nvrn;
3abf17cf 2180 HOST_WIDE_INT size;
43e9d192
IB
2181
2182 /* We need to do this once per argument. */
2183 if (pcum->aapcs_arg_processed)
2184 return;
2185
2186 pcum->aapcs_arg_processed = true;
2187
3abf17cf
YZ
2188 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
2189 size
4f59f9f2
UB
2190 = ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
2191 UNITS_PER_WORD);
3abf17cf 2192
43e9d192
IB
2193 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
2194 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
2195 mode,
2196 type,
2197 &nregs);
2198
2199 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
2200 The following code thus handles passing by SIMD/FP registers first. */
2201
2202 nvrn = pcum->aapcs_nvrn;
2203
2204 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
2205 and homogeneous short-vector aggregates (HVA). */
2206 if (allocate_nvrn)
2207 {
261fb553
AL
2208 if (!TARGET_FLOAT)
2209 aarch64_err_no_fpadvsimd (mode, "argument");
2210
43e9d192
IB
2211 if (nvrn + nregs <= NUM_FP_ARG_REGS)
2212 {
2213 pcum->aapcs_nextnvrn = nvrn + nregs;
2214 if (!aarch64_composite_type_p (type, mode))
2215 {
2216 gcc_assert (nregs == 1);
2217 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
2218 }
2219 else
2220 {
2221 rtx par;
2222 int i;
2223 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2224 for (i = 0; i < nregs; i++)
2225 {
2226 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
2227 V0_REGNUM + nvrn + i);
2228 tmp = gen_rtx_EXPR_LIST
2229 (VOIDmode, tmp,
2230 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
2231 XVECEXP (par, 0, i) = tmp;
2232 }
2233 pcum->aapcs_reg = par;
2234 }
2235 return;
2236 }
2237 else
2238 {
2239 /* C.3 NSRN is set to 8. */
2240 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
2241 goto on_stack;
2242 }
2243 }
2244
2245 ncrn = pcum->aapcs_ncrn;
3abf17cf 2246 nregs = size / UNITS_PER_WORD;
43e9d192
IB
2247
2248 /* C6 - C9, though the sign and zero extension semantics are
2249 handled elsewhere. This is the case where the argument fits
2250 entirely in general registers. */
2251 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
2252 {
2253 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2254
2255 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
2256
2257 /* C.8 if the argument has an alignment of 16 then the NGRN is
2258 rounded up to the next even number. */
2259 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
2260 {
2261 ++ncrn;
2262 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
2263 }
2264 /* NREGS can be 0 when e.g. an empty structure is to be passed.
2265 A reg is still generated for it, but the caller should be smart
2266 enough not to use it. */
2267 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2268 {
2269 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
2270 }
2271 else
2272 {
2273 rtx par;
2274 int i;
2275
2276 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2277 for (i = 0; i < nregs; i++)
2278 {
2279 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
2280 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2281 GEN_INT (i * UNITS_PER_WORD));
2282 XVECEXP (par, 0, i) = tmp;
2283 }
2284 pcum->aapcs_reg = par;
2285 }
2286
2287 pcum->aapcs_nextncrn = ncrn + nregs;
2288 return;
2289 }
2290
2291 /* C.11 */
2292 pcum->aapcs_nextncrn = NUM_ARG_REGS;
2293
2294 /* The argument is passed on the stack; record the needed number of words for
3abf17cf 2295 this argument and align the total size if necessary. */
43e9d192 2296on_stack:
3abf17cf 2297 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
43e9d192 2298 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
4f59f9f2
UB
2299 pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
2300 16 / UNITS_PER_WORD);
43e9d192
IB
2301 return;
2302}
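/* A worked example of rule C.8 above: for a call such as f (int a,
   __int128 b), A occupies W0 (NCRN becomes 1); B has 16-byte alignment,
   so NCRN is rounded up to 2 and B is passed in the register pair X2/X3.
   Register names here are purely illustrative of the AAPCS64 numbering.  */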
2303
2304/* Implement TARGET_FUNCTION_ARG. */
2305
2306static rtx
ef4bddc2 2307aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2308 const_tree type, bool named)
2309{
2310 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2311 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
2312
2313 if (mode == VOIDmode)
2314 return NULL_RTX;
2315
2316 aarch64_layout_arg (pcum_v, mode, type, named);
2317 return pcum->aapcs_reg;
2318}
2319
2320void
2321aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
2322 const_tree fntype ATTRIBUTE_UNUSED,
2323 rtx libname ATTRIBUTE_UNUSED,
2324 const_tree fndecl ATTRIBUTE_UNUSED,
2325 unsigned n_named ATTRIBUTE_UNUSED)
2326{
2327 pcum->aapcs_ncrn = 0;
2328 pcum->aapcs_nvrn = 0;
2329 pcum->aapcs_nextncrn = 0;
2330 pcum->aapcs_nextnvrn = 0;
2331 pcum->pcs_variant = ARM_PCS_AAPCS64;
2332 pcum->aapcs_reg = NULL_RTX;
2333 pcum->aapcs_arg_processed = false;
2334 pcum->aapcs_stack_words = 0;
2335 pcum->aapcs_stack_size = 0;
2336
261fb553
AL
2337 if (!TARGET_FLOAT
2338 && fndecl && TREE_PUBLIC (fndecl)
2339 && fntype && fntype != error_mark_node)
2340 {
2341 const_tree type = TREE_TYPE (fntype);
2342 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
2343 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
2344 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
2345 &mode, &nregs, NULL))
2346 aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
2347 }
43e9d192
IB
2348 return;
2349}
2350
2351static void
2352aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 2353 machine_mode mode,
43e9d192
IB
2354 const_tree type,
2355 bool named)
2356{
2357 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2358 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
2359 {
2360 aarch64_layout_arg (pcum_v, mode, type, named);
2361 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
2362 != (pcum->aapcs_stack_words != 0));
2363 pcum->aapcs_arg_processed = false;
2364 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
2365 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
2366 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
2367 pcum->aapcs_stack_words = 0;
2368 pcum->aapcs_reg = NULL_RTX;
2369 }
2370}
2371
2372bool
2373aarch64_function_arg_regno_p (unsigned regno)
2374{
2375 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
2376 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
2377}
2378
2379/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2380 PARM_BOUNDARY bits of alignment, but will be given anything up
2381 to STACK_BOUNDARY bits if the type requires it. This makes sure
2382 that both before and after the layout of each argument, the Next
2383 Stacked Argument Address (NSAA) will have a minimum alignment of
2384 8 bytes. */
2385
2386static unsigned int
ef4bddc2 2387aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192
IB
2388{
2389 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2390
2391 if (alignment < PARM_BOUNDARY)
2392 alignment = PARM_BOUNDARY;
2393 if (alignment > STACK_BOUNDARY)
2394 alignment = STACK_BOUNDARY;
2395 return alignment;
2396}
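/* Illustrative effect of the clamping above (assuming the port's usual
   PARM_BOUNDARY of 64 bits and STACK_BOUNDARY of 128 bits): a char
   argument with only 8-bit natural alignment still gets a 64-bit stack
   slot, while an over-aligned type is capped at 128-bit alignment.  */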
2397
2398/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
2399
2400 Return true if an argument passed on the stack should be padded upwards,
2401 i.e. if the least-significant byte of the stack slot has useful data.
2402
2403 Small aggregate types are placed at the lowest memory address.
2404
2405 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2406
2407bool
ef4bddc2 2408aarch64_pad_arg_upward (machine_mode mode, const_tree type)
43e9d192
IB
2409{
2410 /* On little-endian targets, the least significant byte of every stack
2411 argument is passed at the lowest byte address of the stack slot. */
2412 if (!BYTES_BIG_ENDIAN)
2413 return true;
2414
00edcfbe 2415 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
2416 the least significant byte of a stack argument is passed at the highest
2417 byte address of the stack slot. */
2418 if (type
00edcfbe
YZ
2419 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
2420 || POINTER_TYPE_P (type))
43e9d192
IB
2421 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
2422 return false;
2423
2424 /* Everything else padded upward, i.e. data in first byte of stack slot. */
2425 return true;
2426}
2427
2428/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2429
2430 It specifies padding for the last (and possibly the only)
2431 element of a block move between registers and memory.  Assuming
2432 the block is in memory, padding upward means that the last
2433 element is padded after its most significant byte, while with
2434 downward padding the last element is padded at its least
2435 significant byte side.
2436
2437 Small aggregates and small complex types are always padded
2438 upwards.
2439
2440 We don't need to worry about homogeneous floating-point or
2441 short-vector aggregates; their move is not affected by the
2442 padding direction determined here. Regardless of endianness,
2443 each element of such an aggregate is put in the least
2444 significant bits of a fp/simd register.
2445
2446 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2447 register has useful data, and return the opposite if the most
2448 significant byte does. */
2449
2450bool
ef4bddc2 2451aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
2452 bool first ATTRIBUTE_UNUSED)
2453{
2454
2455 /* Small composite types are always padded upward. */
2456 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2457 {
2458 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2459 : GET_MODE_SIZE (mode));
2460 if (size < 2 * UNITS_PER_WORD)
2461 return true;
2462 }
2463
2464 /* Otherwise, use the default padding. */
2465 return !BYTES_BIG_ENDIAN;
2466}
2467
ef4bddc2 2468static machine_mode
43e9d192
IB
2469aarch64_libgcc_cmp_return_mode (void)
2470{
2471 return SImode;
2472}
2473
a3eb8a52
EB
2474#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
2475
2476/* We use the 12-bit shifted immediate arithmetic instructions so values
2477 must be a multiple of (1 << 12), i.e. 4096. */
2478#define ARITH_FACTOR 4096
2479
2480#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
2481#error Cannot use simple address calculation for stack probing
2482#endif
2483
2484/* The pair of scratch registers used for stack probing. */
2485#define PROBE_STACK_FIRST_REG 9
2486#define PROBE_STACK_SECOND_REG 10
2487
2488/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
2489 inclusive. These are offsets from the current stack pointer. */
2490
2491static void
2492aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
2493{
2494 rtx reg1 = gen_rtx_REG (ptr_mode, PROBE_STACK_FIRST_REG);
2495
2496 /* See the same assertion on PROBE_INTERVAL above. */
2497 gcc_assert ((first % ARITH_FACTOR) == 0);
2498
2499 /* See if we have a constant small number of probes to generate. If so,
2500 that's the easy case. */
2501 if (size <= PROBE_INTERVAL)
2502 {
2503 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
2504
2505 emit_set_insn (reg1,
2506 plus_constant (ptr_mode,
2507 stack_pointer_rtx, -(first + base)));
2508 emit_stack_probe (plus_constant (ptr_mode, reg1, base - size));
2509 }
2510
2511 /* The run-time loop is made up of 8 insns in the generic case while the
2512 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
2513 else if (size <= 4 * PROBE_INTERVAL)
2514 {
2515 HOST_WIDE_INT i, rem;
2516
2517 emit_set_insn (reg1,
2518 plus_constant (ptr_mode,
2519 stack_pointer_rtx,
2520 -(first + PROBE_INTERVAL)));
2521 emit_stack_probe (reg1);
2522
2523 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
2524 it exceeds SIZE. If only two probes are needed, this will not
2525 generate any code. Then probe at FIRST + SIZE. */
2526 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
2527 {
2528 emit_set_insn (reg1,
2529 plus_constant (ptr_mode, reg1, -PROBE_INTERVAL));
2530 emit_stack_probe (reg1);
2531 }
2532
2533 rem = size - (i - PROBE_INTERVAL);
2534 if (rem > 256)
2535 {
2536 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2537
2538 emit_set_insn (reg1, plus_constant (ptr_mode, reg1, -base));
2539 emit_stack_probe (plus_constant (ptr_mode, reg1, base - rem));
2540 }
2541 else
2542 emit_stack_probe (plus_constant (ptr_mode, reg1, -rem));
2543 }
2544
2545 /* Otherwise, do the same as above, but in a loop. Note that we must be
2546 extra careful with variables wrapping around because we might be at
2547 the very top (or the very bottom) of the address space and we have
2548 to be able to handle this case properly; in particular, we use an
2549 equality test for the loop condition. */
2550 else
2551 {
2552 rtx reg2 = gen_rtx_REG (ptr_mode, PROBE_STACK_SECOND_REG);
2553
2554 /* Step 1: round SIZE to the previous multiple of the interval. */
2555
2556 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
2557
2558
2559 /* Step 2: compute initial and final value of the loop counter. */
2560
2561 /* TEST_ADDR = SP + FIRST. */
2562 emit_set_insn (reg1,
2563 plus_constant (ptr_mode, stack_pointer_rtx, -first));
2564
2565 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
2566 emit_set_insn (reg2,
2567 plus_constant (ptr_mode, stack_pointer_rtx,
2568 -(first + rounded_size)));
2569
2570
2571 /* Step 3: the loop
2572
2573 do
2574 {
2575 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
2576 probe at TEST_ADDR
2577 }
2578 while (TEST_ADDR != LAST_ADDR)
2579
2580 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
2581 until it is equal to ROUNDED_SIZE. */
2582
2583 if (ptr_mode == DImode)
2584 emit_insn (gen_probe_stack_range_di (reg1, reg1, reg2));
2585 else
2586 emit_insn (gen_probe_stack_range_si (reg1, reg1, reg2));
2587
2588
2589 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
2590 that SIZE is equal to ROUNDED_SIZE. */
2591
2592 if (size != rounded_size)
2593 {
2594 HOST_WIDE_INT rem = size - rounded_size;
2595
2596 if (rem > 256)
2597 {
2598 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2599
2600 emit_set_insn (reg2, plus_constant (ptr_mode, reg2, -base));
2601 emit_stack_probe (plus_constant (ptr_mode, reg2, base - rem));
2602 }
2603 else
2604 emit_stack_probe (plus_constant (ptr_mode, reg2, -rem));
2605 }
2606 }
2607
2608 /* Make sure nothing is scheduled before we are done. */
2609 emit_insn (gen_blockage ());
2610}
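/* A small worked example for the first case above: with FIRST == 4096 and
   SIZE == 1000, BASE is rounded up to 4096, so reg1 is set to SP - 8192
   and a single probe is emitted at reg1 + 3096, i.e. at SP - (FIRST + SIZE).
   The numbers are illustrative; PROBE_INTERVAL is 4096 by default.  */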
2611
2612/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
2613 absolute addresses. */
2614
2615const char *
2616aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
2617{
2618 static int labelno = 0;
2619 char loop_lab[32];
2620 rtx xops[2];
2621
2622 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
2623
2624 /* Loop. */
2625 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
2626
2627 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
2628 xops[0] = reg1;
2629 xops[1] = GEN_INT (PROBE_INTERVAL);
2630 output_asm_insn ("sub\t%0, %0, %1", xops);
2631
2632 /* Probe at TEST_ADDR. */
2633 output_asm_insn ("str\txzr, [%0]", xops);
2634
2635 /* Test if TEST_ADDR == LAST_ADDR. */
2636 xops[1] = reg2;
2637 output_asm_insn ("cmp\t%0, %1", xops);
2638
2639 /* Branch. */
2640 fputs ("\tb.ne\t", asm_out_file);
2641 assemble_name_raw (asm_out_file, loop_lab);
2642 fputc ('\n', asm_out_file);
2643
2644 return "";
2645}
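/* The loop emitted by the function above looks roughly like this, assuming
   the default 4096-byte interval and x9/x10 as the probing registers:

	.LPSRL0:
		sub	x9, x9, 4096
		str	xzr, [x9]
		cmp	x9, x10
		b.ne	.LPSRL0
   */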
2646
43e9d192
IB
2647static bool
2648aarch64_frame_pointer_required (void)
2649{
0b7f8166
MS
2650 /* In aarch64_override_options_after_change
2651 flag_omit_leaf_frame_pointer turns off the frame pointer by
2652 default. Turn it back on now if we've not got a leaf
2653 function. */
2654 if (flag_omit_leaf_frame_pointer
2655 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2656 return true;
43e9d192 2657
0b7f8166 2658 return false;
43e9d192
IB
2659}
2660
2661/* Mark the registers that need to be saved by the callee and calculate
2662 the size of the callee-saved registers area and frame record (both FP
2663 and LR may be omitted). */
2664static void
2665aarch64_layout_frame (void)
2666{
2667 HOST_WIDE_INT offset = 0;
2668 int regno;
2669
2670 if (reload_completed && cfun->machine->frame.laid_out)
2671 return;
2672
97826595
MS
2673#define SLOT_NOT_REQUIRED (-2)
2674#define SLOT_REQUIRED (-1)
2675
363ffa50
JW
2676 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
2677 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
2678
43e9d192
IB
2679 /* First mark all the registers that really need to be saved... */
2680 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2681 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2682
2683 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2684 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2685
2686 /* ... that includes the eh data registers (if needed)... */
2687 if (crtl->calls_eh_return)
2688 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
2689 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2690 = SLOT_REQUIRED;
43e9d192
IB
2691
2692 /* ... and any callee saved register that dataflow says is live. */
2693 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2694 if (df_regs_ever_live_p (regno)
1c923b60
JW
2695 && (regno == R30_REGNUM
2696 || !call_used_regs[regno]))
97826595 2697 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2698
2699 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2700 if (df_regs_ever_live_p (regno)
2701 && !call_used_regs[regno])
97826595 2702 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2703
2704 if (frame_pointer_needed)
2705 {
2e1cdae5 2706 /* FP and LR are placed in the linkage record. */
43e9d192 2707 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 2708 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 2709 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 2710 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
43e9d192 2711 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2e1cdae5 2712 offset += 2 * UNITS_PER_WORD;
43e9d192
IB
2713 }
2714
2715 /* Now assign stack slots for them. */
2e1cdae5 2716 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2717 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2718 {
2719 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
2720 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2721 cfun->machine->frame.wb_candidate1 = regno;
2722 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
2723 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2724 offset += UNITS_PER_WORD;
2725 }
2726
2727 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2728 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2729 {
2730 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
2731 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2732 cfun->machine->frame.wb_candidate1 = regno;
2733 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
2734 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2735 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2736 offset += UNITS_PER_WORD;
2737 }
2738
43e9d192 2739 cfun->machine->frame.padding0 =
4f59f9f2
UB
2740 (ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
2741 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
2742
2743 cfun->machine->frame.saved_regs_size = offset;
1c960e02
MS
2744
2745 cfun->machine->frame.hard_fp_offset
4f59f9f2
UB
2746 = ROUND_UP (cfun->machine->frame.saved_varargs_size
2747 + get_frame_size ()
2748 + cfun->machine->frame.saved_regs_size,
2749 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02
MS
2750
2751 cfun->machine->frame.frame_size
4f59f9f2
UB
2752 = ROUND_UP (cfun->machine->frame.hard_fp_offset
2753 + crtl->outgoing_args_size,
2754 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 2755
43e9d192
IB
2756 cfun->machine->frame.laid_out = true;
2757}
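/* Illustrative layout produced by the code above: a function that needs a
   frame pointer and also saves x19 gets reg_offset[R29] = 0,
   reg_offset[R30] = 8 and reg_offset[R19] = 16; the running offset of 24
   is then rounded up to 32, giving padding0 = 8 and saved_regs_size = 32,
   with R29/R30 recorded as the write-back candidates.  */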
2758
43e9d192
IB
2759static bool
2760aarch64_register_saved_on_entry (int regno)
2761{
97826595 2762 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
2763}
2764
64dedd72
JW
2765static unsigned
2766aarch64_next_callee_save (unsigned regno, unsigned limit)
2767{
2768 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2769 regno ++;
2770 return regno;
2771}
43e9d192 2772
c5e1f66e 2773static void
ef4bddc2 2774aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
2775 HOST_WIDE_INT adjustment)
2776 {
2777 rtx base_rtx = stack_pointer_rtx;
2778 rtx insn, reg, mem;
2779
2780 reg = gen_rtx_REG (mode, regno);
2781 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2782 plus_constant (Pmode, base_rtx, -adjustment));
2783 mem = gen_rtx_MEM (mode, mem);
2784
2785 insn = emit_move_insn (mem, reg);
2786 RTX_FRAME_RELATED_P (insn) = 1;
2787}
2788
80c11907 2789static rtx
ef4bddc2 2790aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
2791 HOST_WIDE_INT adjustment)
2792{
2793 switch (mode)
2794 {
2795 case DImode:
2796 return gen_storewb_pairdi_di (base, base, reg, reg2,
2797 GEN_INT (-adjustment),
2798 GEN_INT (UNITS_PER_WORD - adjustment));
2799 case DFmode:
2800 return gen_storewb_pairdf_di (base, base, reg, reg2,
2801 GEN_INT (-adjustment),
2802 GEN_INT (UNITS_PER_WORD - adjustment));
2803 default:
2804 gcc_unreachable ();
2805 }
2806}
2807
2808static void
ef4bddc2 2809aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
80c11907
JW
2810 unsigned regno2, HOST_WIDE_INT adjustment)
2811{
5d8a22a5 2812 rtx_insn *insn;
80c11907
JW
2813 rtx reg1 = gen_rtx_REG (mode, regno1);
2814 rtx reg2 = gen_rtx_REG (mode, regno2);
2815
2816 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2817 reg2, adjustment));
2818 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
2819 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2820 RTX_FRAME_RELATED_P (insn) = 1;
2821}
2822
159313d9 2823static rtx
ef4bddc2 2824aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
2825 HOST_WIDE_INT adjustment)
2826{
2827 switch (mode)
2828 {
2829 case DImode:
2830 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2831 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2832 case DFmode:
2833 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2834 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2835 default:
2836 gcc_unreachable ();
2837 }
2838}
2839
72df5c1f 2840static rtx
ef4bddc2 2841aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
2842 rtx reg2)
2843{
2844 switch (mode)
2845 {
2846 case DImode:
2847 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2848
2849 case DFmode:
2850 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2851
2852 default:
2853 gcc_unreachable ();
2854 }
2855}
2856
2857static rtx
ef4bddc2 2858aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
2859 rtx mem2)
2860{
2861 switch (mode)
2862 {
2863 case DImode:
2864 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2865
2866 case DFmode:
2867 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2868
2869 default:
2870 gcc_unreachable ();
2871 }
2872}
2873
43e9d192 2874
43e9d192 2875static void
ef4bddc2 2876aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 2877 unsigned start, unsigned limit, bool skip_wb)
43e9d192 2878{
5d8a22a5 2879 rtx_insn *insn;
ef4bddc2 2880 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
a007a21c 2881 ? gen_frame_mem : gen_rtx_MEM);
43e9d192
IB
2882 unsigned regno;
2883 unsigned regno2;
2884
0ec74a1e 2885 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
2886 regno <= limit;
2887 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 2888 {
ae13fce3
JW
2889 rtx reg, mem;
2890 HOST_WIDE_INT offset;
64dedd72 2891
ae13fce3
JW
2892 if (skip_wb
2893 && (regno == cfun->machine->frame.wb_candidate1
2894 || regno == cfun->machine->frame.wb_candidate2))
2895 continue;
2896
2897 reg = gen_rtx_REG (mode, regno);
2898 offset = start_offset + cfun->machine->frame.reg_offset[regno];
0ec74a1e
JW
2899 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2900 offset));
64dedd72
JW
2901
2902 regno2 = aarch64_next_callee_save (regno + 1, limit);
2903
2904 if (regno2 <= limit
2905 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2906 == cfun->machine->frame.reg_offset[regno2]))
2907
43e9d192 2908 {
0ec74a1e 2909 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
2910 rtx mem2;
2911
2912 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
8ed2fc62
JW
2913 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2914 offset));
2915 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2916 reg2));
0b4a9743 2917
64dedd72
JW
2918 /* The first part of a frame-related parallel insn is
2919 always assumed to be relevant to the frame
2920 calculations; subsequent parts are only
2921 frame-related if explicitly marked. */
2922 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2923 regno = regno2;
2924 }
2925 else
8ed2fc62
JW
2926 insn = emit_move_insn (mem, reg);
2927
2928 RTX_FRAME_RELATED_P (insn) = 1;
2929 }
2930}
2931
2932static void
ef4bddc2 2933aarch64_restore_callee_saves (machine_mode mode,
8ed2fc62 2934 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 2935 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 2936{
8ed2fc62 2937 rtx base_rtx = stack_pointer_rtx;
ef4bddc2 2938 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
8ed2fc62
JW
2939 ? gen_frame_mem : gen_rtx_MEM);
2940 unsigned regno;
2941 unsigned regno2;
2942 HOST_WIDE_INT offset;
2943
2944 for (regno = aarch64_next_callee_save (start, limit);
2945 regno <= limit;
2946 regno = aarch64_next_callee_save (regno + 1, limit))
2947 {
ae13fce3 2948 rtx reg, mem;
8ed2fc62 2949
ae13fce3
JW
2950 if (skip_wb
2951 && (regno == cfun->machine->frame.wb_candidate1
2952 || regno == cfun->machine->frame.wb_candidate2))
2953 continue;
2954
2955 reg = gen_rtx_REG (mode, regno);
8ed2fc62
JW
2956 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2957 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2958
2959 regno2 = aarch64_next_callee_save (regno + 1, limit);
2960
2961 if (regno2 <= limit
2962 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2963 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 2964 {
8ed2fc62
JW
2965 rtx reg2 = gen_rtx_REG (mode, regno2);
2966 rtx mem2;
2967
2968 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2969 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 2970 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 2971
dd991abb 2972 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 2973 regno = regno2;
43e9d192 2974 }
8ed2fc62 2975 else
dd991abb
RH
2976 emit_move_insn (reg, mem);
2977 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 2978 }
43e9d192
IB
2979}
2980
2981/* AArch64 stack frames generated by this compiler look like:
2982
2983 +-------------------------------+
2984 | |
2985 | incoming stack arguments |
2986 | |
34834420
MS
2987 +-------------------------------+
2988 | | <-- incoming stack pointer (aligned)
43e9d192
IB
2989 | callee-allocated save area |
2990 | for register varargs |
2991 | |
34834420
MS
2992 +-------------------------------+
2993 | local variables | <-- frame_pointer_rtx
43e9d192
IB
2994 | |
2995 +-------------------------------+
454fdba9
RL
2996 | padding0 | \
2997 +-------------------------------+ |
454fdba9 2998 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
2999 +-------------------------------+ |
3000 | LR' | |
3001 +-------------------------------+ |
34834420
MS
3002 | FP' | / <- hard_frame_pointer_rtx (aligned)
3003 +-------------------------------+
43e9d192
IB
3004 | dynamic allocation |
3005 +-------------------------------+
34834420
MS
3006 | padding |
3007 +-------------------------------+
3008 | outgoing stack arguments | <-- arg_pointer
3009 | |
3010 +-------------------------------+
3011 | | <-- stack_pointer_rtx (aligned)
43e9d192 3012
34834420
MS
3013 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
3014 but leave frame_pointer_rtx and hard_frame_pointer_rtx
3015 unchanged. */
43e9d192
IB
3016
3017/* Generate the prologue instructions for entry into a function.
3018 Establish the stack frame by decreasing the stack pointer with a
3019 properly calculated size and, if necessary, create a frame record
3020 filled with the values of LR and previous frame pointer. The
6991c977 3021 current FP is also set up if it is in use. */
43e9d192
IB
3022
3023void
3024aarch64_expand_prologue (void)
3025{
3026 /* sub sp, sp, #<frame_size>
3027 stp {fp, lr}, [sp, #<frame_size> - 16]
3028 add fp, sp, #<frame_size> - hardfp_offset
3029 stp {cs_reg}, [fp, #-16] etc.
3030
3031 sub sp, sp, <final_adjustment_if_any>
3032 */
43e9d192 3033 HOST_WIDE_INT frame_size, offset;
1c960e02 3034 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
dd991abb 3035 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 3036 rtx_insn *insn;
43e9d192
IB
3037
3038 aarch64_layout_frame ();
43e9d192 3039
dd991abb
RH
3040 offset = frame_size = cfun->machine->frame.frame_size;
3041 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
3042 fp_offset = frame_size - hard_fp_offset;
43e9d192 3043
dd991abb
RH
3044 if (flag_stack_usage_info)
3045 current_function_static_stack_size = frame_size;
43e9d192 3046
a3eb8a52
EB
3047 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3048 {
3049 if (crtl->is_leaf && !cfun->calls_alloca)
3050 {
3051 if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
3052 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3053 frame_size - STACK_CHECK_PROTECT);
3054 }
3055 else if (frame_size > 0)
3056 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
3057 }
3058
44c0e7b9 3059 /* Store pairs and load pairs have a range of only -512 to 504. */
43e9d192
IB
3060 if (offset >= 512)
3061 {
3062 /* When the frame has a large size, an initial decrease is done on
3063 the stack pointer to jump over the callee-allocated save area for
3064 register varargs, the local variable area and/or the callee-saved
3065 register area. This will allow the pre-index write-back
3066 store pair instructions to be used for setting up the stack frame
3067 efficiently. */
dd991abb 3068 offset = hard_fp_offset;
43e9d192
IB
3069 if (offset >= 512)
3070 offset = cfun->machine->frame.saved_regs_size;
3071
3072 frame_size -= (offset + crtl->outgoing_args_size);
3073 fp_offset = 0;
3074
3075 if (frame_size >= 0x1000000)
3076 {
3077 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3078 emit_move_insn (op0, GEN_INT (-frame_size));
dd991abb
RH
3079 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
3080
3081 add_reg_note (insn, REG_CFA_ADJUST_CFA,
f7df4a84 3082 gen_rtx_SET (stack_pointer_rtx,
dd991abb
RH
3083 plus_constant (Pmode, stack_pointer_rtx,
3084 -frame_size)));
3085 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
3086 }
3087 else if (frame_size > 0)
3088 {
dd991abb
RH
3089 int hi_ofs = frame_size & 0xfff000;
3090 int lo_ofs = frame_size & 0x000fff;
3091
3092 if (hi_ofs)
43e9d192
IB
3093 {
3094 insn = emit_insn (gen_add2_insn
dd991abb 3095 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
43e9d192
IB
3096 RTX_FRAME_RELATED_P (insn) = 1;
3097 }
dd991abb 3098 if (lo_ofs)
43e9d192
IB
3099 {
3100 insn = emit_insn (gen_add2_insn
dd991abb 3101 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
43e9d192
IB
3102 RTX_FRAME_RELATED_P (insn) = 1;
3103 }
3104 }
3105 }
3106 else
3107 frame_size = -1;
3108
3109 if (offset > 0)
3110 {
ae13fce3
JW
3111 bool skip_wb = false;
3112
43e9d192
IB
3113 if (frame_pointer_needed)
3114 {
c5e1f66e
JW
3115 skip_wb = true;
3116
43e9d192
IB
3117 if (fp_offset)
3118 {
3119 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
3120 GEN_INT (-offset)));
3121 RTX_FRAME_RELATED_P (insn) = 1;
80c11907
JW
3122
3123 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
c5e1f66e 3124 R30_REGNUM, false);
43e9d192
IB
3125 }
3126 else
80c11907 3127 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
43e9d192
IB
3128
3129 /* Set up frame pointer to point to the location of the
3130 previous frame pointer on the stack. */
3131 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
3132 stack_pointer_rtx,
3133 GEN_INT (fp_offset)));
43e9d192 3134 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 3135 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192
IB
3136 }
3137 else
3138 {
c5e1f66e
JW
3139 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3140 unsigned reg2 = cfun->machine->frame.wb_candidate2;
80c11907 3141
c5e1f66e
JW
3142 if (fp_offset
3143 || reg1 == FIRST_PSEUDO_REGISTER
3144 || (reg2 == FIRST_PSEUDO_REGISTER
3145 && offset >= 256))
3146 {
3147 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
3148 GEN_INT (-offset)));
3149 RTX_FRAME_RELATED_P (insn) = 1;
3150 }
3151 else
3152 {
ef4bddc2 3153 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
c5e1f66e
JW
3154
3155 skip_wb = true;
3156
3157 if (reg2 == FIRST_PSEUDO_REGISTER)
3158 aarch64_pushwb_single_reg (mode1, reg1, offset);
3159 else
3160 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
3161 }
43e9d192
IB
3162 }
3163
c5e1f66e
JW
3164 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
3165 skip_wb);
ae13fce3
JW
3166 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
3167 skip_wb);
43e9d192
IB
3168 }
3169
3170 /* When offset >= 512,
3171 sub sp, sp, #<outgoing_args_size> */
3172 if (frame_size > -1)
3173 {
3174 if (crtl->outgoing_args_size > 0)
3175 {
3176 insn = emit_insn (gen_add2_insn
3177 (stack_pointer_rtx,
3178 GEN_INT (- crtl->outgoing_args_size)));
3179 RTX_FRAME_RELATED_P (insn) = 1;
3180 }
3181 }
3182}
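/* For a small frame (no large locals, no outgoing arguments) the prologue
   above boils down to something like the following illustrative sequence:

	stp	x29, x30, [sp, -32]!
	add	x29, sp, 0
	str	x19, [sp, 16]

   i.e. a single pre-index store pair that both allocates the frame and
   saves the frame record, followed by the remaining callee saves.  */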
3183
4f942779
RL
3184/* Return TRUE if we can use a simple_return insn.
3185
3186 This function checks whether the callee-saved stack is empty, which
3187 means no restore actions are needed. The pro_and_epilogue pass will use
3188 this to check whether the shrink-wrapping optimization is feasible. */
3189
3190bool
3191aarch64_use_return_insn_p (void)
3192{
3193 if (!reload_completed)
3194 return false;
3195
3196 if (crtl->profile)
3197 return false;
3198
3199 aarch64_layout_frame ();
3200
3201 return cfun->machine->frame.frame_size == 0;
3202}
3203
43e9d192
IB
3204/* Generate the epilogue instructions for returning from a function. */
3205void
3206aarch64_expand_epilogue (bool for_sibcall)
3207{
1c960e02 3208 HOST_WIDE_INT frame_size, offset;
43e9d192 3209 HOST_WIDE_INT fp_offset;
dd991abb 3210 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 3211 rtx_insn *insn;
7e8c2bd5
JW
3212 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
3213 bool need_barrier_p = (get_frame_size () != 0
3214 || cfun->machine->frame.saved_varargs_size);
43e9d192
IB
3215
3216 aarch64_layout_frame ();
43e9d192 3217
1c960e02 3218 offset = frame_size = cfun->machine->frame.frame_size;
dd991abb
RH
3219 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
3220 fp_offset = frame_size - hard_fp_offset;
44c0e7b9
YZ
3221
3222 /* Store pairs and load pairs have a range of only -512 to 504. */
43e9d192
IB
3223 if (offset >= 512)
3224 {
dd991abb 3225 offset = hard_fp_offset;
43e9d192
IB
3226 if (offset >= 512)
3227 offset = cfun->machine->frame.saved_regs_size;
3228
3229 frame_size -= (offset + crtl->outgoing_args_size);
3230 fp_offset = 0;
3231 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
3232 {
3233 insn = emit_insn (gen_add2_insn
3234 (stack_pointer_rtx,
3235 GEN_INT (crtl->outgoing_args_size)));
3236 RTX_FRAME_RELATED_P (insn) = 1;
3237 }
3238 }
3239 else
3240 frame_size = -1;
3241
3242 /* If there were outgoing arguments or we've done dynamic stack
3243 allocation, then restore the stack pointer from the frame
3244 pointer. This is at most one insn and more efficient than using
3245 GCC's internal mechanism. */
3246 if (frame_pointer_needed
3247 && (crtl->outgoing_args_size || cfun->calls_alloca))
3248 {
7e8c2bd5
JW
3249 if (cfun->calls_alloca)
3250 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3251
43e9d192
IB
3252 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
3253 hard_frame_pointer_rtx,
8f454e9f
JW
3254 GEN_INT (0)));
3255 offset = offset - fp_offset;
43e9d192
IB
3256 }
3257
43e9d192
IB
3258 if (offset > 0)
3259 {
4b92caa1
JW
3260 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3261 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3262 bool skip_wb = true;
dd991abb 3263 rtx cfi_ops = NULL;
4b92caa1 3264
43e9d192 3265 if (frame_pointer_needed)
4b92caa1
JW
3266 fp_offset = 0;
3267 else if (fp_offset
3268 || reg1 == FIRST_PSEUDO_REGISTER
3269 || (reg2 == FIRST_PSEUDO_REGISTER
3270 && offset >= 256))
3271 skip_wb = false;
3272
3273 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
dd991abb 3274 skip_wb, &cfi_ops);
4b92caa1 3275 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
dd991abb 3276 skip_wb, &cfi_ops);
4b92caa1 3277
7e8c2bd5
JW
3278 if (need_barrier_p)
3279 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3280
4b92caa1 3281 if (skip_wb)
43e9d192 3282 {
ef4bddc2 3283 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
dd991abb 3284 rtx rreg1 = gen_rtx_REG (mode1, reg1);
4b92caa1 3285
dd991abb 3286 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
4b92caa1 3287 if (reg2 == FIRST_PSEUDO_REGISTER)
dd991abb
RH
3288 {
3289 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
3290 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
3291 mem = gen_rtx_MEM (mode1, mem);
3292 insn = emit_move_insn (rreg1, mem);
3293 }
4b92caa1
JW
3294 else
3295 {
dd991abb 3296 rtx rreg2 = gen_rtx_REG (mode1, reg2);
4b92caa1 3297
dd991abb
RH
3298 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
3299 insn = emit_insn (aarch64_gen_loadwb_pair
3300 (mode1, stack_pointer_rtx, rreg1,
3301 rreg2, offset));
4b92caa1 3302 }
43e9d192 3303 }
43e9d192
IB
3304 else
3305 {
3306 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
3307 GEN_INT (offset)));
43e9d192 3308 }
43e9d192 3309
dd991abb
RH
3310 /* Reset the CFA to be SP + FRAME_SIZE. */
3311 rtx new_cfa = stack_pointer_rtx;
3312 if (frame_size > 0)
3313 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
3314 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
3315 REG_NOTES (insn) = cfi_ops;
43e9d192 3316 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
3317 }
3318
dd991abb 3319 if (frame_size > 0)
43e9d192 3320 {
7e8c2bd5
JW
3321 if (need_barrier_p)
3322 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3323
43e9d192
IB
3324 if (frame_size >= 0x1000000)
3325 {
3326 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3327 emit_move_insn (op0, GEN_INT (frame_size));
dd991abb 3328 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
43e9d192 3329 }
dd991abb 3330 else
43e9d192 3331 {
dd991abb
RH
3332 int hi_ofs = frame_size & 0xfff000;
3333 int lo_ofs = frame_size & 0x000fff;
3334
3335 if (hi_ofs && lo_ofs)
43e9d192
IB
3336 {
3337 insn = emit_insn (gen_add2_insn
dd991abb 3338 (stack_pointer_rtx, GEN_INT (hi_ofs)));
43e9d192 3339 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 3340 frame_size = lo_ofs;
43e9d192 3341 }
dd991abb
RH
3342 insn = emit_insn (gen_add2_insn
3343 (stack_pointer_rtx, GEN_INT (frame_size)));
43e9d192
IB
3344 }
3345
dd991abb
RH
3346 /* Reset the CFA to be SP + 0. */
3347 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
3348 RTX_FRAME_RELATED_P (insn) = 1;
3349 }
3350
3351 /* Stack adjustment for exception handler. */
3352 if (crtl->calls_eh_return)
3353 {
3354 /* We need to unwind the stack by the offset computed by
3355 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
3356 to be SP; letting the CFA move during this adjustment
3357 is just as correct as retaining the CFA from the body
3358 of the function. Therefore, do nothing special. */
3359 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
3360 }
3361
3362 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
3363 if (!for_sibcall)
3364 emit_jump_insn (ret_rtx);
3365}
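/* The matching epilogue for the small-frame prologue sketched earlier is,
   roughly:

	ldr	x19, [sp, 16]
	ldp	x29, x30, [sp], 32
	ret
   */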
3366
3367/* Return the place to copy the exception unwinding return address to.
3368 This will probably be a stack slot, but could (in theory) be the
3369 return register. */
3370rtx
3371aarch64_final_eh_return_addr (void)
3372{
1c960e02
MS
3373 HOST_WIDE_INT fp_offset;
3374
43e9d192 3375 aarch64_layout_frame ();
1c960e02
MS
3376
3377 fp_offset = cfun->machine->frame.frame_size
3378 - cfun->machine->frame.hard_fp_offset;
43e9d192
IB
3379
3380 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
3381 return gen_rtx_REG (DImode, LR_REGNUM);
3382
3383 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
3384 result in a store to save LR introduced by builtin_eh_return () being
3385 incorrectly deleted because the alias is not detected.
3386 So in the calculation of the address to copy the exception unwinding
3387 return address to, we note 2 cases.
3388 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
3389 we return a SP-relative location since all the addresses are SP-relative
3390 in this case. This prevents the store from being optimized away.
3391 If the fp_offset is not 0, then the addresses will be FP-relative and
3392 therefore we return a FP-relative location. */
3393
3394 if (frame_pointer_needed)
3395 {
3396 if (fp_offset)
3397 return gen_frame_mem (DImode,
3398 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
3399 else
3400 return gen_frame_mem (DImode,
3401 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
3402 }
3403
3404 /* If FP is not needed, we calculate the location of LR, which would be
3405 at the top of the saved registers block. */
3406
3407 return gen_frame_mem (DImode,
3408 plus_constant (Pmode,
3409 stack_pointer_rtx,
3410 fp_offset
3411 + cfun->machine->frame.saved_regs_size
3412 - 2 * UNITS_PER_WORD));
3413}
3414
43e9d192 3415static void
f43657b4
JW
3416aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
3417 HOST_WIDE_INT delta)
43e9d192
IB
3418{
3419 HOST_WIDE_INT mdelta = delta;
f43657b4
JW
3420 rtx this_rtx = gen_rtx_REG (mode, regnum);
3421 rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
43e9d192
IB
3422
3423 if (mdelta < 0)
3424 mdelta = -mdelta;
3425
3426 if (mdelta >= 4096 * 4096)
3427 {
f43657b4 3428 aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (delta), true, mode);
d9600ae5 3429 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
3430 }
3431 else if (mdelta > 0)
3432 {
43e9d192 3433 if (mdelta >= 4096)
d9600ae5 3434 {
f7df4a84 3435 emit_insn (gen_rtx_SET (scratch_rtx, GEN_INT (mdelta / 4096)));
f43657b4 3436 rtx shift = gen_rtx_ASHIFT (mode, scratch_rtx, GEN_INT (12));
d9600ae5 3437 if (delta < 0)
f7df4a84 3438 emit_insn (gen_rtx_SET (this_rtx,
f43657b4 3439 gen_rtx_MINUS (mode, this_rtx, shift)));
d9600ae5 3440 else
f7df4a84 3441 emit_insn (gen_rtx_SET (this_rtx,
f43657b4 3442 gen_rtx_PLUS (mode, this_rtx, shift)));
d9600ae5 3443 }
43e9d192 3444 if (mdelta % 4096 != 0)
d9600ae5
SN
3445 {
3446 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
f7df4a84 3447 emit_insn (gen_rtx_SET (this_rtx,
f43657b4 3448 gen_rtx_PLUS (mode, this_rtx, scratch_rtx)));
d9600ae5 3449 }
43e9d192
IB
3450 }
3451}
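/* Worked example for the constant addition above: DELTA == 0x12345 is
   split as 18 * 4096 + 837, so the emitted sequence is roughly

	mov	<scratch>, 18
	add	<this>, <this>, <scratch>, lsl 12
	add	<this>, <this>, 837

   (register names elided); only deltas of 4096 * 4096 or more fall back
   to a full immediate move followed by a single add.  */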
3452
3453/* Output code to add DELTA to the first argument, and then jump
3454 to FUNCTION. Used for C++ multiple inheritance. */
3455static void
3456aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
3457 HOST_WIDE_INT delta,
3458 HOST_WIDE_INT vcall_offset,
3459 tree function)
3460{
3461 /* The this pointer is always in x0. Note that this differs from
3462 Arm where the this pointer may be bumped to r1 if r0 is required
3463 to return a pointer to an aggregate. On AArch64 a result value
3464 pointer will be in x8. */
3465 int this_regno = R0_REGNUM;
5d8a22a5
DM
3466 rtx this_rtx, temp0, temp1, addr, funexp;
3467 rtx_insn *insn;
43e9d192 3468
75f1d6fc
SN
3469 reload_completed = 1;
3470 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
3471
3472 if (vcall_offset == 0)
f43657b4 3473 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
43e9d192
IB
3474 else
3475 {
28514dda 3476 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 3477
75f1d6fc
SN
3478 this_rtx = gen_rtx_REG (Pmode, this_regno);
3479 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3480 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 3481
75f1d6fc
SN
3482 addr = this_rtx;
3483 if (delta != 0)
3484 {
3485 if (delta >= -256 && delta < 256)
3486 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
3487 plus_constant (Pmode, this_rtx, delta));
3488 else
f43657b4 3489 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
43e9d192
IB
3490 }
3491
28514dda
YZ
3492 if (Pmode == ptr_mode)
3493 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
3494 else
3495 aarch64_emit_move (temp0,
3496 gen_rtx_ZERO_EXTEND (Pmode,
3497 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 3498
28514dda 3499 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 3500 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
3501 else
3502 {
f43657b4
JW
3503 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
3504 Pmode);
75f1d6fc 3505 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
3506 }
3507
28514dda
YZ
3508 if (Pmode == ptr_mode)
3509 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
3510 else
3511 aarch64_emit_move (temp1,
3512 gen_rtx_SIGN_EXTEND (Pmode,
3513 gen_rtx_MEM (ptr_mode, addr)));
3514
75f1d6fc 3515 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
3516 }
3517
75f1d6fc
SN
3518 /* Generate a tail call to the target function. */
3519 if (!TREE_USED (function))
3520 {
3521 assemble_external (function);
3522 TREE_USED (function) = 1;
3523 }
3524 funexp = XEXP (DECL_RTL (function), 0);
3525 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
3526 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
3527 SIBLING_CALL_P (insn) = 1;
3528
3529 insn = get_insns ();
3530 shorten_branches (insn);
3531 final_start_function (insn, file, 1);
3532 final (insn, file, 1);
43e9d192 3533 final_end_function ();
75f1d6fc
SN
3534
3535 /* Stop pretending to be a post-reload pass. */
3536 reload_completed = 0;
43e9d192
IB
3537}
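
/* Illustrative sketch (not GCC code): the pointer adjustment the thunk
   emitted above performs before tail-calling FUNCTION.  Plain C stand-in
   for the RTL; on AArch64 `this_ptr' arrives in x0 and the temporaries are
   the IP0/IP1 scratch registers.  */
static void *
thunk_adjust (void *this_ptr, long delta, long vcall_offset)
{
  char *p = (char *) this_ptr + delta;
  if (vcall_offset != 0)
    {
      /* *p is the vtable pointer; the extra adjustment for a virtual
	 base is stored in the vtable at VCALL_OFFSET.  */
      char *vtable = *(char **) p;
      p += *(long *) (vtable + vcall_offset);
    }
  return p;  /* ...then the thunk branches to FUNCTION with p in x0.  */
}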
3538
43e9d192
IB
3539static bool
3540aarch64_tls_referenced_p (rtx x)
3541{
3542 if (!TARGET_HAVE_TLS)
3543 return false;
e7de8563
RS
3544 subrtx_iterator::array_type array;
3545 FOR_EACH_SUBRTX (iter, array, x, ALL)
3546 {
3547 const_rtx x = *iter;
3548 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3549 return true;
3550 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3551 TLS offsets, not real symbol references. */
3552 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3553 iter.skip_subrtxes ();
3554 }
3555 return false;
43e9d192
IB
3556}
3557
3558
43e9d192
IB
3559/* Return true if val can be encoded as a 12-bit unsigned immediate with
3560 a left shift of 0 or 12 bits. */
3561bool
3562aarch64_uimm12_shift (HOST_WIDE_INT val)
3563{
3564 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3565 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3566 );
3567}
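
/* Illustrative sketch (not GCC code): values aarch64_uimm12_shift above
   accepts -- a 12-bit quantity, optionally shifted left by 12, i.e. the
   ADD/SUB immediate encoding.  */
#include <assert.h>

static int
uimm12_shift (long long val)
{
  return (val & 0xfffLL) == val || (val & (0xfffLL << 12)) == val;
}

int
main (void)
{
  assert (uimm12_shift (0xabc));     /* 12-bit value, shift 0. */
  assert (uimm12_shift (0xabc000));  /* 12-bit value, shift 12. */
  assert (!uimm12_shift (0x1001));   /* needs bits in both halves. */
  return 0;
}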
3568
3569
3570/* Return true if val is an immediate that can be loaded into a
3571 register by a MOVZ instruction. */
3572static bool
ef4bddc2 3573aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3574{
3575 if (GET_MODE_SIZE (mode) > 4)
3576 {
3577 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3578 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3579 return 1;
3580 }
3581 else
3582 {
3583 /* Ignore sign extension. */
3584 val &= (HOST_WIDE_INT) 0xffffffff;
3585 }
3586 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3587 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3588}
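
/* Illustrative sketch (not GCC code): the MOVZ test above accepts a 16-bit
   chunk placed at bit position 0, 16, 32 or 48 (the last two only for
   64-bit destinations).  */
#include <assert.h>

static int
movw_imm64 (unsigned long long val)
{
  return (val & 0xffffULL) == val
	 || (val & (0xffffULL << 16)) == val
	 || (val & (0xffffULL << 32)) == val
	 || (val & (0xffffULL << 48)) == val;
}

int
main (void)
{
  assert (movw_imm64 (0xbeef));             /* chunk at bit 0. */
  assert (movw_imm64 (0xbeef00000000ULL));  /* chunk at bit 32. */
  assert (!movw_imm64 (0x10001));           /* straddles two chunks. */
  return 0;
}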
3589
a64c73a2
WD
3590/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
3591
3592static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
3593 {
3594 0x0000000100000001ull,
3595 0x0001000100010001ull,
3596 0x0101010101010101ull,
3597 0x1111111111111111ull,
3598 0x5555555555555555ull,
3599 };
3600
43e9d192
IB
3601
3602/* Return true if val is a valid bitmask immediate. */
a64c73a2 3603
43e9d192 3604bool
a64c73a2 3605aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 3606{
a64c73a2
WD
3607 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
3608 int bits;
3609
3610 /* Check for a single sequence of one bits and return quickly if so.
3611 The special cases of all ones and all zeroes return false. */
3612 val = (unsigned HOST_WIDE_INT) val_in;
3613 tmp = val + (val & -val);
3614
3615 if (tmp == (tmp & -tmp))
3616 return (val + 1) > 1;
3617
3618 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
3619 if (mode == SImode)
3620 val = (val << 32) | (val & 0xffffffff);
3621
3622 /* Invert if the immediate doesn't start with a zero bit - this means we
3623 only need to search for sequences of one bits. */
3624 if (val & 1)
3625 val = ~val;
3626
3627 /* Find the first set bit and set tmp to val with the first sequence of one
3628 bits removed. Return success if there is a single sequence of ones. */
3629 first_one = val & -val;
3630 tmp = val & (val + first_one);
3631
3632 if (tmp == 0)
3633 return true;
3634
3635 /* Find the next set bit and compute the difference in bit position. */
3636 next_one = tmp & -tmp;
3637 bits = clz_hwi (first_one) - clz_hwi (next_one);
3638 mask = val ^ tmp;
3639
3640 /* Check the bit position difference is a power of 2, and that the first
3641 sequence of one bits fits within 'bits' bits. */
3642 if ((mask >> bits) != 0 || bits != (bits & -bits))
3643 return false;
3644
3645 /* Check the sequence of one bits is repeated 64/bits times. */
3646 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
3647}
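
/* Illustrative sketch (not GCC code): a slower, more literal test for the
   same property.  An AArch64 bitmask immediate is a contiguous run of ones
   replicated across the register at a power-of-two period (all-zeros and
   all-ones excluded).  Unlike the hardware (and the routine above), this
   simplified version does not accept runs that wrap around the element,
   such as 0x8000000000000001.  */
#include <assert.h>
#include <stdint.h>

static int
is_bitmask_imm64 (uint64_t val)
{
  if (val == 0 || val == ~(uint64_t) 0)
    return 0;
  for (int size = 2; size <= 64; size *= 2)
    {
      uint64_t mask = size == 64 ? ~(uint64_t) 0 : ((uint64_t) 1 << size) - 1;
      uint64_t elt = val & mask;
      uint64_t rep = 0;
      for (int i = 0; i < 64; i += size)
	rep |= elt << i;
      if (rep != val)
	continue;                       /* not replicated at this period */
      uint64_t tmp = elt + (elt & -elt);
      if ((tmp & (tmp - 1)) == 0)       /* single contiguous run of ones */
	return 1;
    }
  return 0;
}

int
main (void)
{
  assert (is_bitmask_imm64 (0x00ff00ff00ff00ffULL)); /* 8 ones per 16 bits */
  assert (is_bitmask_imm64 (0x5555555555555555ULL)); /* 1 one per 2 bits */
  assert (!is_bitmask_imm64 (0x0000000000001234ULL));
  return 0;
}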
3648
3649
3650/* Return true if val is an immediate that can be loaded into a
3651 register in a single instruction. */
3652bool
ef4bddc2 3653aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3654{
3655 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3656 return 1;
3657 return aarch64_bitmask_imm (val, mode);
3658}
3659
3660static bool
ef4bddc2 3661aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
3662{
3663 rtx base, offset;
7eda14e1 3664
43e9d192
IB
3665 if (GET_CODE (x) == HIGH)
3666 return true;
3667
3668 split_const (x, &base, &offset);
3669 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 3670 {
a6e0bfa7 3671 if (aarch64_classify_symbol (base, offset)
28514dda
YZ
3672 != SYMBOL_FORCE_TO_MEM)
3673 return true;
3674 else
3675 /* Avoid generating a 64-bit relocation in ILP32; leave
3676 to aarch64_expand_mov_immediate to handle it properly. */
3677 return mode != ptr_mode;
3678 }
43e9d192
IB
3679
3680 return aarch64_tls_referenced_p (x);
3681}
3682
e79136e4
WD
3683/* Implement TARGET_CASE_VALUES_THRESHOLD.
3684 The expansion for a table switch is quite expensive due to the number
3685 of instructions, the table lookup and the hard-to-predict indirect jump.
3686 When optimizing for speed with -O3 enabled, use the per-core tuning if
3687 set, otherwise use tables for > 16 cases as a tradeoff between size and
3688 performance. When optimizing for size, use the default setting. */
50487d79
EM
3689
3690static unsigned int
3691aarch64_case_values_threshold (void)
3692{
3693 /* Use the specified limit for the number of cases before using jump
3694 tables at higher optimization levels. */
3695 if (optimize > 2
3696 && selected_cpu->tune->max_case_values != 0)
3697 return selected_cpu->tune->max_case_values;
3698 else
e79136e4 3699 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
3700}
3701
43e9d192
IB
3702/* Return true if register REGNO is a valid index register.
3703 STRICT_P is true if REG_OK_STRICT is in effect. */
3704
3705bool
3706aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3707{
3708 if (!HARD_REGISTER_NUM_P (regno))
3709 {
3710 if (!strict_p)
3711 return true;
3712
3713 if (!reg_renumber)
3714 return false;
3715
3716 regno = reg_renumber[regno];
3717 }
3718 return GP_REGNUM_P (regno);
3719}
3720
3721/* Return true if register REGNO is a valid base register for mode MODE.
3722 STRICT_P is true if REG_OK_STRICT is in effect. */
3723
3724bool
3725aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3726{
3727 if (!HARD_REGISTER_NUM_P (regno))
3728 {
3729 if (!strict_p)
3730 return true;
3731
3732 if (!reg_renumber)
3733 return false;
3734
3735 regno = reg_renumber[regno];
3736 }
3737
3738 /* The fake registers will be eliminated to either the stack or
3739 hard frame pointer, both of which are usually valid base registers.
3740 Reload deals with the cases where the eliminated form isn't valid. */
3741 return (GP_REGNUM_P (regno)
3742 || regno == SP_REGNUM
3743 || regno == FRAME_POINTER_REGNUM
3744 || regno == ARG_POINTER_REGNUM);
3745}
3746
3747/* Return true if X is a valid base register for mode MODE.
3748 STRICT_P is true if REG_OK_STRICT is in effect. */
3749
3750static bool
3751aarch64_base_register_rtx_p (rtx x, bool strict_p)
3752{
3753 if (!strict_p && GET_CODE (x) == SUBREG)
3754 x = SUBREG_REG (x);
3755
3756 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3757}
3758
3759/* Return true if address offset is a valid index. If it is, fill in INFO
3760 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3761
3762static bool
3763aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 3764 machine_mode mode, bool strict_p)
43e9d192
IB
3765{
3766 enum aarch64_address_type type;
3767 rtx index;
3768 int shift;
3769
3770 /* (reg:P) */
3771 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3772 && GET_MODE (x) == Pmode)
3773 {
3774 type = ADDRESS_REG_REG;
3775 index = x;
3776 shift = 0;
3777 }
3778 /* (sign_extend:DI (reg:SI)) */
3779 else if ((GET_CODE (x) == SIGN_EXTEND
3780 || GET_CODE (x) == ZERO_EXTEND)
3781 && GET_MODE (x) == DImode
3782 && GET_MODE (XEXP (x, 0)) == SImode)
3783 {
3784 type = (GET_CODE (x) == SIGN_EXTEND)
3785 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3786 index = XEXP (x, 0);
3787 shift = 0;
3788 }
3789 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3790 else if (GET_CODE (x) == MULT
3791 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3792 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3793 && GET_MODE (XEXP (x, 0)) == DImode
3794 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3795 && CONST_INT_P (XEXP (x, 1)))
3796 {
3797 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3798 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3799 index = XEXP (XEXP (x, 0), 0);
3800 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3801 }
3802 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3803 else if (GET_CODE (x) == ASHIFT
3804 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3805 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3806 && GET_MODE (XEXP (x, 0)) == DImode
3807 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3808 && CONST_INT_P (XEXP (x, 1)))
3809 {
3810 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3811 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3812 index = XEXP (XEXP (x, 0), 0);
3813 shift = INTVAL (XEXP (x, 1));
3814 }
3815 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3816 else if ((GET_CODE (x) == SIGN_EXTRACT
3817 || GET_CODE (x) == ZERO_EXTRACT)
3818 && GET_MODE (x) == DImode
3819 && GET_CODE (XEXP (x, 0)) == MULT
3820 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3821 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3822 {
3823 type = (GET_CODE (x) == SIGN_EXTRACT)
3824 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3825 index = XEXP (XEXP (x, 0), 0);
3826 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3827 if (INTVAL (XEXP (x, 1)) != 32 + shift
3828 || INTVAL (XEXP (x, 2)) != 0)
3829 shift = -1;
3830 }
3831 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3832 (const_int 0xffffffff<<shift)) */
3833 else if (GET_CODE (x) == AND
3834 && GET_MODE (x) == DImode
3835 && GET_CODE (XEXP (x, 0)) == MULT
3836 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3837 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3838 && CONST_INT_P (XEXP (x, 1)))
3839 {
3840 type = ADDRESS_REG_UXTW;
3841 index = XEXP (XEXP (x, 0), 0);
3842 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3843 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3844 shift = -1;
3845 }
3846 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3847 else if ((GET_CODE (x) == SIGN_EXTRACT
3848 || GET_CODE (x) == ZERO_EXTRACT)
3849 && GET_MODE (x) == DImode
3850 && GET_CODE (XEXP (x, 0)) == ASHIFT
3851 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3852 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3853 {
3854 type = (GET_CODE (x) == SIGN_EXTRACT)
3855 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3856 index = XEXP (XEXP (x, 0), 0);
3857 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3858 if (INTVAL (XEXP (x, 1)) != 32 + shift
3859 || INTVAL (XEXP (x, 2)) != 0)
3860 shift = -1;
3861 }
3862 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3863 (const_int 0xffffffff<<shift)) */
3864 else if (GET_CODE (x) == AND
3865 && GET_MODE (x) == DImode
3866 && GET_CODE (XEXP (x, 0)) == ASHIFT
3867 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3868 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3869 && CONST_INT_P (XEXP (x, 1)))
3870 {
3871 type = ADDRESS_REG_UXTW;
3872 index = XEXP (XEXP (x, 0), 0);
3873 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3874 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3875 shift = -1;
3876 }
3877 /* (mult:P (reg:P) (const_int scale)) */
3878 else if (GET_CODE (x) == MULT
3879 && GET_MODE (x) == Pmode
3880 && GET_MODE (XEXP (x, 0)) == Pmode
3881 && CONST_INT_P (XEXP (x, 1)))
3882 {
3883 type = ADDRESS_REG_REG;
3884 index = XEXP (x, 0);
3885 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3886 }
3887 /* (ashift:P (reg:P) (const_int shift)) */
3888 else if (GET_CODE (x) == ASHIFT
3889 && GET_MODE (x) == Pmode
3890 && GET_MODE (XEXP (x, 0)) == Pmode
3891 && CONST_INT_P (XEXP (x, 1)))
3892 {
3893 type = ADDRESS_REG_REG;
3894 index = XEXP (x, 0);
3895 shift = INTVAL (XEXP (x, 1));
3896 }
3897 else
3898 return false;
3899
3900 if (GET_CODE (index) == SUBREG)
3901 index = SUBREG_REG (index);
3902
3903 if ((shift == 0 ||
3904 (shift > 0 && shift <= 3
3905 && (1 << shift) == GET_MODE_SIZE (mode)))
3906 && REG_P (index)
3907 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3908 {
3909 info->type = type;
3910 info->offset = index;
3911 info->shift = shift;
3912 return true;
3913 }
3914
3915 return false;
3916}
3917
44707478 3918bool
ef4bddc2 3919aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3920{
3921 return (offset >= -64 * GET_MODE_SIZE (mode)
3922 && offset < 64 * GET_MODE_SIZE (mode)
3923 && offset % GET_MODE_SIZE (mode) == 0);
3924}
3925
3926static inline bool
ef4bddc2 3927offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
3928 HOST_WIDE_INT offset)
3929{
3930 return offset >= -256 && offset < 256;
3931}
3932
3933static inline bool
ef4bddc2 3934offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3935{
3936 return (offset >= 0
3937 && offset < 4096 * GET_MODE_SIZE (mode)
3938 && offset % GET_MODE_SIZE (mode) == 0);
3939}
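
/* Illustrative sketch (not GCC code): the offset ranges the three
   predicates above accept, shown for a DImode (8-byte) access; the bounds
   follow directly from the predicates' arithmetic.  */
#include <assert.h>

#define SIZE 8  /* GET_MODE_SIZE (DImode) */

static int off7_scaled (long o)   { return o >= -64 * SIZE && o < 64 * SIZE && o % SIZE == 0; }
static int off9_unscaled (long o) { return o >= -256 && o < 256; }
static int off12_scaled (long o)  { return o >= 0 && o < 4096 * SIZE && o % SIZE == 0; }

int
main (void)
{
  /* LDP/STP range: -512..504 in steps of 8.  */
  assert (off7_scaled (-512) && off7_scaled (504) && !off7_scaled (512));
  /* Unscaled (LDUR/STUR-style) range: any byte offset in -256..255.  */
  assert (off9_unscaled (-256) && off9_unscaled (255) && !off9_unscaled (256));
  /* Scaled unsigned LDR/STR range: 0..32760 in steps of 8.  */
  assert (off12_scaled (32760) && !off12_scaled (32768) && !off12_scaled (12345));
  return 0;
}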
3940
abc52318
KT
3941/* Return true if MODE is one of the modes for which we
3942 support LDP/STP operations. */
3943
3944static bool
3945aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
3946{
3947 return mode == SImode || mode == DImode
3948 || mode == SFmode || mode == DFmode
3949 || (aarch64_vector_mode_supported_p (mode)
3950 && GET_MODE_SIZE (mode) == 8);
3951}
3952
9e0218fc
RH
3953/* Return true if REGNO is a virtual pointer register, or an eliminable
3954 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
3955 include stack_pointer or hard_frame_pointer. */
3956static bool
3957virt_or_elim_regno_p (unsigned regno)
3958{
3959 return ((regno >= FIRST_VIRTUAL_REGISTER
3960 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
3961 || regno == FRAME_POINTER_REGNUM
3962 || regno == ARG_POINTER_REGNUM);
3963}
3964
43e9d192
IB
3965/* Return true if X is a valid address for machine mode MODE. If it is,
3966 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3967 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3968
3969static bool
3970aarch64_classify_address (struct aarch64_address_info *info,
ef4bddc2 3971 rtx x, machine_mode mode,
43e9d192
IB
3972 RTX_CODE outer_code, bool strict_p)
3973{
3974 enum rtx_code code = GET_CODE (x);
3975 rtx op0, op1;
2d8c6dc1
AH
3976
3977 /* On BE, we use load/store pair for all large int mode load/stores. */
3978 bool load_store_pair_p = (outer_code == PARALLEL
3979 || (BYTES_BIG_ENDIAN
3980 && aarch64_vect_struct_mode_p (mode)));
3981
43e9d192 3982 bool allow_reg_index_p =
2d8c6dc1
AH
3983 !load_store_pair_p
3984 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
3985 && !aarch64_vect_struct_mode_p (mode);
3986
3987 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
3988 REG addressing. */
3989 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
43e9d192
IB
3990 && (code != POST_INC && code != REG))
3991 return false;
3992
3993 switch (code)
3994 {
3995 case REG:
3996 case SUBREG:
3997 info->type = ADDRESS_REG_IMM;
3998 info->base = x;
3999 info->offset = const0_rtx;
4000 return aarch64_base_register_rtx_p (x, strict_p);
4001
4002 case PLUS:
4003 op0 = XEXP (x, 0);
4004 op1 = XEXP (x, 1);
15c0c5c9
JW
4005
4006 if (! strict_p
4aa81c2e 4007 && REG_P (op0)
9e0218fc 4008 && virt_or_elim_regno_p (REGNO (op0))
4aa81c2e 4009 && CONST_INT_P (op1))
15c0c5c9
JW
4010 {
4011 info->type = ADDRESS_REG_IMM;
4012 info->base = op0;
4013 info->offset = op1;
4014
4015 return true;
4016 }
4017
43e9d192
IB
4018 if (GET_MODE_SIZE (mode) != 0
4019 && CONST_INT_P (op1)
4020 && aarch64_base_register_rtx_p (op0, strict_p))
4021 {
4022 HOST_WIDE_INT offset = INTVAL (op1);
4023
4024 info->type = ADDRESS_REG_IMM;
4025 info->base = op0;
4026 info->offset = op1;
4027
4028 /* TImode and TFmode values are allowed in both pairs of X
4029 registers and individual Q registers. The available
4030 address modes are:
4031 X,X: 7-bit signed scaled offset
4032 Q: 9-bit signed offset
4033 We conservatively require an offset representable in either mode.
4034 */
4035 if (mode == TImode || mode == TFmode)
44707478 4036 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
4037 && offset_9bit_signed_unscaled_p (mode, offset));
4038
2d8c6dc1
AH
4039 /* A 7-bit offset check because OImode will emit an ldp/stp
4040 instruction (only big endian will get here).
4041 For ldp/stp instructions, the offset is scaled for the size of a
4042 single element of the pair. */
4043 if (mode == OImode)
4044 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
4045
4046 /* Three 9/12-bit offset checks because CImode will emit three
4047 ldr/str instructions (only big endian will get here). */
4048 if (mode == CImode)
4049 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4050 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
4051 || offset_12bit_unsigned_scaled_p (V16QImode,
4052 offset + 32)));
4053
4054 /* Two 7-bit offset checks because XImode will emit two ldp/stp
4055 instructions (only big endian will get here). */
4056 if (mode == XImode)
4057 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4058 && aarch64_offset_7bit_signed_scaled_p (TImode,
4059 offset + 32));
4060
4061 if (load_store_pair_p)
43e9d192 4062 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 4063 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
4064 else
4065 return (offset_9bit_signed_unscaled_p (mode, offset)
4066 || offset_12bit_unsigned_scaled_p (mode, offset));
4067 }
4068
4069 if (allow_reg_index_p)
4070 {
4071 /* Look for base + (scaled/extended) index register. */
4072 if (aarch64_base_register_rtx_p (op0, strict_p)
4073 && aarch64_classify_index (info, op1, mode, strict_p))
4074 {
4075 info->base = op0;
4076 return true;
4077 }
4078 if (aarch64_base_register_rtx_p (op1, strict_p)
4079 && aarch64_classify_index (info, op0, mode, strict_p))
4080 {
4081 info->base = op1;
4082 return true;
4083 }
4084 }
4085
4086 return false;
4087
4088 case POST_INC:
4089 case POST_DEC:
4090 case PRE_INC:
4091 case PRE_DEC:
4092 info->type = ADDRESS_REG_WB;
4093 info->base = XEXP (x, 0);
4094 info->offset = NULL_RTX;
4095 return aarch64_base_register_rtx_p (info->base, strict_p);
4096
4097 case POST_MODIFY:
4098 case PRE_MODIFY:
4099 info->type = ADDRESS_REG_WB;
4100 info->base = XEXP (x, 0);
4101 if (GET_CODE (XEXP (x, 1)) == PLUS
4102 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
4103 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
4104 && aarch64_base_register_rtx_p (info->base, strict_p))
4105 {
4106 HOST_WIDE_INT offset;
4107 info->offset = XEXP (XEXP (x, 1), 1);
4108 offset = INTVAL (info->offset);
4109
4110 /* TImode and TFmode values are allowed in both pairs of X
4111 registers and individual Q registers. The available
4112 address modes are:
4113 X,X: 7-bit signed scaled offset
4114 Q: 9-bit signed offset
4115 We conservatively require an offset representable in either mode.
4116 */
4117 if (mode == TImode || mode == TFmode)
44707478 4118 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
4119 && offset_9bit_signed_unscaled_p (mode, offset));
4120
2d8c6dc1 4121 if (load_store_pair_p)
43e9d192 4122 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 4123 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
4124 else
4125 return offset_9bit_signed_unscaled_p (mode, offset);
4126 }
4127 return false;
4128
4129 case CONST:
4130 case SYMBOL_REF:
4131 case LABEL_REF:
79517551
SN
4132 /* load literal: pc-relative constant pool entry. Only supported
4133 for SI mode or larger. */
43e9d192 4134 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1
AH
4135
4136 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
4137 {
4138 rtx sym, addend;
4139
4140 split_const (x, &sym, &addend);
b4f50fd4
RR
4141 return ((GET_CODE (sym) == LABEL_REF
4142 || (GET_CODE (sym) == SYMBOL_REF
4143 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 4144 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
4145 }
4146 return false;
4147
4148 case LO_SUM:
4149 info->type = ADDRESS_LO_SUM;
4150 info->base = XEXP (x, 0);
4151 info->offset = XEXP (x, 1);
4152 if (allow_reg_index_p
4153 && aarch64_base_register_rtx_p (info->base, strict_p))
4154 {
4155 rtx sym, offs;
4156 split_const (info->offset, &sym, &offs);
4157 if (GET_CODE (sym) == SYMBOL_REF
a6e0bfa7 4158 && (aarch64_classify_symbol (sym, offs) == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
4159 {
4160 /* The symbol and offset must be aligned to the access size. */
4161 unsigned int align;
4162 unsigned int ref_size;
4163
4164 if (CONSTANT_POOL_ADDRESS_P (sym))
4165 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
4166 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
4167 {
4168 tree exp = SYMBOL_REF_DECL (sym);
4169 align = TYPE_ALIGN (TREE_TYPE (exp));
4170 align = CONSTANT_ALIGNMENT (exp, align);
4171 }
4172 else if (SYMBOL_REF_DECL (sym))
4173 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
4174 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
4175 && SYMBOL_REF_BLOCK (sym) != NULL)
4176 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
4177 else
4178 align = BITS_PER_UNIT;
4179
4180 ref_size = GET_MODE_SIZE (mode);
4181 if (ref_size == 0)
4182 ref_size = GET_MODE_SIZE (DImode);
4183
4184 return ((INTVAL (offs) & (ref_size - 1)) == 0
4185 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
4186 }
4187 }
4188 return false;
4189
4190 default:
4191 return false;
4192 }
4193}
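
/* Illustrative sketch (not GCC code): typical addresses accepted by
   aarch64_classify_address above and the classification each receives,
   written as plain data for reading alongside the function (the assembly
   syntax matches what aarch64_print_operand_address later emits).  */
static const struct { const char *addr; const char *type; } example_addrs[] =
{
  { "[x0]",                  "ADDRESS_REG_IMM"  },
  { "[x0, 16]",              "ADDRESS_REG_IMM"  },
  { "[x0, x1, lsl 3]",       "ADDRESS_REG_REG"  },
  { "[x0, w1, sxtw 2]",      "ADDRESS_REG_SXTW" },
  { "[x0, w1, uxtw 2]",      "ADDRESS_REG_UXTW" },
  { "[x0, 16]!",             "ADDRESS_REG_WB"   },  /* pre-modify  */
  { "[x0], 16",              "ADDRESS_REG_WB"   },  /* post-modify */
  { "[x0, #:lo12:sym]",      "ADDRESS_LO_SUM"   },
  { "a pc-relative literal", "ADDRESS_SYMBOLIC" },
};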
4194
4195bool
4196aarch64_symbolic_address_p (rtx x)
4197{
4198 rtx offset;
4199
4200 split_const (x, &x, &offset);
4201 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
4202}
4203
a6e0bfa7 4204/* Classify the base of symbolic expression X. */
da4f13a4
MS
4205
4206enum aarch64_symbol_type
a6e0bfa7 4207aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
4208{
4209 rtx offset;
da4f13a4 4210
43e9d192 4211 split_const (x, &x, &offset);
a6e0bfa7 4212 return aarch64_classify_symbol (x, offset);
43e9d192
IB
4213}
4214
4215
4216/* Return TRUE if X is a legitimate address for accessing memory in
4217 mode MODE. */
4218static bool
ef4bddc2 4219aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
4220{
4221 struct aarch64_address_info addr;
4222
4223 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
4224}
4225
4226/* Return TRUE if X is a legitimate address for accessing memory in
4227 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
4228 pair operation. */
4229bool
ef4bddc2 4230aarch64_legitimate_address_p (machine_mode mode, rtx x,
aef66c94 4231 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
4232{
4233 struct aarch64_address_info addr;
4234
4235 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
4236}
4237
4238/* Return TRUE if rtx X is immediate constant 0.0 */
4239bool
3520f7cc 4240aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 4241{
43e9d192
IB
4242 if (GET_MODE (x) == VOIDmode)
4243 return false;
4244
34a72c33 4245 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 4246 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 4247 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
4248}
4249
70f09188
AP
4250/* Return the fixed registers used for condition codes. */
4251
4252static bool
4253aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
4254{
4255 *p1 = CC_REGNUM;
4256 *p2 = INVALID_REGNUM;
4257 return true;
4258}
4259
78607708
TV
4260/* Emit call insn with PAT and do aarch64-specific handling. */
4261
d07a3fed 4262void
78607708
TV
4263aarch64_emit_call_insn (rtx pat)
4264{
4265 rtx insn = emit_call_insn (pat);
4266
4267 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
4268 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
4269 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
4270}
4271
ef4bddc2 4272machine_mode
43e9d192
IB
4273aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
4274{
4275 /* All floating point compares return CCFP if it is an equality
4276 comparison, and CCFPE otherwise. */
4277 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4278 {
4279 switch (code)
4280 {
4281 case EQ:
4282 case NE:
4283 case UNORDERED:
4284 case ORDERED:
4285 case UNLT:
4286 case UNLE:
4287 case UNGT:
4288 case UNGE:
4289 case UNEQ:
4290 case LTGT:
4291 return CCFPmode;
4292
4293 case LT:
4294 case LE:
4295 case GT:
4296 case GE:
4297 return CCFPEmode;
4298
4299 default:
4300 gcc_unreachable ();
4301 }
4302 }
4303
2b8568fe
KT
4304 /* Equality comparisons of short modes against zero can be performed
4305 using the TST instruction with the appropriate bitmask. */
4306 if (y == const0_rtx && REG_P (x)
4307 && (code == EQ || code == NE)
4308 && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
4309 return CC_NZmode;
4310
43e9d192
IB
4311 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4312 && y == const0_rtx
4313 && (code == EQ || code == NE || code == LT || code == GE)
b056c910 4314 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
7325d85a
KT
4315 || GET_CODE (x) == NEG
4316 || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
4317 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
4318 return CC_NZmode;
4319
1c992d1e 4320 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
4321 the comparison will have to be swapped when we emit the assembly
4322 code. */
4323 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 4324 && (REG_P (y) || GET_CODE (y) == SUBREG)
43e9d192
IB
4325 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
4326 || GET_CODE (x) == LSHIFTRT
1c992d1e 4327 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
4328 return CC_SWPmode;
4329
1c992d1e
RE
4330 /* Similarly for a negated operand, but we can only do this for
4331 equalities. */
4332 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 4333 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
4334 && (code == EQ || code == NE)
4335 && GET_CODE (x) == NEG)
4336 return CC_Zmode;
4337
ef22810a
RH
4338 /* A test for unsigned overflow. */
4339 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
4340 && code == NE
4341 && GET_CODE (x) == PLUS
4342 && GET_CODE (y) == ZERO_EXTEND)
4343 return CC_Cmode;
4344
43e9d192
IB
4345 /* For everything else, return CCmode. */
4346 return CCmode;
4347}
4348
3dfa7055
ZC
4349static int
4350aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
4351
cd5660ab 4352int
43e9d192
IB
4353aarch64_get_condition_code (rtx x)
4354{
ef4bddc2 4355 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
4356 enum rtx_code comp_code = GET_CODE (x);
4357
4358 if (GET_MODE_CLASS (mode) != MODE_CC)
4359 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
4360 return aarch64_get_condition_code_1 (mode, comp_code);
4361}
43e9d192 4362
3dfa7055
ZC
4363static int
4364aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
4365{
43e9d192
IB
4366 switch (mode)
4367 {
4368 case CCFPmode:
4369 case CCFPEmode:
4370 switch (comp_code)
4371 {
4372 case GE: return AARCH64_GE;
4373 case GT: return AARCH64_GT;
4374 case LE: return AARCH64_LS;
4375 case LT: return AARCH64_MI;
4376 case NE: return AARCH64_NE;
4377 case EQ: return AARCH64_EQ;
4378 case ORDERED: return AARCH64_VC;
4379 case UNORDERED: return AARCH64_VS;
4380 case UNLT: return AARCH64_LT;
4381 case UNLE: return AARCH64_LE;
4382 case UNGT: return AARCH64_HI;
4383 case UNGE: return AARCH64_PL;
cd5660ab 4384 default: return -1;
43e9d192
IB
4385 }
4386 break;
4387
4388 case CCmode:
4389 switch (comp_code)
4390 {
4391 case NE: return AARCH64_NE;
4392 case EQ: return AARCH64_EQ;
4393 case GE: return AARCH64_GE;
4394 case GT: return AARCH64_GT;
4395 case LE: return AARCH64_LE;
4396 case LT: return AARCH64_LT;
4397 case GEU: return AARCH64_CS;
4398 case GTU: return AARCH64_HI;
4399 case LEU: return AARCH64_LS;
4400 case LTU: return AARCH64_CC;
cd5660ab 4401 default: return -1;
43e9d192
IB
4402 }
4403 break;
4404
4405 case CC_SWPmode:
43e9d192
IB
4406 switch (comp_code)
4407 {
4408 case NE: return AARCH64_NE;
4409 case EQ: return AARCH64_EQ;
4410 case GE: return AARCH64_LE;
4411 case GT: return AARCH64_LT;
4412 case LE: return AARCH64_GE;
4413 case LT: return AARCH64_GT;
4414 case GEU: return AARCH64_LS;
4415 case GTU: return AARCH64_CC;
4416 case LEU: return AARCH64_CS;
4417 case LTU: return AARCH64_HI;
cd5660ab 4418 default: return -1;
43e9d192
IB
4419 }
4420 break;
4421
4422 case CC_NZmode:
4423 switch (comp_code)
4424 {
4425 case NE: return AARCH64_NE;
4426 case EQ: return AARCH64_EQ;
4427 case GE: return AARCH64_PL;
4428 case LT: return AARCH64_MI;
cd5660ab 4429 default: return -1;
43e9d192
IB
4430 }
4431 break;
4432
1c992d1e
RE
4433 case CC_Zmode:
4434 switch (comp_code)
4435 {
4436 case NE: return AARCH64_NE;
4437 case EQ: return AARCH64_EQ;
cd5660ab 4438 default: return -1;
1c992d1e
RE
4439 }
4440 break;
4441
ef22810a
RH
4442 case CC_Cmode:
4443 switch (comp_code)
4444 {
4445 case NE: return AARCH64_CS;
4446 case EQ: return AARCH64_CC;
4447 default: return -1;
4448 }
4449 break;
4450
43e9d192 4451 default:
cd5660ab 4452 return -1;
43e9d192
IB
4453 break;
4454 }
3dfa7055 4455
3dfa7055 4456 return -1;
43e9d192
IB
4457}
4458
ddeabd3e
AL
4459bool
4460aarch64_const_vec_all_same_in_range_p (rtx x,
4461 HOST_WIDE_INT minval,
4462 HOST_WIDE_INT maxval)
4463{
4464 HOST_WIDE_INT firstval;
4465 int count, i;
4466
4467 if (GET_CODE (x) != CONST_VECTOR
4468 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
4469 return false;
4470
4471 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
4472 if (firstval < minval || firstval > maxval)
4473 return false;
4474
4475 count = CONST_VECTOR_NUNITS (x);
4476 for (i = 1; i < count; i++)
4477 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
4478 return false;
4479
4480 return true;
4481}
4482
4483bool
4484aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
4485{
4486 return aarch64_const_vec_all_same_in_range_p (x, val, val);
4487}
4488
43e9d192 4489
cf670503
ZC
4490/* N Z C V. */
4491#define AARCH64_CC_V 1
4492#define AARCH64_CC_C (1 << 1)
4493#define AARCH64_CC_Z (1 << 2)
4494#define AARCH64_CC_N (1 << 3)
4495
c8012fbc
WD
4496/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
4497static const int aarch64_nzcv_codes[] =
4498{
4499 0, /* EQ, Z == 1. */
4500 AARCH64_CC_Z, /* NE, Z == 0. */
4501 0, /* CS, C == 1. */
4502 AARCH64_CC_C, /* CC, C == 0. */
4503 0, /* MI, N == 1. */
4504 AARCH64_CC_N, /* PL, N == 0. */
4505 0, /* VS, V == 1. */
4506 AARCH64_CC_V, /* VC, V == 0. */
4507 0, /* HI, C ==1 && Z == 0. */
4508 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
4509 AARCH64_CC_V, /* GE, N == V. */
4510 0, /* LT, N != V. */
4511 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
4512 0, /* LE, !(Z == 0 && N == V). */
4513 0, /* AL, Any. */
4514 0 /* NV, Any. */
cf670503
ZC
4515};
4516
cc8ca59e
JB
4517static void
4518aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192
IB
4519{
4520 switch (code)
4521 {
f541a481
KT
4522 /* An integer or symbol address without a preceding # sign. */
4523 case 'c':
4524 switch (GET_CODE (x))
4525 {
4526 case CONST_INT:
4527 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4528 break;
4529
4530 case SYMBOL_REF:
4531 output_addr_const (f, x);
4532 break;
4533
4534 case CONST:
4535 if (GET_CODE (XEXP (x, 0)) == PLUS
4536 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4537 {
4538 output_addr_const (f, x);
4539 break;
4540 }
4541 /* Fall through. */
4542
4543 default:
4544 output_operand_lossage ("Unsupported operand for code '%c'", code);
4545 }
4546 break;
4547
43e9d192
IB
4548 case 'e':
4549 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4550 {
4551 int n;
4552
4aa81c2e 4553 if (!CONST_INT_P (x)
43e9d192
IB
4554 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4555 {
4556 output_operand_lossage ("invalid operand for '%%%c'", code);
4557 return;
4558 }
4559
4560 switch (n)
4561 {
4562 case 3:
4563 fputc ('b', f);
4564 break;
4565 case 4:
4566 fputc ('h', f);
4567 break;
4568 case 5:
4569 fputc ('w', f);
4570 break;
4571 default:
4572 output_operand_lossage ("invalid operand for '%%%c'", code);
4573 return;
4574 }
4575 }
4576 break;
4577
4578 case 'p':
4579 {
4580 int n;
4581
4582 /* Print N such that 2^N == X. */
4aa81c2e 4583 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
4584 {
4585 output_operand_lossage ("invalid operand for '%%%c'", code);
4586 return;
4587 }
4588
4589 asm_fprintf (f, "%d", n);
4590 }
4591 break;
4592
4593 case 'P':
4594 /* Print the number of non-zero bits in X (a const_int). */
4aa81c2e 4595 if (!CONST_INT_P (x))
43e9d192
IB
4596 {
4597 output_operand_lossage ("invalid operand for '%%%c'", code);
4598 return;
4599 }
4600
8d55c61b 4601 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
4602 break;
4603
4604 case 'H':
4605 /* Print the higher numbered register of a pair (TImode) of regs. */
4aa81c2e 4606 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
4607 {
4608 output_operand_lossage ("invalid operand for '%%%c'", code);
4609 return;
4610 }
4611
01a3a324 4612 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
4613 break;
4614
43e9d192 4615 case 'M':
c8012fbc 4616 case 'm':
cd5660ab
KT
4617 {
4618 int cond_code;
c8012fbc 4619 /* Print a condition (eq, ne, etc) or its inverse. */
43e9d192 4620
c8012fbc
WD
4621 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
4622 if (x == const_true_rtx)
cd5660ab 4623 {
c8012fbc
WD
4624 if (code == 'M')
4625 fputs ("nv", f);
cd5660ab
KT
4626 return;
4627 }
43e9d192 4628
cd5660ab
KT
4629 if (!COMPARISON_P (x))
4630 {
4631 output_operand_lossage ("invalid operand for '%%%c'", code);
4632 return;
4633 }
c8012fbc 4634
cd5660ab
KT
4635 cond_code = aarch64_get_condition_code (x);
4636 gcc_assert (cond_code >= 0);
c8012fbc
WD
4637 if (code == 'M')
4638 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
4639 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 4640 }
43e9d192
IB
4641 break;
4642
4643 case 'b':
4644 case 'h':
4645 case 's':
4646 case 'd':
4647 case 'q':
4648 /* Print a scalar FP/SIMD register name. */
4649 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4650 {
4651 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4652 return;
4653 }
50ce6f88 4654 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
4655 break;
4656
4657 case 'S':
4658 case 'T':
4659 case 'U':
4660 case 'V':
4661 /* Print the first FP/SIMD register name in a list. */
4662 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4663 {
4664 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4665 return;
4666 }
50ce6f88 4667 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
4668 break;
4669
2d8c6dc1
AH
4670 case 'R':
4671 /* Print a scalar FP/SIMD register name + 1. */
4672 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4673 {
4674 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4675 return;
4676 }
4677 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
4678 break;
4679
a05c0ddf 4680 case 'X':
50d38551 4681 /* Print bottom 16 bits of integer constant in hex. */
4aa81c2e 4682 if (!CONST_INT_P (x))
a05c0ddf
IB
4683 {
4684 output_operand_lossage ("invalid operand for '%%%c'", code);
4685 return;
4686 }
50d38551 4687 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
4688 break;
4689
43e9d192
IB
4690 case 'w':
4691 case 'x':
4692 /* Print a general register name or the zero register (32-bit or
4693 64-bit). */
3520f7cc
JG
4694 if (x == const0_rtx
4695 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 4696 {
50ce6f88 4697 asm_fprintf (f, "%czr", code);
43e9d192
IB
4698 break;
4699 }
4700
4701 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
4702 {
50ce6f88 4703 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
4704 break;
4705 }
4706
4707 if (REG_P (x) && REGNO (x) == SP_REGNUM)
4708 {
50ce6f88 4709 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
4710 break;
4711 }
4712
4713 /* Fall through */
4714
4715 case 0:
4716 /* Print a normal operand. If it's a general register, then we
4717 assume DImode. */
4718 if (x == NULL)
4719 {
4720 output_operand_lossage ("missing operand");
4721 return;
4722 }
4723
4724 switch (GET_CODE (x))
4725 {
4726 case REG:
01a3a324 4727 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
4728 break;
4729
4730 case MEM:
cc8ca59e 4731 output_address (GET_MODE (x), XEXP (x, 0));
43e9d192
IB
4732 break;
4733
2af16a7c 4734 case CONST:
43e9d192
IB
4735 case LABEL_REF:
4736 case SYMBOL_REF:
4737 output_addr_const (asm_out_file, x);
4738 break;
4739
4740 case CONST_INT:
4741 asm_fprintf (f, "%wd", INTVAL (x));
4742 break;
4743
4744 case CONST_VECTOR:
3520f7cc
JG
4745 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
4746 {
ddeabd3e
AL
4747 gcc_assert (
4748 aarch64_const_vec_all_same_in_range_p (x,
4749 HOST_WIDE_INT_MIN,
4750 HOST_WIDE_INT_MAX));
3520f7cc
JG
4751 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
4752 }
4753 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
4754 {
4755 fputc ('0', f);
4756 }
4757 else
4758 gcc_unreachable ();
43e9d192
IB
4759 break;
4760
3520f7cc 4761 case CONST_DOUBLE:
2ca5b430
KT
4762 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
4763 be getting CONST_DOUBLEs holding integers. */
4764 gcc_assert (GET_MODE (x) != VOIDmode);
4765 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
4766 {
4767 fputc ('0', f);
4768 break;
4769 }
4770 else if (aarch64_float_const_representable_p (x))
4771 {
4772#define buf_size 20
4773 char float_buf[buf_size] = {'\0'};
34a72c33
RS
4774 real_to_decimal_for_mode (float_buf,
4775 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
4776 buf_size, buf_size,
4777 1, GET_MODE (x));
4778 asm_fprintf (asm_out_file, "%s", float_buf);
4779 break;
4780#undef buf_size
4781 }
4782 output_operand_lossage ("invalid constant");
4783 return;
43e9d192
IB
4784 default:
4785 output_operand_lossage ("invalid operand");
4786 return;
4787 }
4788 break;
4789
4790 case 'A':
4791 if (GET_CODE (x) == HIGH)
4792 x = XEXP (x, 0);
4793
a6e0bfa7 4794 switch (aarch64_classify_symbolic_expression (x))
43e9d192 4795 {
6642bdb4 4796 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
4797 asm_fprintf (asm_out_file, ":got:");
4798 break;
4799
4800 case SYMBOL_SMALL_TLSGD:
4801 asm_fprintf (asm_out_file, ":tlsgd:");
4802 break;
4803
4804 case SYMBOL_SMALL_TLSDESC:
4805 asm_fprintf (asm_out_file, ":tlsdesc:");
4806 break;
4807
79496620 4808 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
4809 asm_fprintf (asm_out_file, ":gottprel:");
4810 break;
4811
d18ba284 4812 case SYMBOL_TLSLE24:
43e9d192
IB
4813 asm_fprintf (asm_out_file, ":tprel:");
4814 break;
4815
87dd8ab0
MS
4816 case SYMBOL_TINY_GOT:
4817 gcc_unreachable ();
4818 break;
4819
43e9d192
IB
4820 default:
4821 break;
4822 }
4823 output_addr_const (asm_out_file, x);
4824 break;
4825
4826 case 'L':
a6e0bfa7 4827 switch (aarch64_classify_symbolic_expression (x))
43e9d192 4828 {
6642bdb4 4829 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
4830 asm_fprintf (asm_out_file, ":lo12:");
4831 break;
4832
4833 case SYMBOL_SMALL_TLSGD:
4834 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4835 break;
4836
4837 case SYMBOL_SMALL_TLSDESC:
4838 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4839 break;
4840
79496620 4841 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
4842 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4843 break;
4844
cbf5629e
JW
4845 case SYMBOL_TLSLE12:
4846 asm_fprintf (asm_out_file, ":tprel_lo12:");
4847 break;
4848
d18ba284 4849 case SYMBOL_TLSLE24:
43e9d192
IB
4850 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4851 break;
4852
87dd8ab0
MS
4853 case SYMBOL_TINY_GOT:
4854 asm_fprintf (asm_out_file, ":got:");
4855 break;
4856
5ae7caad
JW
4857 case SYMBOL_TINY_TLSIE:
4858 asm_fprintf (asm_out_file, ":gottprel:");
4859 break;
4860
43e9d192
IB
4861 default:
4862 break;
4863 }
4864 output_addr_const (asm_out_file, x);
4865 break;
4866
4867 case 'G':
4868
a6e0bfa7 4869 switch (aarch64_classify_symbolic_expression (x))
43e9d192 4870 {
d18ba284 4871 case SYMBOL_TLSLE24:
43e9d192
IB
4872 asm_fprintf (asm_out_file, ":tprel_hi12:");
4873 break;
4874 default:
4875 break;
4876 }
4877 output_addr_const (asm_out_file, x);
4878 break;
4879
cf670503
ZC
4880 case 'k':
4881 {
c8012fbc 4882 HOST_WIDE_INT cond_code;
cf670503
ZC
4883 /* Print nzcv. */
4884
c8012fbc 4885 if (!CONST_INT_P (x))
cf670503
ZC
4886 {
4887 output_operand_lossage ("invalid operand for '%%%c'", code);
4888 return;
4889 }
4890
c8012fbc
WD
4891 cond_code = INTVAL (x);
4892 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
4893 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
4894 }
4895 break;
4896
43e9d192
IB
4897 default:
4898 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4899 return;
4900 }
4901}
4902
cc8ca59e
JB
4903static void
4904aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
43e9d192
IB
4905{
4906 struct aarch64_address_info addr;
4907
cc8ca59e 4908 if (aarch64_classify_address (&addr, x, mode, MEM, true))
43e9d192
IB
4909 switch (addr.type)
4910 {
4911 case ADDRESS_REG_IMM:
4912 if (addr.offset == const0_rtx)
01a3a324 4913 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 4914 else
16a3246f 4915 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
4916 INTVAL (addr.offset));
4917 return;
4918
4919 case ADDRESS_REG_REG:
4920 if (addr.shift == 0)
16a3246f 4921 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 4922 reg_names [REGNO (addr.offset)]);
43e9d192 4923 else
16a3246f 4924 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 4925 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
4926 return;
4927
4928 case ADDRESS_REG_UXTW:
4929 if (addr.shift == 0)
16a3246f 4930 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4931 REGNO (addr.offset) - R0_REGNUM);
4932 else
16a3246f 4933 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4934 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4935 return;
4936
4937 case ADDRESS_REG_SXTW:
4938 if (addr.shift == 0)
16a3246f 4939 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4940 REGNO (addr.offset) - R0_REGNUM);
4941 else
16a3246f 4942 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4943 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4944 return;
4945
4946 case ADDRESS_REG_WB:
4947 switch (GET_CODE (x))
4948 {
4949 case PRE_INC:
16a3246f 4950 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
cc8ca59e 4951 GET_MODE_SIZE (mode));
43e9d192
IB
4952 return;
4953 case POST_INC:
16a3246f 4954 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
cc8ca59e 4955 GET_MODE_SIZE (mode));
43e9d192
IB
4956 return;
4957 case PRE_DEC:
16a3246f 4958 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
cc8ca59e 4959 GET_MODE_SIZE (mode));
43e9d192
IB
4960 return;
4961 case POST_DEC:
16a3246f 4962 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
cc8ca59e 4963 GET_MODE_SIZE (mode));
43e9d192
IB
4964 return;
4965 case PRE_MODIFY:
16a3246f 4966 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4967 INTVAL (addr.offset));
4968 return;
4969 case POST_MODIFY:
16a3246f 4970 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
4971 INTVAL (addr.offset));
4972 return;
4973 default:
4974 break;
4975 }
4976 break;
4977
4978 case ADDRESS_LO_SUM:
16a3246f 4979 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
4980 output_addr_const (f, addr.offset);
4981 asm_fprintf (f, "]");
4982 return;
4983
4984 case ADDRESS_SYMBOLIC:
4985 break;
4986 }
4987
4988 output_addr_const (f, x);
4989}
4990
43e9d192
IB
4991bool
4992aarch64_label_mentioned_p (rtx x)
4993{
4994 const char *fmt;
4995 int i;
4996
4997 if (GET_CODE (x) == LABEL_REF)
4998 return true;
4999
5000 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
5001 referencing instruction, but they are constant offsets, not
5002 symbols. */
5003 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5004 return false;
5005
5006 fmt = GET_RTX_FORMAT (GET_CODE (x));
5007 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5008 {
5009 if (fmt[i] == 'E')
5010 {
5011 int j;
5012
5013 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5014 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
5015 return 1;
5016 }
5017 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
5018 return 1;
5019 }
5020
5021 return 0;
5022}
5023
5024/* Implement REGNO_REG_CLASS. */
5025
5026enum reg_class
5027aarch64_regno_regclass (unsigned regno)
5028{
5029 if (GP_REGNUM_P (regno))
a4a182c6 5030 return GENERAL_REGS;
43e9d192
IB
5031
5032 if (regno == SP_REGNUM)
5033 return STACK_REG;
5034
5035 if (regno == FRAME_POINTER_REGNUM
5036 || regno == ARG_POINTER_REGNUM)
f24bb080 5037 return POINTER_REGS;
43e9d192
IB
5038
5039 if (FP_REGNUM_P (regno))
5040 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
5041
5042 return NO_REGS;
5043}
5044
0c4ec427 5045static rtx
ef4bddc2 5046aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
5047{
5048 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
5049 where mask is selected by alignment and size of the offset.
5050 We try to pick as large a range for the offset as possible to
5051 maximize the chance of a CSE. However, for aligned addresses
5052 we limit the range to 4k so that structures with different sized
e8426e0a
BC
5053 elements are likely to use the same base. We need to be careful
5054 not to split a CONST for some forms of address expression, otherwise
5055 it will generate sub-optimal code. */
0c4ec427
RE
5056
5057 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
5058 {
9e0218fc 5059 rtx base = XEXP (x, 0);
17d7bdd8 5060 rtx offset_rtx = XEXP (x, 1);
9e0218fc 5061 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 5062
9e0218fc 5063 if (GET_CODE (base) == PLUS)
e8426e0a 5064 {
9e0218fc
RH
5065 rtx op0 = XEXP (base, 0);
5066 rtx op1 = XEXP (base, 1);
5067
5068 /* Force any scaling into a temp for CSE. */
5069 op0 = force_reg (Pmode, op0);
5070 op1 = force_reg (Pmode, op1);
5071
5072 /* Let the pointer register be in op0. */
5073 if (REG_POINTER (op1))
5074 std::swap (op0, op1);
5075
5076 /* If the pointer is virtual or frame related, then we know that
5077 virtual register instantiation or register elimination is going
5078 to apply a second constant. We want the two constants folded
5079 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
5080 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 5081 {
9e0218fc
RH
5082 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
5083 NULL_RTX, true, OPTAB_DIRECT);
5084 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 5085 }
e8426e0a 5086
9e0218fc
RH
5087 /* Otherwise, in order to encourage CSE (and thence loop strength
5088 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
5089 base = expand_binop (Pmode, add_optab, op0, op1,
5090 NULL_RTX, true, OPTAB_DIRECT);
5091 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
5092 }
5093
0c4ec427 5094 /* Does it look like we'll need a load/store-pair operation? */
9e0218fc 5095 HOST_WIDE_INT base_offset;
0c4ec427
RE
5096 if (GET_MODE_SIZE (mode) > 16
5097 || mode == TImode)
5098 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
5099 & ~((128 * GET_MODE_SIZE (mode)) - 1));
5100 /* For offsets that aren't a multiple of the access size, the limit is
5101 -256...255. */
5102 else if (offset & (GET_MODE_SIZE (mode) - 1))
5103 base_offset = (offset + 0x100) & ~0x1ff;
5104 else
5105 base_offset = offset & ~0xfff;
5106
9e0218fc
RH
5107 if (base_offset != 0)
5108 {
5109 base = plus_constant (Pmode, base, base_offset);
5110 base = force_operand (base, NULL_RTX);
5111 return plus_constant (Pmode, base, offset - base_offset);
5112 }
0c4ec427
RE
5113 }
5114
5115 return x;
5116}
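
/* Illustrative sketch (not GCC code): how aarch64_legitimize_address above
   splits BASE + CONST into an anchor BASE + (CONST & ~mask) plus a small
   residual offset, with the mask chosen from the access size and
   alignment.  */
#include <stdio.h>

static long long
anchor_offset (long long offset, int mode_size, int needs_pair)
{
  if (needs_pair)                        /* size > 16 bytes, or TImode */
    return (offset + 64LL * mode_size) & ~(128LL * mode_size - 1);
  if (offset & (mode_size - 1))          /* misaligned: 9-bit signed range */
    return (offset + 0x100) & ~0x1ffLL;
  return offset & ~0xfffLL;              /* aligned: 12-bit unsigned range */
}

int
main (void)
{
  /* Misaligned SImode access: anchor 0x4000, residual 0x5.  */
  printf ("%#llx\n", anchor_offset (0x4005, 4, 0));
  /* Aligned SImode access: anchor 0x4000, residual 0x8.  */
  printf ("%#llx\n", anchor_offset (0x4008, 4, 0));
  return 0;
}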
5117
b4f50fd4
RR
5118/* Return the reload icode required for a constant pool in mode. */
5119static enum insn_code
5120aarch64_constant_pool_reload_icode (machine_mode mode)
5121{
5122 switch (mode)
5123 {
5124 case SFmode:
5125 return CODE_FOR_aarch64_reload_movcpsfdi;
5126
5127 case DFmode:
5128 return CODE_FOR_aarch64_reload_movcpdfdi;
5129
5130 case TFmode:
5131 return CODE_FOR_aarch64_reload_movcptfdi;
5132
5133 case V8QImode:
5134 return CODE_FOR_aarch64_reload_movcpv8qidi;
5135
5136 case V16QImode:
5137 return CODE_FOR_aarch64_reload_movcpv16qidi;
5138
5139 case V4HImode:
5140 return CODE_FOR_aarch64_reload_movcpv4hidi;
5141
5142 case V8HImode:
5143 return CODE_FOR_aarch64_reload_movcpv8hidi;
5144
5145 case V2SImode:
5146 return CODE_FOR_aarch64_reload_movcpv2sidi;
5147
5148 case V4SImode:
5149 return CODE_FOR_aarch64_reload_movcpv4sidi;
5150
5151 case V2DImode:
5152 return CODE_FOR_aarch64_reload_movcpv2didi;
5153
5154 case V2DFmode:
5155 return CODE_FOR_aarch64_reload_movcpv2dfdi;
5156
5157 default:
5158 gcc_unreachable ();
5159 }
5160
5161 gcc_unreachable ();
5162}
43e9d192
IB
5163static reg_class_t
5164aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
5165 reg_class_t rclass,
ef4bddc2 5166 machine_mode mode,
43e9d192
IB
5167 secondary_reload_info *sri)
5168{
b4f50fd4
RR
5169
5170 /* If we have to disable direct literal pool loads and stores because the
5171 function is too big, then we need a scratch register. */
5172 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
5173 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
5174 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 5175 && !aarch64_pcrelative_literal_loads)
b4f50fd4
RR
5176 {
5177 sri->icode = aarch64_constant_pool_reload_icode (mode);
5178 return NO_REGS;
5179 }
5180
43e9d192
IB
5181 /* Without the TARGET_SIMD instructions we cannot move a Q register
5182 to a Q register directly. We need a scratch. */
5183 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
5184 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
5185 && reg_class_subset_p (rclass, FP_REGS))
5186 {
5187 if (mode == TFmode)
5188 sri->icode = CODE_FOR_aarch64_reload_movtf;
5189 else if (mode == TImode)
5190 sri->icode = CODE_FOR_aarch64_reload_movti;
5191 return NO_REGS;
5192 }
5193
5194 /* A TFmode or TImode memory access should be handled via FP_REGS
5195 because AArch64 has richer addressing modes for LDR/STR instructions
5196 than LDP/STP instructions. */
d5726973 5197 if (TARGET_FLOAT && rclass == GENERAL_REGS
43e9d192
IB
5198 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
5199 return FP_REGS;
5200
5201 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
a4a182c6 5202 return GENERAL_REGS;
43e9d192
IB
5203
5204 return NO_REGS;
5205}
5206
5207static bool
5208aarch64_can_eliminate (const int from, const int to)
5209{
5210 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
5211 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
5212
5213 if (frame_pointer_needed)
5214 {
5215 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5216 return true;
5217 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5218 return false;
5219 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
5220 && !cfun->calls_alloca)
5221 return true;
5222 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5223 return true;
0b7f8166
MS
5224
5225 return false;
43e9d192 5226 }
1c923b60
JW
5227 else
5228 {
5229 /* If we decided that we didn't need a leaf frame pointer but then used
5230 LR in the function, then we'll want a frame pointer after all, so
5231 prevent this elimination to ensure a frame pointer is used. */
5232 if (to == STACK_POINTER_REGNUM
5233 && flag_omit_leaf_frame_pointer
5234 && df_regs_ever_live_p (LR_REGNUM))
5235 return false;
5236 }
777e6976 5237
43e9d192
IB
5238 return true;
5239}
5240
5241HOST_WIDE_INT
5242aarch64_initial_elimination_offset (unsigned from, unsigned to)
5243{
43e9d192 5244 aarch64_layout_frame ();
78c29983
MS
5245
5246 if (to == HARD_FRAME_POINTER_REGNUM)
5247 {
5248 if (from == ARG_POINTER_REGNUM)
1c960e02 5249 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
78c29983
MS
5250
5251 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
5252 return (cfun->machine->frame.hard_fp_offset
5253 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
5254 }
5255
5256 if (to == STACK_POINTER_REGNUM)
5257 {
5258 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
5259 return (cfun->machine->frame.frame_size
5260 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
5261 }
5262
1c960e02 5263 return cfun->machine->frame.frame_size;
43e9d192
IB
5264}
5265
43e9d192
IB
5266/* Implement RETURN_ADDR_RTX. We do not support moving back to a
5267 previous frame. */
5268
5269rtx
5270aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
5271{
5272 if (count != 0)
5273 return const0_rtx;
5274 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
5275}
5276
5277
5278static void
5279aarch64_asm_trampoline_template (FILE *f)
5280{
28514dda
YZ
5281 if (TARGET_ILP32)
5282 {
5283 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
5284 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
5285 }
5286 else
5287 {
5288 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
5289 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
5290 }
01a3a324 5291 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 5292 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
5293 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
5294 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
5295}
5296
5297static void
5298aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5299{
5300 rtx fnaddr, mem, a_tramp;
28514dda 5301 const int tramp_code_sz = 16;
43e9d192
IB
5302
5303 /* Don't need to copy the trailing D-words, we fill those in below. */
5304 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
5305 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
5306 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 5307 fnaddr = XEXP (DECL_RTL (fndecl), 0);
5308 if (GET_MODE (fnaddr) != ptr_mode)
5309 fnaddr = convert_memory_address (ptr_mode, fnaddr);
5310 emit_move_insn (mem, fnaddr);
5311
28514dda 5312 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
5313 emit_move_insn (mem, chain_value);
5314
5315 /* XXX We should really define a "clear_cache" pattern and use
5316 gen_clear_cache(). */
5317 a_tramp = XEXP (m_tramp, 0);
5318 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
5319 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
5320 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
5321 ptr_mode);
5322}
5323
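/* Return how many consecutive hard registers of class REGCLASS are needed
   to hold a value of mode MODE (presumably the TARGET_CLASS_MAX_NREGS
   hook).  For example, a TImode value occupies two X registers in
   GENERAL_REGS, while a 128-bit vector fits in a single V register of
   FP_REGS. */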
5324static unsigned char
ef4bddc2 5325aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
5326{
5327 switch (regclass)
5328 {
fee9ba42 5329 case CALLER_SAVE_REGS:
43e9d192
IB
5330 case POINTER_REGS:
5331 case GENERAL_REGS:
5332 case ALL_REGS:
5333 case FP_REGS:
5334 case FP_LO_REGS:
5335 return
5336 aarch64_vector_mode_p (mode)
5337 ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
5338 : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5339 case STACK_REG:
5340 return 1;
5341
5342 case NO_REGS:
5343 return 0;
5344
5345 default:
5346 break;
5347 }
5348 gcc_unreachable ();
5349}
5350
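/* Return the register class to prefer when reloading X into a register
   of class REGCLASS, or NO_REGS to forbid the combination (presumably
   the TARGET_PREFERRED_RELOAD_CLASS hook). */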
5351static reg_class_t
78d8b9f0 5352aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 5353{
51bb310d 5354 if (regclass == POINTER_REGS)
5355 return GENERAL_REGS;
5356
5357 if (regclass == STACK_REG)
5358 {
5359 if (REG_P (x)
5360 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
5361 return regclass;
5362
5363 return NO_REGS;
5364 }
5365
5366 /* If it's an integer immediate that MOVI can't handle, then
5367 FP_REGS is not an option, so we return NO_REGS instead. */
5368 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
5369 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
5370 return NO_REGS;
5371
5372 /* Register elimination can result in a request for
5373 SP+constant->FP_REGS. We cannot support such operations which
5374 use SP as source and an FP_REG as destination, so reject this
5375 right now. */
5376 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
5377 {
5378 rtx lhs = XEXP (x, 0);
5379
5380 /* Look through a possible SUBREG introduced by ILP32. */
5381 if (GET_CODE (lhs) == SUBREG)
5382 lhs = SUBREG_REG (lhs);
5383
5384 gcc_assert (REG_P (lhs));
5385 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
5386 POINTER_REGS));
5387 return NO_REGS;
5388 }
5389
78d8b9f0 5390 return regclass;
5391}
5392
5393void
5394aarch64_asm_output_labelref (FILE* f, const char *name)
5395{
5396 asm_fprintf (f, "%U%s", name);
5397}
5398
5399static void
5400aarch64_elf_asm_constructor (rtx symbol, int priority)
5401{
5402 if (priority == DEFAULT_INIT_PRIORITY)
5403 default_ctor_section_asm_out_constructor (symbol, priority);
5404 else
5405 {
5406 section *s;
5407 char buf[18];
5408 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
5409 s = get_section (buf, SECTION_WRITE, NULL);
5410 switch_to_section (s);
5411 assemble_align (POINTER_SIZE);
28514dda 5412 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5413 }
5414}
5415
5416static void
5417aarch64_elf_asm_destructor (rtx symbol, int priority)
5418{
5419 if (priority == DEFAULT_INIT_PRIORITY)
5420 default_dtor_section_asm_out_destructor (symbol, priority);
5421 else
5422 {
5423 section *s;
5424 char buf[18];
5425 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
5426 s = get_section (buf, SECTION_WRITE, NULL);
5427 switch_to_section (s);
5428 assemble_align (POINTER_SIZE);
28514dda 5429 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5430 }
5431}
5432
5433const char*
5434aarch64_output_casesi (rtx *operands)
5435{
5436 char buf[100];
5437 char label[100];
b32d5189 5438 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
5439 int index;
5440 static const char *const patterns[4][2] =
5441 {
5442 {
5443 "ldrb\t%w3, [%0,%w1,uxtw]",
5444 "add\t%3, %4, %w3, sxtb #2"
5445 },
5446 {
5447 "ldrh\t%w3, [%0,%w1,uxtw #1]",
5448 "add\t%3, %4, %w3, sxth #2"
5449 },
5450 {
5451 "ldr\t%w3, [%0,%w1,uxtw #2]",
5452 "add\t%3, %4, %w3, sxtw #2"
5453 },
5454 /* We assume that DImode is only generated when not optimizing and
5455 that we don't really need 64-bit address offsets. That would
5456 imply an object file with 8GB of code in a single function! */
5457 {
5458 "ldr\t%w3, [%0,%w1,uxtw #2]",
5459 "add\t%3, %4, %w3, sxtw #2"
5460 }
5461 };
5462
5463 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
5464
5465 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
5466
5467 gcc_assert (index >= 0 && index <= 3);
5468
5469 /* Need to implement table size reduction, by changing the code below. */
5470 output_asm_insn (patterns[index][0], operands);
5471 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
5472 snprintf (buf, sizeof (buf),
5473 "adr\t%%4, %s", targetm.strip_name_encoding (label));
5474 output_asm_insn (buf, operands);
5475 output_asm_insn (patterns[index][1], operands);
5476 output_asm_insn ("br\t%3", operands);
5477 assemble_label (asm_out_file, label);
5478 return "";
5479}
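/* An illustrative sketch of the sequence emitted above (see the
   patterns[] strings for the exact operands):
     ldr<b|h|w> w3, [%0, %w1, uxtw #s] ; load the dispatch-table entry
     adr        x4, Lrtx<N>            ; label assembled after the br
     add        x3, x4, %w3, sxt<x> #2 ; rescale entry and add the base
     br         x3
   i.e. each table entry is an offset from the Lrtx<N> label, stored
   scaled down by four. */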
5480
5481
5482/* Return size in bits of an arithmetic operand which is shifted/scaled and
5483 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5484 operator. */
5485
5486int
5487aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
5488{
5489 if (shift >= 0 && shift <= 3)
5490 {
5491 int size;
5492 for (size = 8; size <= 32; size *= 2)
5493 {
5494 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
5495 if (mask == bits << shift)
5496 return size;
5497 }
5498 }
5499 return 0;
5500}
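/* For example, aarch64_uxt_size (1, 0x1fe) is 8, since 0x1fe is 0xff
   shifted left by one, whereas aarch64_uxt_size (0, 0x7f) is 0 because
   0x7f is not a full byte, halfword or word mask. */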
5501
5502/* Constant pools are per function only when PC relative
5503 literal loads are true or we are in the large memory
5504 model. */
5505
5506static inline bool
5507aarch64_can_use_per_function_literal_pools_p (void)
5508{
9ee6540a 5509 return (aarch64_pcrelative_literal_loads
5510 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
5511}
5512
43e9d192 5513static bool
e78d485e 5514aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 5515{
5516 /* FIXME: In an ideal world this would work similarly
5517 to the logic in aarch64_select_rtx_section, but this
5518 breaks bootstrap in gccgo. For now we work around
5519 this by returning false here. */
5520 return false;
5521}
5522
e78d485e
RR
5523/* Select appropriate section for constants depending
5524 on where we place literal pools. */
5525
43e9d192 5526static section *
e78d485e
RR
5527aarch64_select_rtx_section (machine_mode mode,
5528 rtx x,
5529 unsigned HOST_WIDE_INT align)
43e9d192 5530{
e78d485e
RR
5531 if (aarch64_can_use_per_function_literal_pools_p ())
5532 return function_section (current_function_decl);
43e9d192 5533
e78d485e
RR
5534 return default_elf_select_rtx_section (mode, x, align);
5535}
43e9d192 5536
5fca7b66
RH
5537/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
5538void
5539aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
5540 HOST_WIDE_INT offset)
5541{
5542 /* When using per-function literal pools, we must ensure that any code
5543 section is aligned to the minimal instruction length, lest we get
5544 errors from the assembler re "unaligned instructions". */
5545 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
5546 ASM_OUTPUT_ALIGN (f, 2);
5547}
5548
43e9d192
IB
5549/* Costs. */
5550
5551/* Helper function for rtx cost calculation. Strip a shift expression
5552 from X. Returns the inner operand if successful, or the original
5553 expression on failure. */
5554static rtx
5555aarch64_strip_shift (rtx x)
5556{
5557 rtx op = x;
5558
5559 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5560 we can convert both to ROR during final output. */
5561 if ((GET_CODE (op) == ASHIFT
5562 || GET_CODE (op) == ASHIFTRT
5563 || GET_CODE (op) == LSHIFTRT
5564 || GET_CODE (op) == ROTATERT
5565 || GET_CODE (op) == ROTATE)
43e9d192
IB
5566 && CONST_INT_P (XEXP (op, 1)))
5567 return XEXP (op, 0);
5568
5569 if (GET_CODE (op) == MULT
5570 && CONST_INT_P (XEXP (op, 1))
5571 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5572 return XEXP (op, 0);
5573
5574 return x;
5575}
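/* For instance, given (ashift:DI (reg:DI x0) (const_int 3)) the helper
   above returns (reg:DI x0); a MULT by a power of two is stripped the
   same way, since it will ultimately be emitted as a shift. */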
5576
4745e701 5577/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
5578 expression from X. Returns the inner operand if successful, or the
5579 original expression on failure. We deal with a number of possible
5580 canonicalization variations here. */
5581static rtx
4745e701 5582aarch64_strip_extend (rtx x)
43e9d192
IB
5583{
5584 rtx op = x;
5585
5586 /* Zero and sign extraction of a widened value. */
5587 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5588 && XEXP (op, 2) == const0_rtx
4745e701 5589 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
5590 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5591 XEXP (op, 1)))
5592 return XEXP (XEXP (op, 0), 0);
5593
5594 /* It can also be represented (for zero-extend) as an AND with an
5595 immediate. */
5596 if (GET_CODE (op) == AND
5597 && GET_CODE (XEXP (op, 0)) == MULT
5598 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5599 && CONST_INT_P (XEXP (op, 1))
5600 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5601 INTVAL (XEXP (op, 1))) != 0)
5602 return XEXP (XEXP (op, 0), 0);
5603
5604 /* Now handle extended register, as this may also have an optional
5605 left shift by 1..4. */
5606 if (GET_CODE (op) == ASHIFT
5607 && CONST_INT_P (XEXP (op, 1))
5608 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5609 op = XEXP (op, 0);
5610
5611 if (GET_CODE (op) == ZERO_EXTEND
5612 || GET_CODE (op) == SIGN_EXTEND)
5613 op = XEXP (op, 0);
5614
5615 if (op != x)
5616 return op;
5617
4745e701
JG
5618 return x;
5619}
5620
0a78ebe4
KT
5621/* Return true iff CODE is a shift supported in combination
5622 with arithmetic instructions. */
4d1919ed 5623
0a78ebe4
KT
5624static bool
5625aarch64_shift_p (enum rtx_code code)
5626{
5627 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
5628}
5629
4745e701 5630/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
5631 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
5632 Return the calculated cost of the expression, recursing manually in to
4745e701
JG
5633 operands where needed. */
5634
5635static int
e548c9df 5636aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
5637{
5638 rtx op0, op1;
5639 const struct cpu_cost_table *extra_cost
b175b679 5640 = aarch64_tune_params.insn_extra_cost;
4745e701 5641 int cost = 0;
0a78ebe4 5642 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 5643 machine_mode mode = GET_MODE (x);
4745e701
JG
5644
5645 gcc_checking_assert (code == MULT);
5646
5647 op0 = XEXP (x, 0);
5648 op1 = XEXP (x, 1);
5649
5650 if (VECTOR_MODE_P (mode))
5651 mode = GET_MODE_INNER (mode);
5652
5653 /* Integer multiply/fma. */
5654 if (GET_MODE_CLASS (mode) == MODE_INT)
5655 {
5656 /* The multiply will be canonicalized as a shift, cost it as such. */
5657 if (aarch64_shift_p (GET_CODE (x))
5658 || (CONST_INT_P (op1)
5659 && exact_log2 (INTVAL (op1)) > 0))
4745e701 5660 {
5661 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
5662 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
5663 if (speed)
5664 {
0a78ebe4
KT
5665 if (compound_p)
5666 {
5667 if (REG_P (op1))
5668 /* ARITH + shift-by-register. */
5669 cost += extra_cost->alu.arith_shift_reg;
5670 else if (is_extend)
5671 /* ARITH + extended register. We don't have a cost field
5672 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
5673 cost += extra_cost->alu.extend_arith;
5674 else
5675 /* ARITH + shift-by-immediate. */
5676 cost += extra_cost->alu.arith_shift;
5677 }
5678 else
5679 /* LSL (immediate). */
0a78ebe4
KT
5680 cost += extra_cost->alu.shift;
5681
4745e701 5682 }
0a78ebe4
KT
5683 /* Strip extends as we will have costed them in the case above. */
5684 if (is_extend)
5685 op0 = aarch64_strip_extend (op0);
4745e701 5686
e548c9df 5687 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
5688
5689 return cost;
5690 }
5691
d2ac256b
KT
5692 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
5693 compound and let the below cases handle it. After all, MNEG is a
5694 special-case alias of MSUB. */
5695 if (GET_CODE (op0) == NEG)
5696 {
5697 op0 = XEXP (op0, 0);
5698 compound_p = true;
5699 }
5700
4745e701
JG
5701 /* Integer multiplies or FMAs have zero/sign extending variants. */
5702 if ((GET_CODE (op0) == ZERO_EXTEND
5703 && GET_CODE (op1) == ZERO_EXTEND)
5704 || (GET_CODE (op0) == SIGN_EXTEND
5705 && GET_CODE (op1) == SIGN_EXTEND))
5706 {
e548c9df
AM
5707 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
5708 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
5709
5710 if (speed)
5711 {
0a78ebe4 5712 if (compound_p)
d2ac256b 5713 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
5714 cost += extra_cost->mult[0].extend_add;
5715 else
5716 /* MUL/SMULL/UMULL. */
5717 cost += extra_cost->mult[0].extend;
5718 }
5719
5720 return cost;
5721 }
5722
d2ac256b 5723 /* This is either an integer multiply or a MADD. In both cases
4745e701 5724 we want to recurse and cost the operands. */
e548c9df
AM
5725 cost += rtx_cost (op0, mode, MULT, 0, speed);
5726 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
5727
5728 if (speed)
5729 {
0a78ebe4 5730 if (compound_p)
d2ac256b 5731 /* MADD/MSUB. */
4745e701
JG
5732 cost += extra_cost->mult[mode == DImode].add;
5733 else
5734 /* MUL. */
5735 cost += extra_cost->mult[mode == DImode].simple;
5736 }
5737
5738 return cost;
5739 }
5740 else
5741 {
5742 if (speed)
5743 {
3d840f7d 5744 /* Floating-point FMA/FMUL can also support negations of the
5745 operands, unless the rounding mode is upward or downward in
5746 which case FNMUL is different from FMUL with operand negation. */
5747 bool neg0 = GET_CODE (op0) == NEG;
5748 bool neg1 = GET_CODE (op1) == NEG;
5749 if (compound_p || !flag_rounding_math || (neg0 && neg1))
5750 {
5751 if (neg0)
5752 op0 = XEXP (op0, 0);
5753 if (neg1)
5754 op1 = XEXP (op1, 0);
5755 }
4745e701 5756
0a78ebe4 5757 if (compound_p)
4745e701
JG
5758 /* FMADD/FNMADD/FNMSUB/FMSUB. */
5759 cost += extra_cost->fp[mode == DFmode].fma;
5760 else
3d840f7d 5761 /* FMUL/FNMUL. */
4745e701
JG
5762 cost += extra_cost->fp[mode == DFmode].mult;
5763 }
5764
e548c9df
AM
5765 cost += rtx_cost (op0, mode, MULT, 0, speed);
5766 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
5767 return cost;
5768 }
43e9d192
IB
5769}
5770
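/* Estimate the cost of using address X for a memory access of mode MODE,
   in the units of the per-core address-cost tables (presumably the
   TARGET_ADDRESS_COST hook).  Addresses that fail to classify, such as a
   bare SYMBOL_REF, are costed through the generic rtx_cost machinery. */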
5771static int
5772aarch64_address_cost (rtx x,
ef4bddc2 5773 machine_mode mode,
67747367
JG
5774 addr_space_t as ATTRIBUTE_UNUSED,
5775 bool speed)
5776{
5777 enum rtx_code c = GET_CODE (x);
b175b679 5778 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
5779 struct aarch64_address_info info;
5780 int cost = 0;
5781 info.shift = 0;
5782
5783 if (!aarch64_classify_address (&info, x, mode, c, false))
5784 {
5785 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5786 {
5787 /* This is a CONST or SYMBOL ref which will be split
5788 in a different way depending on the code model in use.
5789 Cost it through the generic infrastructure. */
e548c9df 5790 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
5791 /* Divide through by the cost of one instruction to
5792 bring it to the same units as the address costs. */
5793 cost_symbol_ref /= COSTS_N_INSNS (1);
5794 /* The cost is then the cost of preparing the address,
5795 followed by an immediate (possibly 0) offset. */
5796 return cost_symbol_ref + addr_cost->imm_offset;
5797 }
5798 else
5799 {
5800 /* This is most likely a jump table from a case
5801 statement. */
5802 return addr_cost->register_offset;
5803 }
5804 }
5805
5806 switch (info.type)
5807 {
5808 case ADDRESS_LO_SUM:
5809 case ADDRESS_SYMBOLIC:
5810 case ADDRESS_REG_IMM:
5811 cost += addr_cost->imm_offset;
5812 break;
5813
5814 case ADDRESS_REG_WB:
5815 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5816 cost += addr_cost->pre_modify;
5817 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5818 cost += addr_cost->post_modify;
5819 else
5820 gcc_unreachable ();
5821
5822 break;
5823
5824 case ADDRESS_REG_REG:
5825 cost += addr_cost->register_offset;
5826 break;
5827
67747367 5828 case ADDRESS_REG_SXTW:
783879e6
EM
5829 cost += addr_cost->register_sextend;
5830 break;
5831
5832 case ADDRESS_REG_UXTW:
5833 cost += addr_cost->register_zextend;
67747367
JG
5834 break;
5835
5836 default:
5837 gcc_unreachable ();
5838 }
5839
5840
5841 if (info.shift > 0)
5842 {
5843 /* For the sake of calculating the cost of the shifted register
5844 component, we can treat same sized modes in the same way. */
5845 switch (GET_MODE_BITSIZE (mode))
5846 {
5847 case 16:
5848 cost += addr_cost->addr_scale_costs.hi;
5849 break;
5850
5851 case 32:
5852 cost += addr_cost->addr_scale_costs.si;
5853 break;
5854
5855 case 64:
5856 cost += addr_cost->addr_scale_costs.di;
5857 break;
5858
5859 /* We can't tell, or this is a 128-bit vector. */
5860 default:
5861 cost += addr_cost->addr_scale_costs.ti;
5862 break;
5863 }
5864 }
5865
5866 return cost;
5867}
5868
b9066f5a
MW
5869/* Return the cost of a branch. If SPEED_P is true then the compiler is
5870 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
5871 to be taken. */
5872
5873int
5874aarch64_branch_cost (bool speed_p, bool predictable_p)
5875{
5876 /* When optimizing for speed, use the cost of unpredictable branches. */
5877 const struct cpu_branch_cost *branch_costs =
b175b679 5878 aarch64_tune_params.branch_costs;
b9066f5a
MW
5879
5880 if (!speed_p || predictable_p)
5881 return branch_costs->predictable;
5882 else
5883 return branch_costs->unpredictable;
5884}
5885
7cc2145f
JG
5886/* Return true if the RTX X in mode MODE is a zero or sign extract
5887 usable in an ADD or SUB (extended register) instruction. */
5888static bool
ef4bddc2 5889aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
7cc2145f
JG
5890{
5891 /* Catch add with a sign extract.
5892 This is add_<optab><mode>_multp2. */
5893 if (GET_CODE (x) == SIGN_EXTRACT
5894 || GET_CODE (x) == ZERO_EXTRACT)
5895 {
5896 rtx op0 = XEXP (x, 0);
5897 rtx op1 = XEXP (x, 1);
5898 rtx op2 = XEXP (x, 2);
5899
5900 if (GET_CODE (op0) == MULT
5901 && CONST_INT_P (op1)
5902 && op2 == const0_rtx
5903 && CONST_INT_P (XEXP (op0, 1))
5904 && aarch64_is_extend_from_extract (mode,
5905 XEXP (op0, 1),
5906 op1))
5907 {
5908 return true;
5909 }
5910 }
5911 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
5912 No shift. */
5913 else if (GET_CODE (x) == SIGN_EXTEND
5914 || GET_CODE (x) == ZERO_EXTEND)
5915 return REG_P (XEXP (x, 0));
7cc2145f
JG
5916
5917 return false;
5918}
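/* As an illustration, (zero_extend:DI (reg:SI)) used as an operand of a
   PLUS satisfies the check above and corresponds to the extended-register
   form ADD x0, x1, w2, uxtw (register numbers chosen for illustration). */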
5919
5920static bool
5921aarch64_frint_unspec_p (unsigned int u)
5922{
5923 switch (u)
5924 {
5925 case UNSPEC_FRINTZ:
5926 case UNSPEC_FRINTP:
5927 case UNSPEC_FRINTM:
5928 case UNSPEC_FRINTA:
5929 case UNSPEC_FRINTN:
5930 case UNSPEC_FRINTX:
5931 case UNSPEC_FRINTI:
5932 return true;
5933
5934 default:
5935 return false;
5936 }
5937}
5938
fb0cb7fa
KT
5939/* Return true iff X is an rtx that will match an extr instruction
5940 i.e. as described in the *extr<mode>5_insn family of patterns.
5941 OP0 and OP1 will be set to the operands of the shifts involved
5942 on success and will be NULL_RTX otherwise. */
5943
5944static bool
5945aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
5946{
5947 rtx op0, op1;
5948 machine_mode mode = GET_MODE (x);
5949
5950 *res_op0 = NULL_RTX;
5951 *res_op1 = NULL_RTX;
5952
5953 if (GET_CODE (x) != IOR)
5954 return false;
5955
5956 op0 = XEXP (x, 0);
5957 op1 = XEXP (x, 1);
5958
5959 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
5960 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
5961 {
5962 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
5963 if (GET_CODE (op1) == ASHIFT)
5964 std::swap (op0, op1);
5965
5966 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
5967 return false;
5968
5969 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
5970 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
5971
5972 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
5973 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
5974 {
5975 *res_op0 = XEXP (op0, 0);
5976 *res_op1 = XEXP (op1, 0);
5977 return true;
5978 }
5979 }
5980
5981 return false;
5982}
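/* For example,
     (ior:DI (ashift:DI (reg:DI x1) (const_int 48))
             (lshiftrt:DI (reg:DI x2) (const_int 16)))
   matches, because the two shift amounts sum to 64, and can be emitted
   as a single EXTR x0, x1, x2, #16 (registers chosen for illustration). */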
5983
5984/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5985 storing it in *COST. Result is true if the total cost of the operation
5986 has now been calculated. */
5987static bool
5988aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5989{
5990 rtx inner;
5991 rtx comparator;
5992 enum rtx_code cmpcode;
5993
5994 if (COMPARISON_P (op0))
5995 {
5996 inner = XEXP (op0, 0);
5997 comparator = XEXP (op0, 1);
5998 cmpcode = GET_CODE (op0);
5999 }
6000 else
6001 {
6002 inner = op0;
6003 comparator = const0_rtx;
6004 cmpcode = NE;
6005 }
6006
2d5ffe46
AP
6007 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
6008 {
6009 /* Conditional branch. */
b9e3afe9 6010 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
6011 return true;
6012 else
6013 {
b9e3afe9 6014 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 6015 {
2d5ffe46
AP
6016 if (comparator == const0_rtx)
6017 {
6018 /* TBZ/TBNZ/CBZ/CBNZ. */
6019 if (GET_CODE (inner) == ZERO_EXTRACT)
6020 /* TBZ/TBNZ. */
e548c9df
AM
6021 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
6022 ZERO_EXTRACT, 0, speed);
6023 else
6024 /* CBZ/CBNZ. */
6025 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
6026
6027 return true;
6028 }
6029 }
b9e3afe9 6030 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 6031 {
2d5ffe46
AP
6032 /* TBZ/TBNZ. */
6033 if (comparator == const0_rtx)
6034 return true;
6035 }
6036 }
6037 }
b9e3afe9 6038 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 6039 {
786298dc 6040 /* CCMP. */
6dfeb7ce 6041 if (GET_CODE (op1) == COMPARE)
786298dc
WD
6042 {
6043 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
6044 if (XEXP (op1, 1) == const0_rtx)
6045 *cost += 1;
6046 if (speed)
6047 {
6048 machine_mode mode = GET_MODE (XEXP (op1, 0));
6049 const struct cpu_cost_table *extra_cost
6050 = aarch64_tune_params.insn_extra_cost;
6051
6052 if (GET_MODE_CLASS (mode) == MODE_INT)
6053 *cost += extra_cost->alu.arith;
6054 else
6055 *cost += extra_cost->fp[mode == DFmode].compare;
6056 }
6057 return true;
6058 }
6059
2d5ffe46
AP
6060 /* It's a conditional operation based on the status flags,
6061 so it must be some flavor of CSEL. */
6062
6063 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
6064 if (GET_CODE (op1) == NEG
6065 || GET_CODE (op1) == NOT
6066 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
6067 op1 = XEXP (op1, 0);
bad00732
KT
6068 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
6069 {
6070 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
6071 op1 = XEXP (op1, 0);
6072 op2 = XEXP (op2, 0);
6073 }
2d5ffe46 6074
e548c9df
AM
6075 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
6076 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
6077 return true;
6078 }
6079
6080 /* We don't know what this is, cost all operands. */
6081 return false;
6082}
6083
283b6c85
KT
6084/* Check whether X is a bitfield operation of the form shift + extend that
6085 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
6086 operand to which the bitfield operation is applied. Otherwise return
6087 NULL_RTX. */
6088
6089static rtx
6090aarch64_extend_bitfield_pattern_p (rtx x)
6091{
6092 rtx_code outer_code = GET_CODE (x);
6093 machine_mode outer_mode = GET_MODE (x);
6094
6095 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
6096 && outer_mode != SImode && outer_mode != DImode)
6097 return NULL_RTX;
6098
6099 rtx inner = XEXP (x, 0);
6100 rtx_code inner_code = GET_CODE (inner);
6101 machine_mode inner_mode = GET_MODE (inner);
6102 rtx op = NULL_RTX;
6103
6104 switch (inner_code)
6105 {
6106 case ASHIFT:
6107 if (CONST_INT_P (XEXP (inner, 1))
6108 && (inner_mode == QImode || inner_mode == HImode))
6109 op = XEXP (inner, 0);
6110 break;
6111 case LSHIFTRT:
6112 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
6113 && (inner_mode == QImode || inner_mode == HImode))
6114 op = XEXP (inner, 0);
6115 break;
6116 case ASHIFTRT:
6117 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
6118 && (inner_mode == QImode || inner_mode == HImode))
6119 op = XEXP (inner, 0);
6120 break;
6121 default:
6122 break;
6123 }
6124
6125 return op;
6126}
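/* For instance, (zero_extend:SI (lshiftrt:HI (reg:HI) (const_int 8))) is
   the shape of RTX recognised above and maps onto a UBFX, while the
   ASHIFT form corresponds to UBFIZ/SBFIZ. */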
6127
6128/* Return true if the mask and a shift amount from an RTX of the form
6129 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
6130 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
6131
6132bool
6133aarch64_mask_and_shift_for_ubfiz_p (machine_mode mode, rtx mask, rtx shft_amnt)
6134{
6135 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
6136 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
6137 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
6138 && (INTVAL (mask) & ((1 << INTVAL (shft_amnt)) - 1)) == 0;
6139}
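/* A worked example (values chosen for illustration): in SImode, a mask of
   0xff0 with a shift of 4 is accepted, since 0xff0 >> 4 is the contiguous
   mask 0xff and no mask bits fall below the shift; this is roughly the
   (x << 4) & 0xff0 form that becomes UBFIZ w0, w1, 4, 8.  A mask of 0xff1
   would be rejected by the low-bits check. */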
6140
6141/* Calculate the cost of calculating X, storing it in *COST. Result
6142 is true if the total cost of the operation has now been calculated. */
6143static bool
e548c9df 6144aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
6145 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
6146{
a8eecd00 6147 rtx op0, op1, op2;
73250c4c 6148 const struct cpu_cost_table *extra_cost
b175b679 6149 = aarch64_tune_params.insn_extra_cost;
e548c9df 6150 int code = GET_CODE (x);
43e9d192 6151
7fc5ef02
JG
6152 /* By default, assume that everything has equivalent cost to the
6153 cheapest instruction. Any additional costs are applied as a delta
6154 above this default. */
6155 *cost = COSTS_N_INSNS (1);
6156
43e9d192
IB
6157 switch (code)
6158 {
6159 case SET:
ba123b0d
JG
6160 /* The cost depends entirely on the operands to SET. */
6161 *cost = 0;
43e9d192
IB
6162 op0 = SET_DEST (x);
6163 op1 = SET_SRC (x);
6164
6165 switch (GET_CODE (op0))
6166 {
6167 case MEM:
6168 if (speed)
2961177e
JG
6169 {
6170 rtx address = XEXP (op0, 0);
b6875aac
KV
6171 if (VECTOR_MODE_P (mode))
6172 *cost += extra_cost->ldst.storev;
6173 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
6174 *cost += extra_cost->ldst.store;
6175 else if (mode == SFmode)
6176 *cost += extra_cost->ldst.storef;
6177 else if (mode == DFmode)
6178 *cost += extra_cost->ldst.stored;
6179
6180 *cost +=
6181 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6182 0, speed));
6183 }
43e9d192 6184
e548c9df 6185 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6186 return true;
6187
6188 case SUBREG:
6189 if (! REG_P (SUBREG_REG (op0)))
e548c9df 6190 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 6191
43e9d192
IB
6192 /* Fall through. */
6193 case REG:
b6875aac
KV
6194 /* The cost is one per vector-register copied. */
6195 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
6196 {
6197 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
6198 / GET_MODE_SIZE (V4SImode);
6199 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6200 }
ba123b0d
JG
6201 /* const0_rtx is in general free, but we will use an
6202 instruction to set a register to 0. */
b6875aac
KV
6203 else if (REG_P (op1) || op1 == const0_rtx)
6204 {
6205 /* The cost is 1 per register copied. */
6206 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
ba123b0d 6207 / UNITS_PER_WORD;
b6875aac
KV
6208 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6209 }
ba123b0d
JG
6210 else
6211 /* Cost is just the cost of the RHS of the set. */
e548c9df 6212 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6213 return true;
6214
ba123b0d 6215 case ZERO_EXTRACT:
43e9d192 6216 case SIGN_EXTRACT:
ba123b0d
JG
6217 /* Bit-field insertion. Strip any redundant widening of
6218 the RHS to meet the width of the target. */
43e9d192
IB
6219 if (GET_CODE (op1) == SUBREG)
6220 op1 = SUBREG_REG (op1);
6221 if ((GET_CODE (op1) == ZERO_EXTEND
6222 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 6223 && CONST_INT_P (XEXP (op0, 1))
43e9d192
IB
6224 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
6225 >= INTVAL (XEXP (op0, 1))))
6226 op1 = XEXP (op1, 0);
ba123b0d
JG
6227
6228 if (CONST_INT_P (op1))
6229 {
6230 /* MOV immediate is assumed to always be cheap. */
6231 *cost = COSTS_N_INSNS (1);
6232 }
6233 else
6234 {
6235 /* BFM. */
6236 if (speed)
6237 *cost += extra_cost->alu.bfi;
e548c9df 6238 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
6239 }
6240
43e9d192
IB
6241 return true;
6242
6243 default:
ba123b0d
JG
6244 /* We can't make sense of this, assume default cost. */
6245 *cost = COSTS_N_INSNS (1);
61263118 6246 return false;
43e9d192
IB
6247 }
6248 return false;
6249
6250 case CONST_INT:
6251 /* If an instruction can incorporate a constant within the
6252 instruction, the instruction's expression avoids calling
6253 rtx_cost() on the constant. If rtx_cost() is called on a
6254 constant, then it is usually because the constant must be
6255 moved into a register by one or more instructions.
6256
6257 The exception is constant 0, which can be expressed
6258 as XZR/WZR and is therefore free. The exception to this is
6259 if we have (set (reg) (const0_rtx)) in which case we must cost
6260 the move. However, we can catch that when we cost the SET, so
6261 we don't need to consider that here. */
6262 if (x == const0_rtx)
6263 *cost = 0;
6264 else
6265 {
6266 /* To an approximation, the cost of building any other constant is
6267 proportional to the number of instructions
6268 required to build that constant. This is true whether we
6269 are compiling for SPEED or otherwise. */
82614948
RR
6270 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
6271 (NULL_RTX, x, false, mode));
9dfc162c
JG
6272 }
6273 return true;
6274
6275 case CONST_DOUBLE:
6276 if (speed)
6277 {
6278 /* mov[df,sf]_aarch64. */
6279 if (aarch64_float_const_representable_p (x))
6280 /* FMOV (scalar immediate). */
6281 *cost += extra_cost->fp[mode == DFmode].fpconst;
6282 else if (!aarch64_float_const_zero_rtx_p (x))
6283 {
6284 /* This will be a load from memory. */
6285 if (mode == DFmode)
6286 *cost += extra_cost->ldst.loadd;
6287 else
6288 *cost += extra_cost->ldst.loadf;
6289 }
6290 else
6291 /* Otherwise this is +0.0. We get this using MOVI d0, #0
6292 or MOV v0.s[0], wzr - neither of which are modeled by the
6293 cost tables. Just use the default cost. */
6294 {
6295 }
6296 }
6297
6298 return true;
6299
43e9d192
IB
6300 case MEM:
6301 if (speed)
2961177e
JG
6302 {
6303 /* For loads we want the base cost of a load, plus an
6304 approximation for the additional cost of the addressing
6305 mode. */
6306 rtx address = XEXP (x, 0);
b6875aac
KV
6307 if (VECTOR_MODE_P (mode))
6308 *cost += extra_cost->ldst.loadv;
6309 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
6310 *cost += extra_cost->ldst.load;
6311 else if (mode == SFmode)
6312 *cost += extra_cost->ldst.loadf;
6313 else if (mode == DFmode)
6314 *cost += extra_cost->ldst.loadd;
6315
6316 *cost +=
6317 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6318 0, speed));
6319 }
43e9d192
IB
6320
6321 return true;
6322
6323 case NEG:
4745e701
JG
6324 op0 = XEXP (x, 0);
6325
b6875aac
KV
6326 if (VECTOR_MODE_P (mode))
6327 {
6328 if (speed)
6329 {
6330 /* FNEG. */
6331 *cost += extra_cost->vect.alu;
6332 }
6333 return false;
6334 }
6335
e548c9df
AM
6336 if (GET_MODE_CLASS (mode) == MODE_INT)
6337 {
4745e701
JG
6338 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6339 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6340 {
6341 /* CSETM. */
e548c9df 6342 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
6343 return true;
6344 }
6345
6346 /* Cost this as SUB wzr, X. */
e548c9df 6347 op0 = CONST0_RTX (mode);
4745e701
JG
6348 op1 = XEXP (x, 0);
6349 goto cost_minus;
6350 }
6351
e548c9df 6352 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
6353 {
6354 /* Support (neg(fma...)) as a single instruction only if
6355 sign of zeros is unimportant. This matches the decision
6356 making in aarch64.md. */
6357 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
6358 {
6359 /* FNMADD. */
e548c9df 6360 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
6361 return true;
6362 }
d318517d
SN
6363 if (GET_CODE (op0) == MULT)
6364 {
6365 /* FNMUL. */
6366 *cost = rtx_cost (op0, mode, NEG, 0, speed);
6367 return true;
6368 }
4745e701
JG
6369 if (speed)
6370 /* FNEG. */
6371 *cost += extra_cost->fp[mode == DFmode].neg;
6372 return false;
6373 }
6374
6375 return false;
43e9d192 6376
781aeb73
KT
6377 case CLRSB:
6378 case CLZ:
6379 if (speed)
b6875aac
KV
6380 {
6381 if (VECTOR_MODE_P (mode))
6382 *cost += extra_cost->vect.alu;
6383 else
6384 *cost += extra_cost->alu.clz;
6385 }
781aeb73
KT
6386
6387 return false;
6388
43e9d192
IB
6389 case COMPARE:
6390 op0 = XEXP (x, 0);
6391 op1 = XEXP (x, 1);
6392
6393 if (op1 == const0_rtx
6394 && GET_CODE (op0) == AND)
6395 {
6396 x = op0;
e548c9df 6397 mode = GET_MODE (op0);
43e9d192
IB
6398 goto cost_logic;
6399 }
6400
a8eecd00
JG
6401 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
6402 {
6403 /* TODO: A write to the CC flags possibly costs extra, this
6404 needs encoding in the cost tables. */
6405
e548c9df 6406 mode = GET_MODE (op0);
a8eecd00
JG
6407 /* ANDS. */
6408 if (GET_CODE (op0) == AND)
6409 {
6410 x = op0;
6411 goto cost_logic;
6412 }
6413
6414 if (GET_CODE (op0) == PLUS)
6415 {
6416 /* ADDS (and CMN alias). */
6417 x = op0;
6418 goto cost_plus;
6419 }
6420
6421 if (GET_CODE (op0) == MINUS)
6422 {
6423 /* SUBS. */
6424 x = op0;
6425 goto cost_minus;
6426 }
6427
345854d8
KT
6428 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
6429 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
6430 && CONST_INT_P (XEXP (op0, 2)))
6431 {
6432 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
6433 Handle it here directly rather than going to cost_logic
6434 since we know the immediate generated for the TST is valid
6435 so we can avoid creating an intermediate rtx for it only
6436 for costing purposes. */
6437 if (speed)
6438 *cost += extra_cost->alu.logical;
6439
6440 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
6441 ZERO_EXTRACT, 0, speed);
6442 return true;
6443 }
6444
a8eecd00
JG
6445 if (GET_CODE (op1) == NEG)
6446 {
6447 /* CMN. */
6448 if (speed)
6449 *cost += extra_cost->alu.arith;
6450
e548c9df
AM
6451 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
6452 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
6453 return true;
6454 }
6455
6456 /* CMP.
6457
6458 Compare can freely swap the order of operands, and
6459 canonicalization puts the more complex operation first.
6460 But the integer MINUS logic expects the shift/extend
6461 operation in op1. */
6462 if (! (REG_P (op0)
6463 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
6464 {
6465 op0 = XEXP (x, 1);
6466 op1 = XEXP (x, 0);
6467 }
6468 goto cost_minus;
6469 }
6470
6471 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6472 {
6473 /* FCMP. */
6474 if (speed)
6475 *cost += extra_cost->fp[mode == DFmode].compare;
6476
6477 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
6478 {
e548c9df 6479 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
6480 /* FCMP supports constant 0.0 for no extra cost. */
6481 return true;
6482 }
6483 return false;
6484 }
6485
b6875aac
KV
6486 if (VECTOR_MODE_P (mode))
6487 {
6488 /* Vector compare. */
6489 if (speed)
6490 *cost += extra_cost->vect.alu;
6491
6492 if (aarch64_float_const_zero_rtx_p (op1))
6493 {
6494 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
6495 cost. */
6496 return true;
6497 }
6498 return false;
6499 }
a8eecd00 6500 return false;
43e9d192
IB
6501
6502 case MINUS:
4745e701
JG
6503 {
6504 op0 = XEXP (x, 0);
6505 op1 = XEXP (x, 1);
6506
6507cost_minus:
e548c9df 6508 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 6509
4745e701
JG
6510 /* Detect valid immediates. */
6511 if ((GET_MODE_CLASS (mode) == MODE_INT
6512 || (GET_MODE_CLASS (mode) == MODE_CC
6513 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
6514 && CONST_INT_P (op1)
6515 && aarch64_uimm12_shift (INTVAL (op1)))
6516 {
4745e701
JG
6517 if (speed)
6518 /* SUB(S) (immediate). */
6519 *cost += extra_cost->alu.arith;
6520 return true;
4745e701
JG
6521 }
6522
7cc2145f
JG
6523 /* Look for SUB (extended register). */
6524 if (aarch64_rtx_arith_op_extract_p (op1, mode))
6525 {
6526 if (speed)
2533c820 6527 *cost += extra_cost->alu.extend_arith;
7cc2145f 6528
6529 op1 = aarch64_strip_extend (op1);
6530 *cost += rtx_cost (op1, VOIDmode,
e548c9df 6531 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
6532 return true;
6533 }
6534
4745e701
JG
6535 rtx new_op1 = aarch64_strip_extend (op1);
6536
6537 /* Cost this as an FMA-alike operation. */
6538 if ((GET_CODE (new_op1) == MULT
0a78ebe4 6539 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
6540 && code != COMPARE)
6541 {
6542 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
6543 (enum rtx_code) code,
6544 speed);
4745e701
JG
6545 return true;
6546 }
43e9d192 6547
e548c9df 6548 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 6549
4745e701
JG
6550 if (speed)
6551 {
b6875aac
KV
6552 if (VECTOR_MODE_P (mode))
6553 {
6554 /* Vector SUB. */
6555 *cost += extra_cost->vect.alu;
6556 }
6557 else if (GET_MODE_CLASS (mode) == MODE_INT)
6558 {
6559 /* SUB(S). */
6560 *cost += extra_cost->alu.arith;
6561 }
4745e701 6562 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
6563 {
6564 /* FSUB. */
6565 *cost += extra_cost->fp[mode == DFmode].addsub;
6566 }
4745e701
JG
6567 }
6568 return true;
6569 }
43e9d192
IB
6570
6571 case PLUS:
4745e701
JG
6572 {
6573 rtx new_op0;
43e9d192 6574
4745e701
JG
6575 op0 = XEXP (x, 0);
6576 op1 = XEXP (x, 1);
43e9d192 6577
a8eecd00 6578cost_plus:
4745e701
JG
6579 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6580 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6581 {
6582 /* CSINC. */
e548c9df
AM
6583 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
6584 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
6585 return true;
6586 }
43e9d192 6587
4745e701
JG
6588 if (GET_MODE_CLASS (mode) == MODE_INT
6589 && CONST_INT_P (op1)
6590 && aarch64_uimm12_shift (INTVAL (op1)))
6591 {
e548c9df 6592 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 6593
4745e701
JG
6594 if (speed)
6595 /* ADD (immediate). */
6596 *cost += extra_cost->alu.arith;
6597 return true;
6598 }
6599
e548c9df 6600 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 6601
7cc2145f
JG
6602 /* Look for ADD (extended register). */
6603 if (aarch64_rtx_arith_op_extract_p (op0, mode))
6604 {
6605 if (speed)
2533c820 6606 *cost += extra_cost->alu.extend_arith;
7cc2145f 6607
6608 op0 = aarch64_strip_extend (op0);
6609 *cost += rtx_cost (op0, VOIDmode,
e548c9df 6610 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
6611 return true;
6612 }
6613
4745e701
JG
6614 /* Strip any extend, leave shifts behind as we will
6615 cost them through mult_cost. */
6616 new_op0 = aarch64_strip_extend (op0);
6617
6618 if (GET_CODE (new_op0) == MULT
0a78ebe4 6619 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
6620 {
6621 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
6622 speed);
4745e701
JG
6623 return true;
6624 }
6625
e548c9df 6626 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
6627
6628 if (speed)
6629 {
b6875aac
KV
6630 if (VECTOR_MODE_P (mode))
6631 {
6632 /* Vector ADD. */
6633 *cost += extra_cost->vect.alu;
6634 }
6635 else if (GET_MODE_CLASS (mode) == MODE_INT)
6636 {
6637 /* ADD. */
6638 *cost += extra_cost->alu.arith;
6639 }
4745e701 6640 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
6641 {
6642 /* FADD. */
6643 *cost += extra_cost->fp[mode == DFmode].addsub;
6644 }
4745e701
JG
6645 }
6646 return true;
6647 }
43e9d192 6648
18b42b2a
KT
6649 case BSWAP:
6650 *cost = COSTS_N_INSNS (1);
6651
6652 if (speed)
b6875aac
KV
6653 {
6654 if (VECTOR_MODE_P (mode))
6655 *cost += extra_cost->vect.alu;
6656 else
6657 *cost += extra_cost->alu.rev;
6658 }
18b42b2a
KT
6659 return false;
6660
43e9d192 6661 case IOR:
f7d5cf8d
KT
6662 if (aarch_rev16_p (x))
6663 {
6664 *cost = COSTS_N_INSNS (1);
6665
b6875aac
KV
6666 if (speed)
6667 {
6668 if (VECTOR_MODE_P (mode))
6669 *cost += extra_cost->vect.alu;
6670 else
6671 *cost += extra_cost->alu.rev;
6672 }
6673 return true;
f7d5cf8d 6674 }
fb0cb7fa
KT
6675
6676 if (aarch64_extr_rtx_p (x, &op0, &op1))
6677 {
e548c9df
AM
6678 *cost += rtx_cost (op0, mode, IOR, 0, speed);
6679 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
6680 if (speed)
6681 *cost += extra_cost->alu.shift;
6682
6683 return true;
6684 }
f7d5cf8d 6685 /* Fall through. */
43e9d192
IB
6686 case XOR:
6687 case AND:
6688 cost_logic:
6689 op0 = XEXP (x, 0);
6690 op1 = XEXP (x, 1);
6691
b6875aac
KV
6692 if (VECTOR_MODE_P (mode))
6693 {
6694 if (speed)
6695 *cost += extra_cost->vect.alu;
6696 return true;
6697 }
6698
268c3b47
JG
6699 if (code == AND
6700 && GET_CODE (op0) == MULT
6701 && CONST_INT_P (XEXP (op0, 1))
6702 && CONST_INT_P (op1)
6703 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
6704 INTVAL (op1)) != 0)
6705 {
6706 /* This is a UBFM/SBFM. */
e548c9df 6707 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
6708 if (speed)
6709 *cost += extra_cost->alu.bfx;
6710 return true;
6711 }
6712
e548c9df 6713 if (GET_MODE_CLASS (mode) == MODE_INT)
43e9d192 6714 {
8c83f71d 6715 if (CONST_INT_P (op1))
43e9d192 6716 {
8c83f71d
KT
6717 /* We have a mask + shift version of a UBFIZ
6718 i.e. the *andim_ashift<mode>_bfiz pattern. */
6719 if (GET_CODE (op0) == ASHIFT
6720 && aarch64_mask_and_shift_for_ubfiz_p (mode, op1,
6721 XEXP (op0, 1)))
6722 {
6723 *cost += rtx_cost (XEXP (op0, 0), mode,
6724 (enum rtx_code) code, 0, speed);
6725 if (speed)
6726 *cost += extra_cost->alu.bfx;
268c3b47 6727
8c83f71d
KT
6728 return true;
6729 }
6730 else if (aarch64_bitmask_imm (INTVAL (op1), mode))
6731 {
6732 /* We possibly get the immediate for free, this is not
6733 modelled. */
6734 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6735 if (speed)
6736 *cost += extra_cost->alu.logical;
268c3b47 6737
8c83f71d
KT
6738 return true;
6739 }
43e9d192
IB
6740 }
6741 else
6742 {
268c3b47
JG
6743 rtx new_op0 = op0;
6744
6745 /* Handle ORN, EON, or BIC. */
43e9d192
IB
6746 if (GET_CODE (op0) == NOT)
6747 op0 = XEXP (op0, 0);
268c3b47
JG
6748
6749 new_op0 = aarch64_strip_shift (op0);
6750
6751 /* If we had a shift on op0 then this is a logical-shift-
6752 by-register/immediate operation. Otherwise, this is just
6753 a logical operation. */
6754 if (speed)
6755 {
6756 if (new_op0 != op0)
6757 {
6758 /* Shift by immediate. */
6759 if (CONST_INT_P (XEXP (op0, 1)))
6760 *cost += extra_cost->alu.log_shift;
6761 else
6762 *cost += extra_cost->alu.log_shift_reg;
6763 }
6764 else
6765 *cost += extra_cost->alu.logical;
6766 }
6767
6768 /* In both cases we want to cost both operands. */
e548c9df
AM
6769 *cost += rtx_cost (new_op0, mode, (enum rtx_code) code, 0, speed);
6770 *cost += rtx_cost (op1, mode, (enum rtx_code) code, 1, speed);
268c3b47
JG
6771
6772 return true;
43e9d192 6773 }
43e9d192
IB
6774 }
6775 return false;
6776
268c3b47 6777 case NOT:
6365da9e
KT
6778 x = XEXP (x, 0);
6779 op0 = aarch64_strip_shift (x);
6780
b6875aac
KV
6781 if (VECTOR_MODE_P (mode))
6782 {
6783 /* Vector NOT. */
6784 *cost += extra_cost->vect.alu;
6785 return false;
6786 }
6787
6365da9e
KT
6788 /* MVN-shifted-reg. */
6789 if (op0 != x)
6790 {
e548c9df 6791 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
6792
6793 if (speed)
6794 *cost += extra_cost->alu.log_shift;
6795
6796 return true;
6797 }
6798 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
6799 Handle the second form here taking care that 'a' in the above can
6800 be a shift. */
6801 else if (GET_CODE (op0) == XOR)
6802 {
6803 rtx newop0 = XEXP (op0, 0);
6804 rtx newop1 = XEXP (op0, 1);
6805 rtx op0_stripped = aarch64_strip_shift (newop0);
6806
e548c9df
AM
6807 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
6808 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
6809
6810 if (speed)
6811 {
6812 if (op0_stripped != newop0)
6813 *cost += extra_cost->alu.log_shift;
6814 else
6815 *cost += extra_cost->alu.logical;
6816 }
6817
6818 return true;
6819 }
268c3b47
JG
6820 /* MVN. */
6821 if (speed)
6822 *cost += extra_cost->alu.logical;
6823
268c3b47
JG
6824 return false;
6825
43e9d192 6826 case ZERO_EXTEND:
b1685e62
JG
6827
6828 op0 = XEXP (x, 0);
6829 /* If a value is written in SI mode, then zero extended to DI
6830 mode, the operation will in general be free as a write to
6831 a 'w' register implicitly zeroes the upper bits of an 'x'
6832 register. However, if this is
6833
6834 (set (reg) (zero_extend (reg)))
6835
6836 we must cost the explicit register move. */
6837 if (mode == DImode
6838 && GET_MODE (op0) == SImode
6839 && outer == SET)
6840 {
e548c9df 6841 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62
JG
6842
6843 if (!op_cost && speed)
6844 /* MOV. */
6845 *cost += extra_cost->alu.extend;
6846 else
6847 /* Free, the cost is that of the SI mode operation. */
6848 *cost = op_cost;
6849
6850 return true;
6851 }
e548c9df 6852 else if (MEM_P (op0))
43e9d192 6853 {
b1685e62 6854 /* All loads can zero extend to any size for free. */
e548c9df 6855 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
6856 return true;
6857 }
b1685e62 6858
283b6c85
KT
6859 op0 = aarch64_extend_bitfield_pattern_p (x);
6860 if (op0)
6861 {
6862 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
6863 if (speed)
6864 *cost += extra_cost->alu.bfx;
6865 return true;
6866 }
6867
b1685e62 6868 if (speed)
b6875aac
KV
6869 {
6870 if (VECTOR_MODE_P (mode))
6871 {
6872 /* UMOV. */
6873 *cost += extra_cost->vect.alu;
6874 }
6875 else
6876 {
6877 /* UXTB/UXTH. */
6878 *cost += extra_cost->alu.extend;
6879 }
6880 }
43e9d192
IB
6881 return false;
6882
6883 case SIGN_EXTEND:
b1685e62 6884 if (MEM_P (XEXP (x, 0)))
43e9d192 6885 {
b1685e62
JG
6886 /* LDRSH. */
6887 if (speed)
6888 {
6889 rtx address = XEXP (XEXP (x, 0), 0);
6890 *cost += extra_cost->ldst.load_sign_extend;
6891
6892 *cost +=
6893 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6894 0, speed));
6895 }
43e9d192
IB
6896 return true;
6897 }
b1685e62 6898
283b6c85
KT
6899 op0 = aarch64_extend_bitfield_pattern_p (x);
6900 if (op0)
6901 {
6902 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
6903 if (speed)
6904 *cost += extra_cost->alu.bfx;
6905 return true;
6906 }
6907
b1685e62 6908 if (speed)
b6875aac
KV
6909 {
6910 if (VECTOR_MODE_P (mode))
6911 *cost += extra_cost->vect.alu;
6912 else
6913 *cost += extra_cost->alu.extend;
6914 }
43e9d192
IB
6915 return false;
6916
ba0cfa17
JG
6917 case ASHIFT:
6918 op0 = XEXP (x, 0);
6919 op1 = XEXP (x, 1);
6920
6921 if (CONST_INT_P (op1))
6922 {
ba0cfa17 6923 if (speed)
b6875aac
KV
6924 {
6925 if (VECTOR_MODE_P (mode))
6926 {
6927 /* Vector shift (immediate). */
6928 *cost += extra_cost->vect.alu;
6929 }
6930 else
6931 {
6932 /* LSL (immediate), UBMF, UBFIZ and friends. These are all
6933 aliases. */
6934 *cost += extra_cost->alu.shift;
6935 }
6936 }
ba0cfa17
JG
6937
6938 /* We can incorporate zero/sign extend for free. */
6939 if (GET_CODE (op0) == ZERO_EXTEND
6940 || GET_CODE (op0) == SIGN_EXTEND)
6941 op0 = XEXP (op0, 0);
6942
e548c9df 6943 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
6944 return true;
6945 }
6946 else
6947 {
ba0cfa17 6948 if (speed)
b6875aac
KV
6949 {
6950 if (VECTOR_MODE_P (mode))
6951 {
6952 /* Vector shift (register). */
6953 *cost += extra_cost->vect.alu;
6954 }
6955 else
6956 {
6957 /* LSLV. */
6958 *cost += extra_cost->alu.shift_reg;
6959 }
6960 }
ba0cfa17
JG
6961 return false; /* All arguments need to be in registers. */
6962 }
6963
43e9d192 6964 case ROTATE:
43e9d192
IB
6965 case ROTATERT:
6966 case LSHIFTRT:
43e9d192 6967 case ASHIFTRT:
ba0cfa17
JG
6968 op0 = XEXP (x, 0);
6969 op1 = XEXP (x, 1);
43e9d192 6970
ba0cfa17
JG
6971 if (CONST_INT_P (op1))
6972 {
6973 /* ASR (immediate) and friends. */
6974 if (speed)
b6875aac
KV
6975 {
6976 if (VECTOR_MODE_P (mode))
6977 *cost += extra_cost->vect.alu;
6978 else
6979 *cost += extra_cost->alu.shift;
6980 }
43e9d192 6981
e548c9df 6982 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
6983 return true;
6984 }
6985 else
6986 {
6987
6988 /* ASR (register) and friends. */
6989 if (speed)
b6875aac
KV
6990 {
6991 if (VECTOR_MODE_P (mode))
6992 *cost += extra_cost->vect.alu;
6993 else
6994 *cost += extra_cost->alu.shift_reg;
6995 }
ba0cfa17
JG
6996 return false; /* All arguments need to be in registers. */
6997 }
43e9d192 6998
909734be
JG
6999 case SYMBOL_REF:
7000
1b1e81f8
JW
7001 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
7002 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
7003 {
7004 /* LDR. */
7005 if (speed)
7006 *cost += extra_cost->ldst.load;
7007 }
7008 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
7009 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
7010 {
7011 /* ADRP, followed by ADD. */
7012 *cost += COSTS_N_INSNS (1);
7013 if (speed)
7014 *cost += 2 * extra_cost->alu.arith;
7015 }
7016 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
7017 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
7018 {
7019 /* ADR. */
7020 if (speed)
7021 *cost += extra_cost->alu.arith;
7022 }
7023
7024 if (flag_pic)
7025 {
7026 /* One extra load instruction, after accessing the GOT. */
7027 *cost += COSTS_N_INSNS (1);
7028 if (speed)
7029 *cost += extra_cost->ldst.load;
7030 }
43e9d192
IB
7031 return true;
7032
909734be 7033 case HIGH:
43e9d192 7034 case LO_SUM:
909734be
JG
7035 /* ADRP/ADD (immediate). */
7036 if (speed)
7037 *cost += extra_cost->alu.arith;
43e9d192
IB
7038 return true;
7039
7040 case ZERO_EXTRACT:
7041 case SIGN_EXTRACT:
7cc2145f
JG
7042 /* UBFX/SBFX. */
7043 if (speed)
b6875aac
KV
7044 {
7045 if (VECTOR_MODE_P (mode))
7046 *cost += extra_cost->vect.alu;
7047 else
7048 *cost += extra_cost->alu.bfx;
7049 }
7cc2145f
JG
7050
7051 /* We can trust that the immediates used will be correct (there
7052 are no by-register forms), so we need only cost op0. */
e548c9df 7053 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
7054 return true;
7055
7056 case MULT:
4745e701
JG
7057 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
7058 /* aarch64_rtx_mult_cost always handles recursion to its
7059 operands. */
7060 return true;
43e9d192
IB
7061
7062 case MOD:
4f58fe36
KT
7063 /* We can expand signed mod by power of 2 using a NEGS, two parallel
7064 ANDs and a CSNEG. Assume here that a CSNEG costs the same as
7065 an unconditional negate. This case should only ever be reached through
7066 the set_smod_pow2_cheap check in expmed.c. */
7067 if (CONST_INT_P (XEXP (x, 1))
7068 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
7069 && (mode == SImode || mode == DImode))
7070 {
7071 /* We expand to 4 instructions. Reset the baseline. */
7072 *cost = COSTS_N_INSNS (4);
7073
7074 if (speed)
7075 *cost += 2 * extra_cost->alu.logical
7076 + 2 * extra_cost->alu.arith;
7077
7078 return true;
7079 }
7080
7081 /* Fall-through. */
43e9d192 7082 case UMOD:
43e9d192
IB
7083 if (speed)
7084 {
b6875aac
KV
7085 if (VECTOR_MODE_P (mode))
7086 *cost += extra_cost->vect.alu;
e548c9df
AM
7087 else if (GET_MODE_CLASS (mode) == MODE_INT)
7088 *cost += (extra_cost->mult[mode == DImode].add
7089 + extra_cost->mult[mode == DImode].idiv);
7090 else if (mode == DFmode)
73250c4c
KT
7091 *cost += (extra_cost->fp[1].mult
7092 + extra_cost->fp[1].div);
e548c9df 7093 else if (mode == SFmode)
73250c4c
KT
7094 *cost += (extra_cost->fp[0].mult
7095 + extra_cost->fp[0].div);
43e9d192
IB
7096 }
7097 return false; /* All arguments need to be in registers. */
7098
7099 case DIV:
7100 case UDIV:
4105fe38 7101 case SQRT:
43e9d192
IB
7102 if (speed)
7103 {
b6875aac
KV
7104 if (VECTOR_MODE_P (mode))
7105 *cost += extra_cost->vect.alu;
7106 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
7107 /* There is no integer SQRT, so only DIV and UDIV can get
7108 here. */
7109 *cost += extra_cost->mult[mode == DImode].idiv;
7110 else
7111 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
7112 }
7113 return false; /* All arguments need to be in registers. */
7114
a8eecd00 7115 case IF_THEN_ELSE:
2d5ffe46
AP
7116 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
7117 XEXP (x, 2), cost, speed);
a8eecd00
JG
7118
7119 case EQ:
7120 case NE:
7121 case GT:
7122 case GTU:
7123 case LT:
7124 case LTU:
7125 case GE:
7126 case GEU:
7127 case LE:
7128 case LEU:
7129
7130 return false; /* All arguments must be in registers. */
7131
b292109f
JG
7132 case FMA:
7133 op0 = XEXP (x, 0);
7134 op1 = XEXP (x, 1);
7135 op2 = XEXP (x, 2);
7136
7137 if (speed)
b6875aac
KV
7138 {
7139 if (VECTOR_MODE_P (mode))
7140 *cost += extra_cost->vect.alu;
7141 else
7142 *cost += extra_cost->fp[mode == DFmode].fma;
7143 }
b292109f
JG
7144
7145 /* FMSUB, FNMADD, and FNMSUB are free. */
7146 if (GET_CODE (op0) == NEG)
7147 op0 = XEXP (op0, 0);
7148
7149 if (GET_CODE (op2) == NEG)
7150 op2 = XEXP (op2, 0);
7151
7152 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
7153 and the by-element operand as operand 0. */
7154 if (GET_CODE (op1) == NEG)
7155 op1 = XEXP (op1, 0);
7156
7157 /* Catch vector-by-element operations. The by-element operand can
7158 either be (vec_duplicate (vec_select (x))) or just
7159 (vec_select (x)), depending on whether we are multiplying by
7160 a vector or a scalar.
7161
7162 Canonicalization is not very good in these cases, FMA4 will put the
7163 by-element operand as operand 0, FNMA4 will have it as operand 1. */
7164 if (GET_CODE (op0) == VEC_DUPLICATE)
7165 op0 = XEXP (op0, 0);
7166 else if (GET_CODE (op1) == VEC_DUPLICATE)
7167 op1 = XEXP (op1, 0);
7168
7169 if (GET_CODE (op0) == VEC_SELECT)
7170 op0 = XEXP (op0, 0);
7171 else if (GET_CODE (op1) == VEC_SELECT)
7172 op1 = XEXP (op1, 0);
7173
7174 /* If the remaining parameters are not registers,
7175 get the cost to put them into registers. */
e548c9df
AM
7176 *cost += rtx_cost (op0, mode, FMA, 0, speed);
7177 *cost += rtx_cost (op1, mode, FMA, 1, speed);
7178 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
7179 return true;
7180
5e2a765b
KT
7181 case FLOAT:
7182 case UNSIGNED_FLOAT:
7183 if (speed)
7184 *cost += extra_cost->fp[mode == DFmode].fromint;
7185 return false;
7186
b292109f
JG
7187 case FLOAT_EXTEND:
7188 if (speed)
b6875aac
KV
7189 {
7190 if (VECTOR_MODE_P (mode))
7191 {
7192 /* Vector conversion. */
7193 *cost += extra_cost->vect.alu;
7194 }
7195 else
7196 *cost += extra_cost->fp[mode == DFmode].widen;
7197 }
b292109f
JG
7198 return false;
7199
7200 case FLOAT_TRUNCATE:
7201 if (speed)
b6875aac
KV
7202 {
7203 if (VECTOR_MODE_P (mode))
7204 {
7205 /* Vector conversion. */
7206 *cost += extra_cost->vect.alu;
7207 }
7208 else
7209 *cost += extra_cost->fp[mode == DFmode].narrow;
7210 }
b292109f
JG
7211 return false;
7212
61263118
KT
7213 case FIX:
7214 case UNSIGNED_FIX:
7215 x = XEXP (x, 0);
7216 /* Strip the rounding part. They will all be implemented
7217 by the fcvt* family of instructions anyway. */
7218 if (GET_CODE (x) == UNSPEC)
7219 {
7220 unsigned int uns_code = XINT (x, 1);
7221
7222 if (uns_code == UNSPEC_FRINTA
7223 || uns_code == UNSPEC_FRINTM
7224 || uns_code == UNSPEC_FRINTN
7225 || uns_code == UNSPEC_FRINTP
7226 || uns_code == UNSPEC_FRINTZ)
7227 x = XVECEXP (x, 0, 0);
7228 }
7229
7230 if (speed)
b6875aac
KV
7231 {
7232 if (VECTOR_MODE_P (mode))
7233 *cost += extra_cost->vect.alu;
7234 else
7235 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
7236 }
39252973
KT
7237
7238 /* We can combine fmul by a power of 2 followed by a fcvt into a single
7239 fixed-point fcvt. */
7240 if (GET_CODE (x) == MULT
7241 && ((VECTOR_MODE_P (mode)
7242 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
7243 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
7244 {
7245 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
7246 0, speed);
7247 return true;
7248 }
7249
e548c9df 7250 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
7251 return true;
7252
b292109f 7253 case ABS:
b6875aac
KV
7254 if (VECTOR_MODE_P (mode))
7255 {
7256 /* ABS (vector). */
7257 if (speed)
7258 *cost += extra_cost->vect.alu;
7259 }
7260 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 7261 {
19261b99
KT
7262 op0 = XEXP (x, 0);
7263
7264 /* FABD, which is analogous to FADD. */
7265 if (GET_CODE (op0) == MINUS)
7266 {
e548c9df
AM
7267 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
7268 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
7269 if (speed)
7270 *cost += extra_cost->fp[mode == DFmode].addsub;
7271
7272 return true;
7273 }
7274 /* Simple FABS is analogous to FNEG. */
b292109f
JG
7275 if (speed)
7276 *cost += extra_cost->fp[mode == DFmode].neg;
7277 }
7278 else
7279 {
7280	  /* Integer ABS will either be split into
7281 two arithmetic instructions, or will be an ABS
7282 (scalar), which we don't model. */
7283 *cost = COSTS_N_INSNS (2);
7284 if (speed)
7285 *cost += 2 * extra_cost->alu.arith;
7286 }
7287 return false;
7288
7289 case SMAX:
7290 case SMIN:
7291 if (speed)
7292 {
b6875aac
KV
7293 if (VECTOR_MODE_P (mode))
7294 *cost += extra_cost->vect.alu;
7295 else
7296 {
7297 /* FMAXNM/FMINNM/FMAX/FMIN.
7298 TODO: This may not be accurate for all implementations, but
7299 we do not model this in the cost tables. */
7300 *cost += extra_cost->fp[mode == DFmode].addsub;
7301 }
b292109f
JG
7302 }
7303 return false;
7304
61263118
KT
7305 case UNSPEC:
7306 /* The floating point round to integer frint* instructions. */
7307 if (aarch64_frint_unspec_p (XINT (x, 1)))
7308 {
7309 if (speed)
7310 *cost += extra_cost->fp[mode == DFmode].roundint;
7311
7312 return false;
7313 }
781aeb73
KT
7314
7315 if (XINT (x, 1) == UNSPEC_RBIT)
7316 {
7317 if (speed)
7318 *cost += extra_cost->alu.rev;
7319
7320 return false;
7321 }
61263118
KT
7322 break;
7323
fb620c4a
JG
7324 case TRUNCATE:
7325
7326 /* Decompose <su>muldi3_highpart. */
7327 if (/* (truncate:DI */
7328 mode == DImode
7329 /* (lshiftrt:TI */
7330 && GET_MODE (XEXP (x, 0)) == TImode
7331 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7332 /* (mult:TI */
7333 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7334 /* (ANY_EXTEND:TI (reg:DI))
7335 (ANY_EXTEND:TI (reg:DI))) */
7336 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7337 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
7338 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
7339 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
7340 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
7341 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
7342 /* (const_int 64) */
7343 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7344 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
7345 {
7346 /* UMULH/SMULH. */
7347 if (speed)
7348 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
7349 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
7350 mode, MULT, 0, speed);
7351 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
7352 mode, MULT, 1, speed);
fb620c4a
JG
7353 return true;
7354 }
7355
7356 /* Fall through. */
43e9d192 7357 default:
61263118 7358 break;
43e9d192 7359 }
61263118
KT
7360
7361 if (dump_file && (dump_flags & TDF_DETAILS))
7362 fprintf (dump_file,
7363 "\nFailed to cost RTX. Assuming default cost.\n");
7364
7365 return true;
43e9d192
IB
7366}
7367
0ee859b5
JG
7368/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
7369 calculated for X. This cost is stored in *COST. Returns true
7370 if the total cost of X was calculated. */
7371static bool
e548c9df 7372aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
7373 int param, int *cost, bool speed)
7374{
e548c9df 7375 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5
JG
7376
7377 if (dump_file && (dump_flags & TDF_DETAILS))
7378 {
7379 print_rtl_single (dump_file, x);
7380 fprintf (dump_file, "\n%s cost: %d (%s)\n",
7381 speed ? "Hot" : "Cold",
7382 *cost, result ? "final" : "partial");
7383 }
7384
7385 return result;
7386}
7387
43e9d192 7388static int
ef4bddc2 7389aarch64_register_move_cost (machine_mode mode,
8a3a7e67 7390 reg_class_t from_i, reg_class_t to_i)
43e9d192 7391{
8a3a7e67
RH
7392 enum reg_class from = (enum reg_class) from_i;
7393 enum reg_class to = (enum reg_class) to_i;
43e9d192 7394 const struct cpu_regmove_cost *regmove_cost
b175b679 7395 = aarch64_tune_params.regmove_cost;
43e9d192 7396
3be07662 7397 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
2876a13f 7398 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
3be07662
WD
7399 to = GENERAL_REGS;
7400
2876a13f 7401 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
3be07662
WD
7402 from = GENERAL_REGS;
7403
6ee70f81
AP
7404 /* Moving between GPR and stack cost is the same as GP2GP. */
7405 if ((from == GENERAL_REGS && to == STACK_REG)
7406 || (to == GENERAL_REGS && from == STACK_REG))
7407 return regmove_cost->GP2GP;
7408
7409 /* To/From the stack register, we move via the gprs. */
7410 if (to == STACK_REG || from == STACK_REG)
7411 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
7412 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
7413
8919453c
WD
7414 if (GET_MODE_SIZE (mode) == 16)
7415 {
7416 /* 128-bit operations on general registers require 2 instructions. */
7417 if (from == GENERAL_REGS && to == GENERAL_REGS)
7418 return regmove_cost->GP2GP * 2;
7419 else if (from == GENERAL_REGS)
7420 return regmove_cost->GP2FP * 2;
7421 else if (to == GENERAL_REGS)
7422 return regmove_cost->FP2GP * 2;
7423
7424 /* When AdvSIMD instructions are disabled it is not possible to move
7425 a 128-bit value directly between Q registers. This is handled in
7426 secondary reload. A general register is used as a scratch to move
7427 the upper DI value and the lower DI value is moved directly,
7428 hence the cost is the sum of three moves. */
7429 if (! TARGET_SIMD)
7430 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
7431
7432 return regmove_cost->FP2FP;
7433 }
7434
43e9d192
IB
7435 if (from == GENERAL_REGS && to == GENERAL_REGS)
7436 return regmove_cost->GP2GP;
7437 else if (from == GENERAL_REGS)
7438 return regmove_cost->GP2FP;
7439 else if (to == GENERAL_REGS)
7440 return regmove_cost->FP2GP;
7441
43e9d192
IB
7442 return regmove_cost->FP2FP;
7443}
7444
7445static int
ef4bddc2 7446aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
7447 reg_class_t rclass ATTRIBUTE_UNUSED,
7448 bool in ATTRIBUTE_UNUSED)
7449{
b175b679 7450 return aarch64_tune_params.memmov_cost;
43e9d192
IB
7451}
7452
0c30e0f3
EM
7453/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
7454 to optimize 1.0/sqrt. */
ee62a5a6
RS
7455
7456static bool
9acc9cbe 7457use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
7458{
7459 return (!flag_trapping_math
7460 && flag_unsafe_math_optimizations
9acc9cbe
EM
7461 && ((aarch64_tune_params.approx_modes->recip_sqrt
7462 & AARCH64_APPROX_MODE (mode))
1a33079e 7463 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
7464}
7465
0c30e0f3
EM
7466/* Function to decide when to use the approximate reciprocal square root
7467 builtin. */
a6fc00da
BH
7468
7469static tree
ee62a5a6 7470aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 7471{
9acc9cbe
EM
7472 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
7473
7474 if (!use_rsqrt_p (mode))
a6fc00da 7475 return NULL_TREE;
ee62a5a6 7476 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
a6fc00da
BH
7477}
7478
7479typedef rtx (*rsqrte_type) (rtx, rtx);
7480
98daafa0
EM
7481/* Select reciprocal square root initial estimate insn depending on machine
7482 mode. */
a6fc00da 7483
98daafa0 7484static rsqrte_type
a6fc00da
BH
7485get_rsqrte_type (machine_mode mode)
7486{
7487 switch (mode)
7488 {
2a823433
JW
7489 case DFmode: return gen_aarch64_rsqrtedf;
7490 case SFmode: return gen_aarch64_rsqrtesf;
7491 case V2DFmode: return gen_aarch64_rsqrtev2df;
7492 case V2SFmode: return gen_aarch64_rsqrtev2sf;
7493 case V4SFmode: return gen_aarch64_rsqrtev4sf;
a6fc00da
BH
7494 default: gcc_unreachable ();
7495 }
7496}
7497
7498typedef rtx (*rsqrts_type) (rtx, rtx, rtx);
7499
98daafa0 7500/* Select reciprocal square root series step insn depending on machine mode. */
a6fc00da 7501
98daafa0 7502static rsqrts_type
a6fc00da
BH
7503get_rsqrts_type (machine_mode mode)
7504{
7505 switch (mode)
7506 {
00ea75d4
JW
7507 case DFmode: return gen_aarch64_rsqrtsdf;
7508 case SFmode: return gen_aarch64_rsqrtssf;
7509 case V2DFmode: return gen_aarch64_rsqrtsv2df;
7510 case V2SFmode: return gen_aarch64_rsqrtsv2sf;
7511 case V4SFmode: return gen_aarch64_rsqrtsv4sf;
a6fc00da
BH
7512 default: gcc_unreachable ();
7513 }
7514}
7515
98daafa0
EM
7516/* Emit instruction sequence to compute either the approximate square root
7517 or its approximate reciprocal, depending on the flag RECP, and return
7518 whether the sequence was emitted or not. */
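/* A sketch of the underlying math, assuming the standard Newton-Raphson
   refinement for 1/sqrt (d) (the insn sequence itself is emitted below):
   each iteration computes
     x' = x * (3 - d * x * x) / 2,
   where frsqrte supplies the initial estimate x and frsqrts computes
   (3 - a * b) / 2.  For the non-reciprocal case, sqrt (d) is recovered at
   the end as d * (1 / sqrt (d)).  */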
a6fc00da 7519
98daafa0
EM
7520bool
7521aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 7522{
98daafa0
EM
7523 machine_mode mode = GET_MODE (dst);
7524 machine_mode mmsk = mode_for_vector
7525 (int_mode_for_mode (GET_MODE_INNER (mode)),
7526 GET_MODE_NUNITS (mode));
7527 bool use_approx_sqrt_p = (!recp
7528 && (flag_mlow_precision_sqrt
7529 || (aarch64_tune_params.approx_modes->sqrt
7530 & AARCH64_APPROX_MODE (mode))));
7531 bool use_approx_rsqrt_p = (recp
7532 && (flag_mrecip_low_precision_sqrt
7533 || (aarch64_tune_params.approx_modes->recip_sqrt
7534 & AARCH64_APPROX_MODE (mode))));
7535
7536 if (!flag_finite_math_only
7537 || flag_trapping_math
7538 || !flag_unsafe_math_optimizations
7539 || !(use_approx_sqrt_p || use_approx_rsqrt_p)
7540 || optimize_function_for_size_p (cfun))
7541 return false;
a6fc00da 7542
98daafa0
EM
7543 rtx xmsk = gen_reg_rtx (mmsk);
7544 if (!recp)
7545 /* When calculating the approximate square root, compare the argument with
7546 0.0 and create a mask. */
7547 emit_insn (gen_rtx_SET (xmsk, gen_rtx_NEG (mmsk, gen_rtx_EQ (mmsk, src,
7548 CONST0_RTX (mode)))));
a6fc00da 7549
98daafa0
EM
7550 /* Estimate the approximate reciprocal square root. */
7551 rtx xdst = gen_reg_rtx (mode);
7552 emit_insn ((*get_rsqrte_type (mode)) (xdst, src));
a6fc00da 7553
98daafa0
EM
7554 /* Iterate over the series twice for SF and thrice for DF. */
7555 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
a6fc00da 7556
98daafa0
EM
7557 /* Optionally iterate over the series once less for faster performance
7558     while sacrificing some accuracy.  */
7559 if ((recp && flag_mrecip_low_precision_sqrt)
7560 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
7561 iterations--;
7562
98daafa0
EM
7563 /* Iterate over the series to calculate the approximate reciprocal square
7564 root. */
7565 rtx x1 = gen_reg_rtx (mode);
7566 while (iterations--)
a6fc00da 7567 {
a6fc00da 7568 rtx x2 = gen_reg_rtx (mode);
98daafa0
EM
7569 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
7570
7571 emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2));
a6fc00da 7572
98daafa0
EM
7573 if (iterations > 0)
7574 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
7575 }
7576
7577 if (!recp)
7578 {
7579 /* Qualify the approximate reciprocal square root when the argument is
7580 0.0 by squashing the intermediary result to 0.0. */
7581 rtx xtmp = gen_reg_rtx (mmsk);
7582 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
7583 gen_rtx_SUBREG (mmsk, xdst, 0)));
7584 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 7585
98daafa0
EM
7586 /* Calculate the approximate square root. */
7587 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
a6fc00da
BH
7588 }
7589
98daafa0
EM
7590 /* Finalize the approximation. */
7591 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
7592
7593 return true;
a6fc00da
BH
7594}
7595
79a2bc2d
EM
7596typedef rtx (*recpe_type) (rtx, rtx);
7597
7598/* Select reciprocal initial estimate insn depending on machine mode. */
7599
7600static recpe_type
7601get_recpe_type (machine_mode mode)
7602{
7603 switch (mode)
7604 {
7605 case SFmode: return (gen_aarch64_frecpesf);
7606 case V2SFmode: return (gen_aarch64_frecpev2sf);
7607 case V4SFmode: return (gen_aarch64_frecpev4sf);
7608 case DFmode: return (gen_aarch64_frecpedf);
7609 case V2DFmode: return (gen_aarch64_frecpev2df);
7610 default: gcc_unreachable ();
7611 }
7612}
7613
7614typedef rtx (*recps_type) (rtx, rtx, rtx);
7615
7616/* Select reciprocal series step insn depending on machine mode. */
7617
7618static recps_type
7619get_recps_type (machine_mode mode)
7620{
7621 switch (mode)
7622 {
7623 case SFmode: return (gen_aarch64_frecpssf);
7624 case V2SFmode: return (gen_aarch64_frecpsv2sf);
7625 case V4SFmode: return (gen_aarch64_frecpsv4sf);
7626 case DFmode: return (gen_aarch64_frecpsdf);
7627 case V2DFmode: return (gen_aarch64_frecpsv2df);
7628 default: gcc_unreachable ();
7629 }
7630}
7631
7632/* Emit the instruction sequence to compute the approximation for the division
7633 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
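/* A sketch of the underlying math, assuming the standard Newton-Raphson
   refinement for the reciprocal 1/den (the insn sequence itself is emitted
   below): each iteration computes
     x' = x * (2 - den * x),
   where frecpe supplies the initial estimate x and frecps computes
   (2 - a * b).  The quotient is then approximated as num * (1 / den).  */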
7634
7635bool
7636aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
7637{
7638 machine_mode mode = GET_MODE (quo);
7639 bool use_approx_division_p = (flag_mlow_precision_div
7640 || (aarch64_tune_params.approx_modes->division
7641 & AARCH64_APPROX_MODE (mode)));
7642
7643 if (!flag_finite_math_only
7644 || flag_trapping_math
7645 || !flag_unsafe_math_optimizations
7646 || optimize_function_for_size_p (cfun)
7647 || !use_approx_division_p)
7648 return false;
7649
7650 /* Estimate the approximate reciprocal. */
7651 rtx xrcp = gen_reg_rtx (mode);
7652 emit_insn ((*get_recpe_type (mode)) (xrcp, den));
7653
7654 /* Iterate over the series twice for SF and thrice for DF. */
7655 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
7656
7657 /* Optionally iterate over the series once less for faster performance,
7658     while sacrificing some accuracy.  */
7659 if (flag_mlow_precision_div)
7660 iterations--;
7661
7662 /* Iterate over the series to calculate the approximate reciprocal. */
7663 rtx xtmp = gen_reg_rtx (mode);
7664 while (iterations--)
7665 {
7666 emit_insn ((*get_recps_type (mode)) (xtmp, xrcp, den));
7667
7668 if (iterations > 0)
7669 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
7670 }
7671
7672 if (num != CONST1_RTX (mode))
7673 {
7674 /* As the approximate reciprocal of DEN is already calculated, only
7675 calculate the approximate division when NUM is not 1.0. */
7676 rtx xnum = force_reg (mode, num);
7677 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
7678 }
7679
7680 /* Finalize the approximation. */
7681 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
7682 return true;
7683}
7684
d126a4ae
AP
7685/* Return the number of instructions that can be issued per cycle. */
7686static int
7687aarch64_sched_issue_rate (void)
7688{
b175b679 7689 return aarch64_tune_params.issue_rate;
d126a4ae
AP
7690}
7691
d03f7e44
MK
7692static int
7693aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
7694{
7695 int issue_rate = aarch64_sched_issue_rate ();
7696
7697 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
7698}
7699
2d6bc7fa
KT
7700
7701/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
7702 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
7703 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
7704
7705static int
7706aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
7707 int ready_index)
7708{
7709 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
7710}
7711
7712
8990e73a
TB
7713/* Vectorizer cost model target hooks. */
7714
7715/* Implement targetm.vectorize.builtin_vectorization_cost. */
7716static int
7717aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
7718 tree vectype,
7719 int misalign ATTRIBUTE_UNUSED)
7720{
7721 unsigned elements;
7722
7723 switch (type_of_cost)
7724 {
7725 case scalar_stmt:
b175b679 7726 return aarch64_tune_params.vec_costs->scalar_stmt_cost;
8990e73a
TB
7727
7728 case scalar_load:
b175b679 7729 return aarch64_tune_params.vec_costs->scalar_load_cost;
8990e73a
TB
7730
7731 case scalar_store:
b175b679 7732 return aarch64_tune_params.vec_costs->scalar_store_cost;
8990e73a
TB
7733
7734 case vector_stmt:
b175b679 7735 return aarch64_tune_params.vec_costs->vec_stmt_cost;
8990e73a
TB
7736
7737 case vector_load:
b175b679 7738 return aarch64_tune_params.vec_costs->vec_align_load_cost;
8990e73a
TB
7739
7740 case vector_store:
b175b679 7741 return aarch64_tune_params.vec_costs->vec_store_cost;
8990e73a
TB
7742
7743 case vec_to_scalar:
b175b679 7744 return aarch64_tune_params.vec_costs->vec_to_scalar_cost;
8990e73a
TB
7745
7746 case scalar_to_vec:
b175b679 7747 return aarch64_tune_params.vec_costs->scalar_to_vec_cost;
8990e73a
TB
7748
7749 case unaligned_load:
b175b679 7750 return aarch64_tune_params.vec_costs->vec_unalign_load_cost;
8990e73a
TB
7751
7752 case unaligned_store:
b175b679 7753 return aarch64_tune_params.vec_costs->vec_unalign_store_cost;
8990e73a
TB
7754
7755 case cond_branch_taken:
b175b679 7756 return aarch64_tune_params.vec_costs->cond_taken_branch_cost;
8990e73a
TB
7757
7758 case cond_branch_not_taken:
b175b679 7759 return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost;
8990e73a
TB
7760
7761 case vec_perm:
c428f91c
WD
7762 return aarch64_tune_params.vec_costs->vec_permute_cost;
7763
8990e73a 7764 case vec_promote_demote:
b175b679 7765 return aarch64_tune_params.vec_costs->vec_stmt_cost;
8990e73a
TB
7766
7767 case vec_construct:
7768 elements = TYPE_VECTOR_SUBPARTS (vectype);
7769 return elements / 2 + 1;
7770
7771 default:
7772 gcc_unreachable ();
7773 }
7774}
7775
7776/* Implement targetm.vectorize.add_stmt_cost. */
7777static unsigned
7778aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
7779 struct _stmt_vec_info *stmt_info, int misalign,
7780 enum vect_cost_model_location where)
7781{
7782 unsigned *cost = (unsigned *) data;
7783 unsigned retval = 0;
7784
7785 if (flag_vect_cost_model)
7786 {
7787 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
7788 int stmt_cost =
7789 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
7790
7791 /* Statements in an inner loop relative to the loop being
7792 vectorized are weighted more heavily. The value here is
058e4c71 7793 arbitrary and could potentially be improved with analysis. */
8990e73a 7794 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 7795 count *= 50; /* FIXME */
8990e73a
TB
7796
7797 retval = (unsigned) (count * stmt_cost);
7798 cost[where] += retval;
7799 }
7800
7801 return retval;
7802}
7803
0cfff2a1 7804static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 7805
0cfff2a1
KT
7806/* Parse the TO_PARSE string and put the architecture struct that it
7807 selects into RES and the architectural features into ISA_FLAGS.
7808 Return an aarch64_parse_opt_result describing the parse result.
7809 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
43e9d192 7810
0cfff2a1
KT
7811static enum aarch64_parse_opt_result
7812aarch64_parse_arch (const char *to_parse, const struct processor **res,
7813 unsigned long *isa_flags)
43e9d192
IB
7814{
7815 char *ext;
7816 const struct processor *arch;
0cfff2a1 7817 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
7818 size_t len;
7819
0cfff2a1 7820 strcpy (str, to_parse);
43e9d192
IB
7821
7822 ext = strchr (str, '+');
7823
7824 if (ext != NULL)
7825 len = ext - str;
7826 else
7827 len = strlen (str);
7828
7829 if (len == 0)
0cfff2a1
KT
7830 return AARCH64_PARSE_MISSING_ARG;
7831
43e9d192 7832
0cfff2a1 7833 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
7834 for (arch = all_architectures; arch->name != NULL; arch++)
7835 {
7836 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
7837 {
0cfff2a1 7838 unsigned long isa_temp = arch->flags;
43e9d192
IB
7839
7840 if (ext != NULL)
7841 {
0cfff2a1
KT
7842 /* TO_PARSE string contains at least one extension. */
7843 enum aarch64_parse_opt_result ext_res
7844 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 7845
0cfff2a1
KT
7846 if (ext_res != AARCH64_PARSE_OK)
7847 return ext_res;
ffee7aa9 7848 }
0cfff2a1
KT
7849 /* Extension parsing was successful. Confirm the result
7850 arch and ISA flags. */
7851 *res = arch;
7852 *isa_flags = isa_temp;
7853 return AARCH64_PARSE_OK;
43e9d192
IB
7854 }
7855 }
7856
7857 /* ARCH name not found in list. */
0cfff2a1 7858 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
7859}
7860
0cfff2a1
KT
7861/* Parse the TO_PARSE string and put the result tuning in RES and the
7862 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
7863 describing the parse result. If there is an error parsing, RES and
7864 ISA_FLAGS are left unchanged. */
43e9d192 7865
0cfff2a1
KT
7866static enum aarch64_parse_opt_result
7867aarch64_parse_cpu (const char *to_parse, const struct processor **res,
7868 unsigned long *isa_flags)
43e9d192
IB
7869{
7870 char *ext;
7871 const struct processor *cpu;
0cfff2a1 7872 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
7873 size_t len;
7874
0cfff2a1 7875 strcpy (str, to_parse);
43e9d192
IB
7876
7877 ext = strchr (str, '+');
7878
7879 if (ext != NULL)
7880 len = ext - str;
7881 else
7882 len = strlen (str);
7883
7884 if (len == 0)
0cfff2a1
KT
7885 return AARCH64_PARSE_MISSING_ARG;
7886
43e9d192
IB
7887
7888 /* Loop through the list of supported CPUs to find a match. */
7889 for (cpu = all_cores; cpu->name != NULL; cpu++)
7890 {
7891 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
7892 {
0cfff2a1
KT
7893 unsigned long isa_temp = cpu->flags;
7894
43e9d192
IB
7895
7896 if (ext != NULL)
7897 {
0cfff2a1
KT
7898 /* TO_PARSE string contains at least one extension. */
7899 enum aarch64_parse_opt_result ext_res
7900 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 7901
0cfff2a1
KT
7902 if (ext_res != AARCH64_PARSE_OK)
7903 return ext_res;
7904 }
7905	  /* Extension parsing was successful.  Confirm the result
7906 cpu and ISA flags. */
7907 *res = cpu;
7908 *isa_flags = isa_temp;
7909 return AARCH64_PARSE_OK;
43e9d192
IB
7910 }
7911 }
7912
7913 /* CPU name not found in list. */
0cfff2a1 7914 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
7915}
7916
0cfff2a1
KT
7917/* Parse the TO_PARSE string and put the cpu it selects into RES.
7918 Return an aarch64_parse_opt_result describing the parse result.
7919   If the parsing fails, RES does not change.  */
43e9d192 7920
0cfff2a1
KT
7921static enum aarch64_parse_opt_result
7922aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
7923{
7924 const struct processor *cpu;
0cfff2a1
KT
7925 char *str = (char *) alloca (strlen (to_parse) + 1);
7926
7927 strcpy (str, to_parse);
43e9d192
IB
7928
7929 /* Loop through the list of supported CPUs to find a match. */
7930 for (cpu = all_cores; cpu->name != NULL; cpu++)
7931 {
7932 if (strcmp (cpu->name, str) == 0)
7933 {
0cfff2a1
KT
7934 *res = cpu;
7935 return AARCH64_PARSE_OK;
43e9d192
IB
7936 }
7937 }
7938
7939 /* CPU name not found in list. */
0cfff2a1 7940 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
7941}
7942
8dec06f2
JG
7943/* Parse TOKEN, which has length LENGTH to see if it is an option
7944 described in FLAG. If it is, return the index bit for that fusion type.
7945 If not, error (printing OPTION_NAME) and return zero. */
7946
7947static unsigned int
7948aarch64_parse_one_option_token (const char *token,
7949 size_t length,
7950 const struct aarch64_flag_desc *flag,
7951 const char *option_name)
7952{
7953 for (; flag->name != NULL; flag++)
7954 {
7955 if (length == strlen (flag->name)
7956 && !strncmp (flag->name, token, length))
7957 return flag->flag;
7958 }
7959
7960 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
7961 return 0;
7962}
7963
7964/* Parse OPTION which is a comma-separated list of flags to enable.
7965 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
7966 default state we inherit from the CPU tuning structures. OPTION_NAME
7967 gives the top-level option we are parsing in the -moverride string,
7968 for use in error messages. */
7969
7970static unsigned int
7971aarch64_parse_boolean_options (const char *option,
7972 const struct aarch64_flag_desc *flags,
7973 unsigned int initial_state,
7974 const char *option_name)
7975{
7976 const char separator = '.';
7977 const char* specs = option;
7978 const char* ntoken = option;
7979 unsigned int found_flags = initial_state;
7980
7981 while ((ntoken = strchr (specs, separator)))
7982 {
7983 size_t token_length = ntoken - specs;
7984 unsigned token_ops = aarch64_parse_one_option_token (specs,
7985 token_length,
7986 flags,
7987 option_name);
7988 /* If we find "none" (or, for simplicity's sake, an error) anywhere
7989 in the token stream, reset the supported operations. So:
7990
7991 adrp+add.cmp+branch.none.adrp+add
7992
7993 would have the result of turning on only adrp+add fusion. */
7994 if (!token_ops)
7995 found_flags = 0;
7996
7997 found_flags |= token_ops;
7998 specs = ++ntoken;
7999 }
8000
8001  /* We ended with a trailing separator; report the ill-formed string.  */
8002 if (!(*specs))
8003 {
8004 error ("%s string ill-formed\n", option_name);
8005 return 0;
8006 }
8007
8008 /* We still have one more token to parse. */
8009 size_t token_length = strlen (specs);
8010 unsigned token_ops = aarch64_parse_one_option_token (specs,
8011 token_length,
8012 flags,
8013 option_name);
8014 if (!token_ops)
8015 found_flags = 0;
8016
8017 found_flags |= token_ops;
8018 return found_flags;
8019}
8020
8021/* Support for overriding instruction fusion. */
8022
8023static void
8024aarch64_parse_fuse_string (const char *fuse_string,
8025 struct tune_params *tune)
8026{
8027 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
8028 aarch64_fusible_pairs,
8029 tune->fusible_ops,
8030 "fuse=");
8031}
8032
8033/* Support for overriding other tuning flags. */
8034
8035static void
8036aarch64_parse_tune_string (const char *tune_string,
8037 struct tune_params *tune)
8038{
8039 tune->extra_tuning_flags
8040 = aarch64_parse_boolean_options (tune_string,
8041 aarch64_tuning_flags,
8042 tune->extra_tuning_flags,
8043 "tune=");
8044}
8045
8046/* Parse TOKEN, which has length LENGTH to see if it is a tuning option
8047   we understand.  If it is, extract the option string and hand it off to
8048 the appropriate function. */
8049
8050void
8051aarch64_parse_one_override_token (const char* token,
8052 size_t length,
8053 struct tune_params *tune)
8054{
8055 const struct aarch64_tuning_override_function *fn
8056 = aarch64_tuning_override_functions;
8057
8058 const char *option_part = strchr (token, '=');
8059 if (!option_part)
8060 {
8061 error ("tuning string missing in option (%s)", token);
8062 return;
8063 }
8064
8065 /* Get the length of the option name. */
8066 length = option_part - token;
8067 /* Skip the '=' to get to the option string. */
8068 option_part++;
8069
8070 for (; fn->name != NULL; fn++)
8071 {
8072 if (!strncmp (fn->name, token, length))
8073 {
8074 fn->parse_override (option_part, tune);
8075 return;
8076 }
8077 }
8078
8079  error ("unknown tuning option (%s)", token);
8080 return;
8081}
8082
5eee3c34
JW
8083/* A checking mechanism for the implementation of the tls size. */
8084
8085static void
8086initialize_aarch64_tls_size (struct gcc_options *opts)
8087{
8088 if (aarch64_tls_size == 0)
8089 aarch64_tls_size = 24;
8090
8091 switch (opts->x_aarch64_cmodel_var)
8092 {
8093 case AARCH64_CMODEL_TINY:
8094    case AARCH64_CMODEL_TINY:
      /* Both the default and the maximum TLS size allowed under tiny are 1M, which
8095 needs two instructions to address, so we clamp the size to 24. */
8096 if (aarch64_tls_size > 24)
8097 aarch64_tls_size = 24;
8098 break;
8099 case AARCH64_CMODEL_SMALL:
8100 /* The maximum TLS size allowed under small is 4G. */
8101 if (aarch64_tls_size > 32)
8102 aarch64_tls_size = 32;
8103 break;
8104 case AARCH64_CMODEL_LARGE:
8105 /* The maximum TLS size allowed under large is 16E.
8106	 FIXME: 16E needs a 64-bit offset, but we only support a 48-bit offset now.  */
8107 if (aarch64_tls_size > 48)
8108 aarch64_tls_size = 48;
8109 break;
8110 default:
8111 gcc_unreachable ();
8112 }
8113
8114 return;
8115}
8116
8dec06f2
JG
8117/* Parse STRING looking for options in the format:
8118 string :: option:string
8119 option :: name=substring
8120 name :: {a-z}
8121 substring :: defined by option. */
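/* For example (illustrative; the accepted names come from
   aarch64_fusible_pairs and aarch64_tuning_flags and may vary between
   releases), a string such as
     fuse=adrp+add.cmp+branch:tune=rename_fma_regs
   selects two fusion pairs and then one extra tuning flag.  */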
8122
8123static void
8124aarch64_parse_override_string (const char* input_string,
8125 struct tune_params* tune)
8126{
8127 const char separator = ':';
8128 size_t string_length = strlen (input_string) + 1;
8129 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
8130 char *string = string_root;
8131 strncpy (string, input_string, string_length);
8132 string[string_length - 1] = '\0';
8133
8134 char* ntoken = string;
8135
8136 while ((ntoken = strchr (string, separator)))
8137 {
8138 size_t token_length = ntoken - string;
8139 /* Make this substring look like a string. */
8140 *ntoken = '\0';
8141 aarch64_parse_one_override_token (string, token_length, tune);
8142 string = ++ntoken;
8143 }
8144
8145 /* One last option to parse. */
8146 aarch64_parse_one_override_token (string, strlen (string), tune);
8147 free (string_root);
8148}
43e9d192 8149
43e9d192
IB
8150
8151static void
0cfff2a1 8152aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 8153{
a3dc8760
NC
8154 /* The logic here is that if we are disabling all frame pointer generation
8155 then we do not need to disable leaf frame pointer generation as a
8156 separate operation. But if we are *only* disabling leaf frame pointer
8157 generation then we set flag_omit_frame_pointer to true, but in
8158 aarch64_frame_pointer_required we return false only for leaf functions.
8159
8160 PR 70044: We have to be careful about being called multiple times for the
8161 same function. Once we have decided to set flag_omit_frame_pointer just
8162 so that we can omit leaf frame pointers, we must then not interpret a
8163 second call as meaning that all frame pointer generation should be
8164 omitted. We do this by setting flag_omit_frame_pointer to a special,
8165 non-zero value. */
8166 if (opts->x_flag_omit_frame_pointer == 2)
8167 opts->x_flag_omit_frame_pointer = 0;
8168
0cfff2a1
KT
8169 if (opts->x_flag_omit_frame_pointer)
8170 opts->x_flag_omit_leaf_frame_pointer = false;
8171 else if (opts->x_flag_omit_leaf_frame_pointer)
a3dc8760 8172 opts->x_flag_omit_frame_pointer = 2;
43e9d192 8173
1be34295 8174 /* If not optimizing for size, set the default
0cfff2a1
KT
8175 alignment to what the target wants. */
8176 if (!opts->x_optimize_size)
43e9d192 8177 {
0cfff2a1
KT
8178 if (opts->x_align_loops <= 0)
8179 opts->x_align_loops = aarch64_tune_params.loop_align;
8180 if (opts->x_align_jumps <= 0)
8181 opts->x_align_jumps = aarch64_tune_params.jump_align;
8182 if (opts->x_align_functions <= 0)
8183 opts->x_align_functions = aarch64_tune_params.function_align;
43e9d192 8184 }
b4f50fd4 8185
9ee6540a
WD
8186 /* We default to no pc-relative literal loads. */
8187
8188 aarch64_pcrelative_literal_loads = false;
8189
8190 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 8191 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
8192 if (opts->x_pcrelative_literal_loads == 1)
8193 aarch64_pcrelative_literal_loads = true;
b4f50fd4 8194
48bb1a55
CL
8195 /* This is PR70113. When building the Linux kernel with
8196 CONFIG_ARM64_ERRATUM_843419, support for relocations
8197 R_AARCH64_ADR_PREL_PG_HI21 and R_AARCH64_ADR_PREL_PG_HI21_NC is
8198 removed from the kernel to avoid loading objects with possibly
9ee6540a 8199 offending sequences. Without -mpc-relative-literal-loads we would
48bb1a55
CL
8200 generate such relocations, preventing the kernel build from
8201 succeeding. */
9ee6540a
WD
8202 if (opts->x_pcrelative_literal_loads == 2
8203 && TARGET_FIX_ERR_A53_843419)
8204 aarch64_pcrelative_literal_loads = true;
8205
8206 /* In the tiny memory model it makes no sense to disallow PC relative
8207 literal pool loads. */
8208 if (aarch64_cmodel == AARCH64_CMODEL_TINY
8209 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
8210 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
8211
8212 /* When enabling the lower precision Newton series for the square root, also
8213 enable it for the reciprocal square root, since the latter is an
8214 intermediary step for the former. */
8215 if (flag_mlow_precision_sqrt)
8216 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 8217}
43e9d192 8218
0cfff2a1
KT
8219/* 'Unpack' up the internal tuning structs and update the options
8220 in OPTS. The caller must have set up selected_tune and selected_arch
8221 as all the other target-specific codegen decisions are
8222 derived from them. */
8223
e4ea20c8 8224void
0cfff2a1
KT
8225aarch64_override_options_internal (struct gcc_options *opts)
8226{
8227 aarch64_tune_flags = selected_tune->flags;
8228 aarch64_tune = selected_tune->sched_core;
8229 /* Make a copy of the tuning parameters attached to the core, which
8230 we may later overwrite. */
8231 aarch64_tune_params = *(selected_tune->tune);
8232 aarch64_architecture_version = selected_arch->architecture_version;
8233
8234 if (opts->x_aarch64_override_tune_string)
8235 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
8236 &aarch64_tune_params);
8237
8238 /* This target defaults to strict volatile bitfields. */
8239 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
8240 opts->x_flag_strict_volatile_bitfields = 1;
8241
0cfff2a1 8242 initialize_aarch64_code_model (opts);
5eee3c34 8243 initialize_aarch64_tls_size (opts);
63892fa2 8244
2d6bc7fa
KT
8245 int queue_depth = 0;
8246 switch (aarch64_tune_params.autoprefetcher_model)
8247 {
8248 case tune_params::AUTOPREFETCHER_OFF:
8249 queue_depth = -1;
8250 break;
8251 case tune_params::AUTOPREFETCHER_WEAK:
8252 queue_depth = 0;
8253 break;
8254 case tune_params::AUTOPREFETCHER_STRONG:
8255 queue_depth = max_insn_queue_index + 1;
8256 break;
8257 default:
8258 gcc_unreachable ();
8259 }
8260
8261 /* We don't mind passing in global_options_set here as we don't use
8262 the *options_set structs anyway. */
8263 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
8264 queue_depth,
8265 opts->x_param_values,
8266 global_options_set.x_param_values);
8267
50487d79
EM
8268 /* Set the L1 cache line size. */
8269 if (selected_cpu->tune->cache_line_size != 0)
8270 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
8271 selected_cpu->tune->cache_line_size,
8272 opts->x_param_values,
8273 global_options_set.x_param_values);
8274
0cfff2a1
KT
8275 aarch64_override_options_after_change_1 (opts);
8276}
43e9d192 8277
0cfff2a1
KT
8278/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
8279   specified in STR and throw errors if appropriate.  Put the results,
361fb3ee
KT
8280   if they are valid, in RES and ISA_FLAGS.  Return whether the option is
8281 valid. */
43e9d192 8282
361fb3ee 8283static bool
0cfff2a1
KT
8284aarch64_validate_mcpu (const char *str, const struct processor **res,
8285 unsigned long *isa_flags)
8286{
8287 enum aarch64_parse_opt_result parse_res
8288 = aarch64_parse_cpu (str, res, isa_flags);
8289
8290 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 8291 return true;
0cfff2a1
KT
8292
8293 switch (parse_res)
8294 {
8295 case AARCH64_PARSE_MISSING_ARG:
8296 error ("missing cpu name in -mcpu=%qs", str);
8297 break;
8298 case AARCH64_PARSE_INVALID_ARG:
8299 error ("unknown value %qs for -mcpu", str);
8300 break;
8301 case AARCH64_PARSE_INVALID_FEATURE:
8302 error ("invalid feature modifier in -mcpu=%qs", str);
8303 break;
8304 default:
8305 gcc_unreachable ();
8306 }
361fb3ee
KT
8307
8308 return false;
0cfff2a1
KT
8309}
8310
8311/* Validate a command-line -march option. Parse the arch and extensions
8312 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
8313 results, if they are valid, in RES and ISA_FLAGS. Return whether the
8314 option is valid. */
0cfff2a1 8315
361fb3ee 8316static bool
0cfff2a1
KT
8317aarch64_validate_march (const char *str, const struct processor **res,
8318 unsigned long *isa_flags)
8319{
8320 enum aarch64_parse_opt_result parse_res
8321 = aarch64_parse_arch (str, res, isa_flags);
8322
8323 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 8324 return true;
0cfff2a1
KT
8325
8326 switch (parse_res)
8327 {
8328 case AARCH64_PARSE_MISSING_ARG:
8329 error ("missing arch name in -march=%qs", str);
8330 break;
8331 case AARCH64_PARSE_INVALID_ARG:
8332 error ("unknown value %qs for -march", str);
8333 break;
8334 case AARCH64_PARSE_INVALID_FEATURE:
8335 error ("invalid feature modifier in -march=%qs", str);
8336 break;
8337 default:
8338 gcc_unreachable ();
8339 }
361fb3ee
KT
8340
8341 return false;
0cfff2a1
KT
8342}
8343
8344/* Validate a command-line -mtune option. Parse the cpu
8345 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
8346 result, if it is valid, in RES. Return whether the option is
8347 valid. */
0cfff2a1 8348
361fb3ee 8349static bool
0cfff2a1
KT
8350aarch64_validate_mtune (const char *str, const struct processor **res)
8351{
8352 enum aarch64_parse_opt_result parse_res
8353 = aarch64_parse_tune (str, res);
8354
8355 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 8356 return true;
0cfff2a1
KT
8357
8358 switch (parse_res)
8359 {
8360 case AARCH64_PARSE_MISSING_ARG:
8361 error ("missing cpu name in -mtune=%qs", str);
8362 break;
8363 case AARCH64_PARSE_INVALID_ARG:
8364 error ("unknown value %qs for -mtune", str);
8365 break;
8366 default:
8367 gcc_unreachable ();
8368 }
361fb3ee
KT
8369 return false;
8370}
8371
8372/* Return the CPU corresponding to the enum CPU.
8373 If it doesn't specify a cpu, return the default. */
8374
8375static const struct processor *
8376aarch64_get_tune_cpu (enum aarch64_processor cpu)
8377{
8378 if (cpu != aarch64_none)
8379 return &all_cores[cpu];
8380
8381 /* The & 0x3f is to extract the bottom 6 bits that encode the
8382 default cpu as selected by the --with-cpu GCC configure option
8383 in config.gcc.
8384 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
8385 flags mechanism should be reworked to make it more sane. */
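  /* A sketch of the assumed packing: TARGET_CPU_DEFAULT is
     (default ISA flags << 6) | (default cpu ident), so the bottom six bits
     name the core and the remaining bits become the default
     aarch64_isa_flags (see the ">> 6" in aarch64_override_options).  */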
8386 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
8387}
8388
8389/* Return the architecture corresponding to the enum ARCH.
8390 If it doesn't specify a valid architecture, return the default. */
8391
8392static const struct processor *
8393aarch64_get_arch (enum aarch64_arch arch)
8394{
8395 if (arch != aarch64_no_arch)
8396 return &all_architectures[arch];
8397
8398 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
8399
8400 return &all_architectures[cpu->arch];
0cfff2a1
KT
8401}
8402
8403/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
8404 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
8405 tuning structs. In particular it must set selected_tune and
8406 aarch64_isa_flags that define the available ISA features and tuning
8407 decisions. It must also set selected_arch as this will be used to
8408 output the .arch asm tags for each function. */
8409
8410static void
8411aarch64_override_options (void)
8412{
8413 unsigned long cpu_isa = 0;
8414 unsigned long arch_isa = 0;
8415 aarch64_isa_flags = 0;
8416
361fb3ee
KT
8417 bool valid_cpu = true;
8418 bool valid_tune = true;
8419 bool valid_arch = true;
8420
0cfff2a1
KT
8421 selected_cpu = NULL;
8422 selected_arch = NULL;
8423 selected_tune = NULL;
8424
8425 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
8426 If either of -march or -mtune is given, they override their
8427 respective component of -mcpu. */
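  /* For example (illustrative): -mcpu=cortex-a57+crc behaves like
     -march=armv8-a+crc -mtune=cortex-a57, while adding an explicit
     -march on top of it replaces only the architecture/ISA half and
     keeps the cortex-a57 tuning.  */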
8428 if (aarch64_cpu_string)
361fb3ee
KT
8429 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
8430 &cpu_isa);
0cfff2a1
KT
8431
8432 if (aarch64_arch_string)
361fb3ee
KT
8433 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
8434 &arch_isa);
0cfff2a1
KT
8435
8436 if (aarch64_tune_string)
361fb3ee 8437 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192
IB
8438
8439 /* If the user did not specify a processor, choose the default
8440 one for them. This will be the CPU set during configuration using
a3cd0246 8441 --with-cpu, otherwise it is "generic". */
43e9d192
IB
8442 if (!selected_cpu)
8443 {
0cfff2a1
KT
8444 if (selected_arch)
8445 {
8446 selected_cpu = &all_cores[selected_arch->ident];
8447 aarch64_isa_flags = arch_isa;
361fb3ee 8448 explicit_arch = selected_arch->arch;
0cfff2a1
KT
8449 }
8450 else
8451 {
361fb3ee
KT
8452 /* Get default configure-time CPU. */
8453 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
8454 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
8455 }
361fb3ee
KT
8456
8457 if (selected_tune)
8458 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
8459 }
8460 /* If both -mcpu and -march are specified check that they are architecturally
8461 compatible, warn if they're not and prefer the -march ISA flags. */
8462 else if (selected_arch)
8463 {
8464 if (selected_arch->arch != selected_cpu->arch)
8465 {
8466 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
8467 all_architectures[selected_cpu->arch].name,
8468 selected_arch->name);
8469 }
8470 aarch64_isa_flags = arch_isa;
361fb3ee
KT
8471 explicit_arch = selected_arch->arch;
8472 explicit_tune_core = selected_tune ? selected_tune->ident
8473 : selected_cpu->ident;
0cfff2a1
KT
8474 }
8475 else
8476 {
8477 /* -mcpu but no -march. */
8478 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
8479 explicit_tune_core = selected_tune ? selected_tune->ident
8480 : selected_cpu->ident;
8481 gcc_assert (selected_cpu);
8482 selected_arch = &all_architectures[selected_cpu->arch];
8483 explicit_arch = selected_arch->arch;
43e9d192
IB
8484 }
8485
0cfff2a1
KT
8486  /* Set the arch as well, as we will need it when outputting
8487 the .arch directive in assembly. */
8488 if (!selected_arch)
8489 {
8490 gcc_assert (selected_cpu);
8491 selected_arch = &all_architectures[selected_cpu->arch];
8492 }
43e9d192 8493
43e9d192 8494 if (!selected_tune)
3edaf26d 8495 selected_tune = selected_cpu;
43e9d192 8496
0cfff2a1
KT
8497#ifndef HAVE_AS_MABI_OPTION
8498 /* The compiler may have been configured with 2.23.* binutils, which does
8499 not have support for ILP32. */
8500 if (TARGET_ILP32)
8501 error ("Assembler does not support -mabi=ilp32");
8502#endif
43e9d192 8503
361fb3ee
KT
8504 /* Make sure we properly set up the explicit options. */
8505 if ((aarch64_cpu_string && valid_cpu)
8506 || (aarch64_tune_string && valid_tune))
8507 gcc_assert (explicit_tune_core != aarch64_none);
8508
8509 if ((aarch64_cpu_string && valid_cpu)
8510 || (aarch64_arch_string && valid_arch))
8511 gcc_assert (explicit_arch != aarch64_no_arch);
8512
0cfff2a1
KT
8513 aarch64_override_options_internal (&global_options);
8514
8515 /* Save these options as the default ones in case we push and pop them later
8516 while processing functions with potential target attributes. */
8517 target_option_default_node = target_option_current_node
8518 = build_target_option_node (&global_options);
5e396da6 8519
e2fc7193 8520 aarch64_register_fma_steering ();
fde9b31b 8521
43e9d192
IB
8522}
8523
8524/* Implement targetm.override_options_after_change. */
8525
8526static void
8527aarch64_override_options_after_change (void)
8528{
0cfff2a1 8529 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
8530}
8531
8532static struct machine_function *
8533aarch64_init_machine_status (void)
8534{
8535 struct machine_function *machine;
766090c2 8536 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
8537 return machine;
8538}
8539
8540void
8541aarch64_init_expanders (void)
8542{
8543 init_machine_status = aarch64_init_machine_status;
8544}
8545
8546/* A checking mechanism for the implementation of the various code models. */
8547static void
0cfff2a1 8548initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 8549{
0cfff2a1 8550 if (opts->x_flag_pic)
43e9d192 8551 {
0cfff2a1 8552 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
8553 {
8554 case AARCH64_CMODEL_TINY:
8555 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
8556 break;
8557 case AARCH64_CMODEL_SMALL:
34ecdb0f 8558#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
8559 aarch64_cmodel = (flag_pic == 2
8560 ? AARCH64_CMODEL_SMALL_PIC
8561 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
8562#else
8563 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
8564#endif
43e9d192
IB
8565 break;
8566 case AARCH64_CMODEL_LARGE:
8567 sorry ("code model %qs with -f%s", "large",
0cfff2a1 8568 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 8569 break;
43e9d192
IB
8570 default:
8571 gcc_unreachable ();
8572 }
8573 }
8574 else
0cfff2a1 8575 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
8576}
8577
361fb3ee
KT
8578/* Implement TARGET_OPTION_SAVE. */
8579
8580static void
8581aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
8582{
8583 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
8584}
8585
8586/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
8587 using the information saved in PTR. */
8588
8589static void
8590aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
8591{
8592 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
8593 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
8594 opts->x_explicit_arch = ptr->x_explicit_arch;
8595 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
8596 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
8597
8598 aarch64_override_options_internal (opts);
8599}
8600
8601/* Implement TARGET_OPTION_PRINT. */
8602
8603static void
8604aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
8605{
8606 const struct processor *cpu
8607 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
8608 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
8609 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 8610 std::string extension
04a99ebe 8611 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
8612
8613 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
8614 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
8615 arch->name, extension.c_str ());
361fb3ee
KT
8616}
8617
d78006d9
KT
8618static GTY(()) tree aarch64_previous_fndecl;
8619
e4ea20c8
KT
8620void
8621aarch64_reset_previous_fndecl (void)
8622{
8623 aarch64_previous_fndecl = NULL;
8624}
8625
acfc1ac1
KT
8626/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
8627 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
8628 make sure optab availability predicates are recomputed when necessary. */
8629
8630void
8631aarch64_save_restore_target_globals (tree new_tree)
8632{
8633 if (TREE_TARGET_GLOBALS (new_tree))
8634 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
8635 else if (new_tree == target_option_default_node)
8636 restore_target_globals (&default_target_globals);
8637 else
8638 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
8639}
8640
d78006d9
KT
8641/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
8642 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
8643 of the function, if such exists. This function may be called multiple
8644 times on a single function so use aarch64_previous_fndecl to avoid
8645 setting up identical state. */
8646
8647static void
8648aarch64_set_current_function (tree fndecl)
8649{
acfc1ac1
KT
8650 if (!fndecl || fndecl == aarch64_previous_fndecl)
8651 return;
8652
d78006d9
KT
8653 tree old_tree = (aarch64_previous_fndecl
8654 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
8655 : NULL_TREE);
8656
acfc1ac1 8657 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 8658
acfc1ac1
KT
8659 /* If current function has no attributes but the previous one did,
8660 use the default node. */
8661 if (!new_tree && old_tree)
8662 new_tree = target_option_default_node;
d78006d9 8663
acfc1ac1
KT
8664 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
8665 the default have been handled by aarch64_save_restore_target_globals from
8666 aarch64_pragma_target_parse. */
8667 if (old_tree == new_tree)
8668 return;
d78006d9 8669
acfc1ac1 8670 aarch64_previous_fndecl = fndecl;
6e17a23b 8671
acfc1ac1
KT
8672 /* First set the target options. */
8673 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6e17a23b 8674
acfc1ac1 8675 aarch64_save_restore_target_globals (new_tree);
d78006d9 8676}
361fb3ee 8677
5a2c8331
KT
8678/* Enum describing the various ways we can handle attributes.
8679 In many cases we can reuse the generic option handling machinery. */
8680
8681enum aarch64_attr_opt_type
8682{
8683 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
8684 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
8685 aarch64_attr_enum, /* Attribute sets an enum variable. */
8686 aarch64_attr_custom /* Attribute requires a custom handling function. */
8687};
8688
8689/* All the information needed to handle a target attribute.
8690 NAME is the name of the attribute.
9c582551 8691 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
8692 in the definition of enum aarch64_attr_opt_type.
8693 ALLOW_NEG is true if the attribute supports a "no-" form.
8694 HANDLER is the function that takes the attribute string and whether
8695 it is a pragma or attribute and handles the option. It is needed only
8696 when the ATTR_TYPE is aarch64_attr_custom.
8697 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 8698 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
8699 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
8700 aarch64_attr_enum. */
8701
8702struct aarch64_attribute_info
8703{
8704 const char *name;
8705 enum aarch64_attr_opt_type attr_type;
8706 bool allow_neg;
8707 bool (*handler) (const char *, const char *);
8708 enum opt_code opt_num;
8709};
8710
8711/* Handle the ARCH_STR argument to the arch= target attribute.
8712 PRAGMA_OR_ATTR is used in potential error messages. */
8713
8714static bool
8715aarch64_handle_attr_arch (const char *str, const char *pragma_or_attr)
8716{
8717 const struct processor *tmp_arch = NULL;
8718 enum aarch64_parse_opt_result parse_res
8719 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags);
8720
8721 if (parse_res == AARCH64_PARSE_OK)
8722 {
8723 gcc_assert (tmp_arch);
8724 selected_arch = tmp_arch;
8725 explicit_arch = selected_arch->arch;
8726 return true;
8727 }
8728
8729 switch (parse_res)
8730 {
8731 case AARCH64_PARSE_MISSING_ARG:
8732 error ("missing architecture name in 'arch' target %s", pragma_or_attr);
8733 break;
8734 case AARCH64_PARSE_INVALID_ARG:
8735 error ("unknown value %qs for 'arch' target %s", str, pragma_or_attr);
8736 break;
8737 case AARCH64_PARSE_INVALID_FEATURE:
8738 error ("invalid feature modifier %qs for 'arch' target %s",
8739 str, pragma_or_attr);
8740 break;
8741 default:
8742 gcc_unreachable ();
8743 }
8744
8745 return false;
8746}
8747
8748/* Handle the argument CPU_STR to the cpu= target attribute.
8749 PRAGMA_OR_ATTR is used in potential error messages. */
8750
8751static bool
8752aarch64_handle_attr_cpu (const char *str, const char *pragma_or_attr)
8753{
8754 const struct processor *tmp_cpu = NULL;
8755 enum aarch64_parse_opt_result parse_res
8756 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags);
8757
8758 if (parse_res == AARCH64_PARSE_OK)
8759 {
8760 gcc_assert (tmp_cpu);
8761 selected_tune = tmp_cpu;
8762 explicit_tune_core = selected_tune->ident;
8763
8764 selected_arch = &all_architectures[tmp_cpu->arch];
8765 explicit_arch = selected_arch->arch;
8766 return true;
8767 }
8768
8769 switch (parse_res)
8770 {
8771 case AARCH64_PARSE_MISSING_ARG:
8772 error ("missing cpu name in 'cpu' target %s", pragma_or_attr);
8773 break;
8774 case AARCH64_PARSE_INVALID_ARG:
8775 error ("unknown value %qs for 'cpu' target %s", str, pragma_or_attr);
8776 break;
8777 case AARCH64_PARSE_INVALID_FEATURE:
8778 error ("invalid feature modifier %qs for 'cpu' target %s",
8779 str, pragma_or_attr);
8780 break;
8781 default:
8782 gcc_unreachable ();
8783 }
8784
8785 return false;
8786}
8787
8788/* Handle the argument STR to the tune= target attribute.
8789 PRAGMA_OR_ATTR is used in potential error messages. */
8790
8791static bool
8792aarch64_handle_attr_tune (const char *str, const char *pragma_or_attr)
8793{
8794 const struct processor *tmp_tune = NULL;
8795 enum aarch64_parse_opt_result parse_res
8796 = aarch64_parse_tune (str, &tmp_tune);
8797
8798 if (parse_res == AARCH64_PARSE_OK)
8799 {
8800 gcc_assert (tmp_tune);
8801 selected_tune = tmp_tune;
8802 explicit_tune_core = selected_tune->ident;
8803 return true;
8804 }
8805
8806 switch (parse_res)
8807 {
8808 case AARCH64_PARSE_INVALID_ARG:
8809 error ("unknown value %qs for 'tune' target %s", str, pragma_or_attr);
8810 break;
8811 default:
8812 gcc_unreachable ();
8813 }
8814
8815 return false;
8816}
8817
8818/* Parse an architecture extensions target attribute string specified in STR.
8819 For example "+fp+nosimd". Show any errors if needed. Return TRUE
8820 if successful. Update aarch64_isa_flags to reflect the ISA features
8821 modified.
8822 PRAGMA_OR_ATTR is used in potential error messages. */
8823
8824static bool
8825aarch64_handle_attr_isa_flags (char *str, const char *pragma_or_attr)
8826{
8827 enum aarch64_parse_opt_result parse_res;
8828 unsigned long isa_flags = aarch64_isa_flags;
8829
e4ea20c8
KT
8830 /* We allow "+nothing" in the beginning to clear out all architectural
8831 features if the user wants to handpick specific features. */
8832 if (strncmp ("+nothing", str, 8) == 0)
8833 {
8834 isa_flags = 0;
8835 str += 8;
8836 }
8837
5a2c8331
KT
8838 parse_res = aarch64_parse_extension (str, &isa_flags);
8839
8840 if (parse_res == AARCH64_PARSE_OK)
8841 {
8842 aarch64_isa_flags = isa_flags;
8843 return true;
8844 }
8845
8846 switch (parse_res)
8847 {
8848 case AARCH64_PARSE_MISSING_ARG:
8849 error ("missing feature modifier in target %s %qs",
8850 pragma_or_attr, str);
8851 break;
8852
8853 case AARCH64_PARSE_INVALID_FEATURE:
8854 error ("invalid feature modifier in target %s %qs",
8855 pragma_or_attr, str);
8856 break;
8857
8858 default:
8859 gcc_unreachable ();
8860 }
8861
8862 return false;
8863}
8864
8865/* The target attributes that we support. On top of these we also support just
8866 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
8867 handled explicitly in aarch64_process_one_target_attr. */
8868
8869static const struct aarch64_attribute_info aarch64_attributes[] =
8870{
8871 { "general-regs-only", aarch64_attr_mask, false, NULL,
8872 OPT_mgeneral_regs_only },
8873 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
8874 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
8875 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
8876 OPT_mfix_cortex_a53_843419 },
5a2c8331
KT
8877 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
8878 { "strict-align", aarch64_attr_mask, false, NULL, OPT_mstrict_align },
8879 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
8880 OPT_momit_leaf_frame_pointer },
8881 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
8882 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
8883 OPT_march_ },
8884 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
8885 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
8886 OPT_mtune_ },
8887 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
8888};
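/* For illustration (assumed forms derived from the table above, not an
   exhaustive list): these entries accept strings such as
   target ("arch=armv8-a+crc"), target ("cmodel=small") and
   target ("no-omit-leaf-frame-pointer"), besides the bare extension form
   target ("+crc") that aarch64_process_one_target_attr handles directly;
   several of them can typically be combined in one string, separated by
   commas.  */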
8889
8890/* Parse ARG_STR which contains the definition of one target attribute.
8891 Show appropriate errors if any or return true if the attribute is valid.
8892 PRAGMA_OR_ATTR holds the string to use in error messages about whether
8893 we're processing a target attribute or pragma. */
8894
8895static bool
8896aarch64_process_one_target_attr (char *arg_str, const char* pragma_or_attr)
8897{
8898 bool invert = false;
8899
8900 size_t len = strlen (arg_str);
8901
8902 if (len == 0)
8903 {
8904 error ("malformed target %s", pragma_or_attr);
8905 return false;
8906 }
8907
8908 char *str_to_check = (char *) alloca (len + 1);
8909 strcpy (str_to_check, arg_str);
8910
8911 /* Skip leading whitespace. */
8912 while (*str_to_check == ' ' || *str_to_check == '\t')
8913 str_to_check++;
8914
8915 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
8916 It is easier to detect and handle it explicitly here rather than going
8917 through the machinery for the rest of the target attributes in this
8918 function. */
8919 if (*str_to_check == '+')
8920 return aarch64_handle_attr_isa_flags (str_to_check, pragma_or_attr);
8921
8922 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
8923 {
8924 invert = true;
8925 str_to_check += 3;
8926 }
8927 char *arg = strchr (str_to_check, '=');
8928
8929 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
8930 and point ARG to "foo". */
8931 if (arg)
8932 {
8933 *arg = '\0';
8934 arg++;
8935 }
8936 const struct aarch64_attribute_info *p_attr;
16d12992 8937 bool found = false;
5a2c8331
KT
8938 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
8939 {
8940 /* If the names don't match up, or the user has given an argument
8941 to an attribute that doesn't accept one, or didn't give an argument
8942 to an attribute that expects one, fail to match. */
8943 if (strcmp (str_to_check, p_attr->name) != 0)
8944 continue;
8945
16d12992 8946 found = true;
5a2c8331
KT
8947 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
8948 || p_attr->attr_type == aarch64_attr_enum;
8949
8950 if (attr_need_arg_p ^ (arg != NULL))
8951 {
8952 error ("target %s %qs does not accept an argument",
8953 pragma_or_attr, str_to_check);
8954 return false;
8955 }
8956
8957 /* If the name matches but the attribute does not allow "no-" versions
8958 then we can't match. */
8959 if (invert && !p_attr->allow_neg)
8960 {
8961 error ("target %s %qs does not allow a negated form",
8962 pragma_or_attr, str_to_check);
8963 return false;
8964 }
8965
8966 switch (p_attr->attr_type)
8967 {
8968 /* Has a custom handler registered.
8969 For example, cpu=, arch=, tune=. */
8970 case aarch64_attr_custom:
8971 gcc_assert (p_attr->handler);
8972 if (!p_attr->handler (arg, pragma_or_attr))
8973 return false;
8974 break;
8975
8976 /* Either set or unset a boolean option. */
8977 case aarch64_attr_bool:
8978 {
8979 struct cl_decoded_option decoded;
8980
8981 generate_option (p_attr->opt_num, NULL, !invert,
8982 CL_TARGET, &decoded);
8983 aarch64_handle_option (&global_options, &global_options_set,
8984 &decoded, input_location);
8985 break;
8986 }
8987 /* Set or unset a bit in the target_flags. aarch64_handle_option
8988 should know what mask to apply given the option number. */
8989 case aarch64_attr_mask:
8990 {
8991 struct cl_decoded_option decoded;
8992 /* We only need to specify the option number.
8993 aarch64_handle_option will know which mask to apply. */
8994 decoded.opt_index = p_attr->opt_num;
8995 decoded.value = !invert;
8996 aarch64_handle_option (&global_options, &global_options_set,
8997 &decoded, input_location);
8998 break;
8999 }
9000 /* Use the option setting machinery to set an option to an enum. */
9001 case aarch64_attr_enum:
9002 {
9003 gcc_assert (arg);
9004 bool valid;
9005 int value;
9006 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
9007 &value, CL_TARGET);
9008 if (valid)
9009 {
9010 set_option (&global_options, NULL, p_attr->opt_num, value,
9011 NULL, DK_UNSPECIFIED, input_location,
9012 global_dc);
9013 }
9014 else
9015 {
9016 error ("target %s %s=%s is not valid",
9017 pragma_or_attr, str_to_check, arg);
9018 }
9019 break;
9020 }
9021 default:
9022 gcc_unreachable ();
9023 }
9024 }
9025
16d12992
KT
9026 /* If we reached here we either have found an attribute and validated
9027 it or didn't match any. If we matched an attribute but its arguments
9028 were malformed we will have returned false already. */
9029 return found;
5a2c8331
KT
9030}
9031
9032/* Count how many times the character C appears in
9033 NULL-terminated string STR. */
9034
9035static unsigned int
9036num_occurences_in_str (char c, char *str)
9037{
9038 unsigned int res = 0;
9039 while (*str != '\0')
9040 {
9041 if (*str == c)
9042 res++;
9043
9044 str++;
9045 }
9046
9047 return res;
9048}
9049
9050/* Parse the tree in ARGS that contains the target attribute information
9051 and update the global target options space. PRAGMA_OR_ATTR is a string
9052 to be used in error messages, specifying whether this is processing
9053 a target attribute or a target pragma. */
9054
9055bool
9056aarch64_process_target_attr (tree args, const char* pragma_or_attr)
9057{
9058 if (TREE_CODE (args) == TREE_LIST)
9059 {
9060 do
9061 {
9062 tree head = TREE_VALUE (args);
9063 if (head)
9064 {
9065 if (!aarch64_process_target_attr (head, pragma_or_attr))
9066 return false;
9067 }
9068 args = TREE_CHAIN (args);
9069 } while (args);
9070
9071 return true;
9072 }
9073 /* We expect to find a string to parse. */
9074 gcc_assert (TREE_CODE (args) == STRING_CST);
9075
9076 size_t len = strlen (TREE_STRING_POINTER (args));
9077 char *str_to_check = (char *) alloca (len + 1);
9078 strcpy (str_to_check, TREE_STRING_POINTER (args));
9079
9080 if (len == 0)
9081 {
9082 error ("malformed target %s value", pragma_or_attr);
9083 return false;
9084 }
9085
9086 /* Used to catch empty spaces between commas i.e.
9087 attribute ((target ("attr1,,attr2"))). */
9088 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
9089
9090 /* Handle multiple target attributes separated by ','. */
9091 char *token = strtok (str_to_check, ",");
9092
9093 unsigned int num_attrs = 0;
9094 while (token)
9095 {
9096 num_attrs++;
9097 if (!aarch64_process_one_target_attr (token, pragma_or_attr))
9098 {
9099 error ("target %s %qs is invalid", pragma_or_attr, token);
9100 return false;
9101 }
9102
9103 token = strtok (NULL, ",");
9104 }
9105
9106 if (num_attrs != num_commas + 1)
9107 {
9108 error ("malformed target %s list %qs",
9109 pragma_or_attr, TREE_STRING_POINTER (args));
9110 return false;
9111 }
9112
9113 return true;
9114}
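/* For illustration (the names and option values are assumptions chosen
   for the example): the comma-separated form accepted above, reachable
   through either the pragma or the attribute spelling.

     #pragma GCC target ("arch=armv8-a+crc,strict-align")

     __attribute__ ((target ("cpu=cortex-a53,no-fix-cortex-a53-835769")))
     int bar (int);

   Each comma-separated token is handed to aarch64_process_one_target_attr;
   an empty token, as in "attr1,,attr2", makes the num_commas + 1 check
   fail and the whole list is rejected.  */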
9115
9116/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
9117 process attribute ((target ("..."))). */
9118
9119static bool
9120aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
9121{
9122 struct cl_target_option cur_target;
9123 bool ret;
9124 tree old_optimize;
9125 tree new_target, new_optimize;
9126 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
9127
9128 /* If what we're processing is the current pragma string then the
9129 target option node is already stored in target_option_current_node
9130 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
9131 having to re-parse the string. This is especially useful to keep
9132 arm_neon.h compile times down since that header contains a lot
9133 of intrinsics enclosed in pragmas. */
9134 if (!existing_target && args == current_target_pragma)
9135 {
9136 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
9137 return true;
9138 }
5a2c8331
KT
9139 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
9140
9141 old_optimize = build_optimization_node (&global_options);
9142 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
9143
9144 /* If the function changed the optimization levels as well as setting
9145 target options, start with the optimizations specified. */
9146 if (func_optimize && func_optimize != old_optimize)
9147 cl_optimization_restore (&global_options,
9148 TREE_OPTIMIZATION (func_optimize));
9149
9150 /* Save the current target options to restore at the end. */
9151 cl_target_option_save (&cur_target, &global_options);
9152
9153 /* If fndecl already has some target attributes applied to it, unpack
9154 them so that we add this attribute on top of them, rather than
9155 overwriting them. */
9156 if (existing_target)
9157 {
9158 struct cl_target_option *existing_options
9159 = TREE_TARGET_OPTION (existing_target);
9160
9161 if (existing_options)
9162 cl_target_option_restore (&global_options, existing_options);
9163 }
9164 else
9165 cl_target_option_restore (&global_options,
9166 TREE_TARGET_OPTION (target_option_current_node));
9167
9168
9169 ret = aarch64_process_target_attr (args, "attribute");
9170
9171 /* Set up any additional state. */
9172 if (ret)
9173 {
9174 aarch64_override_options_internal (&global_options);
e95a988a
KT
9175 /* Initialize SIMD builtins if we haven't already.
9176 Set current_target_pragma to NULL for the duration so that
9177 the builtin initialization code doesn't try to tag the functions
9178 being built with the attributes specified by any current pragma, thus
9179 going into an infinite recursion. */
9180 if (TARGET_SIMD)
9181 {
9182 tree saved_current_target_pragma = current_target_pragma;
9183 current_target_pragma = NULL;
9184 aarch64_init_simd_builtins ();
9185 current_target_pragma = saved_current_target_pragma;
9186 }
5a2c8331
KT
9187 new_target = build_target_option_node (&global_options);
9188 }
9189 else
9190 new_target = NULL;
9191
9192 new_optimize = build_optimization_node (&global_options);
9193
9194 if (fndecl && ret)
9195 {
9196 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
9197
9198 if (old_optimize != new_optimize)
9199 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
9200 }
9201
9202 cl_target_option_restore (&global_options, &cur_target);
9203
9204 if (old_optimize != new_optimize)
9205 cl_optimization_restore (&global_options,
9206 TREE_OPTIMIZATION (old_optimize));
9207 return ret;
9208}
9209
1fd8d40c
KT
9210/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
9211 tri-bool options (yes, no, don't care) and the default value is
9212 DEF, determine whether to reject inlining. */
9213
9214static bool
9215aarch64_tribools_ok_for_inlining_p (int caller, int callee,
9216 int dont_care, int def)
9217{
9218 /* If the callee doesn't care, always allow inlining. */
9219 if (callee == dont_care)
9220 return true;
9221
9222 /* If the caller doesn't care, always allow inlining. */
9223 if (caller == dont_care)
9224 return true;
9225
9226 /* Otherwise, allow inlining if either the callee and caller values
9227 agree, or if the callee is using the default value. */
9228 return (callee == caller || callee == def);
9229}
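/* Worked example, using the 0/1/2 encoding the callers below pass in
   (2 == don't care): with CALLER = 1 and CALLEE = 2 inlining is allowed
   because the callee does not care; with CALLER = 1, CALLEE = 0 and
   DEF = 1 it is rejected, since the callee explicitly asked for the
   non-default setting.  */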
9230
9231/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
9232 to inline CALLEE into CALLER based on target-specific info.
9233 Make sure that the caller and callee have compatible architectural
9234 features. Then go through the other possible target attributes
9235 and see if they can block inlining. Try not to reject always_inline
9236 callees unless they are incompatible architecturally. */
9237
9238static bool
9239aarch64_can_inline_p (tree caller, tree callee)
9240{
9241 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
9242 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
9243
9244 /* If callee has no option attributes, then it is ok to inline. */
9245 if (!callee_tree)
9246 return true;
9247
9248 struct cl_target_option *caller_opts
9249 = TREE_TARGET_OPTION (caller_tree ? caller_tree
9250 : target_option_default_node);
9251
9252 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
9253
9254
9255 /* Callee's ISA flags should be a subset of the caller's. */
9256 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
9257 != callee_opts->x_aarch64_isa_flags)
9258 return false;
9259
9260 /* Allow functions compiled without strict alignment to be inlined
9261 into strict-aligned callers. */
9262 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
9263 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
9264 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
9265 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
9266 return false;
9267
9268 bool always_inline = lookup_attribute ("always_inline",
9269 DECL_ATTRIBUTES (callee));
9270
9271 /* If the architectural features match up and the callee is always_inline
9272 then the other attributes don't matter. */
9273 if (always_inline)
9274 return true;
9275
9276 if (caller_opts->x_aarch64_cmodel_var
9277 != callee_opts->x_aarch64_cmodel_var)
9278 return false;
9279
9280 if (caller_opts->x_aarch64_tls_dialect
9281 != callee_opts->x_aarch64_tls_dialect)
9282 return false;
9283
9284 /* Honour explicit requests to workaround errata. */
9285 if (!aarch64_tribools_ok_for_inlining_p (
9286 caller_opts->x_aarch64_fix_a53_err835769,
9287 callee_opts->x_aarch64_fix_a53_err835769,
9288 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
9289 return false;
9290
48bb1a55
CL
9291 if (!aarch64_tribools_ok_for_inlining_p (
9292 caller_opts->x_aarch64_fix_a53_err843419,
9293 callee_opts->x_aarch64_fix_a53_err843419,
9294 2, TARGET_FIX_ERR_A53_843419))
9295 return false;
9296
1fd8d40c
KT
9297 /* If the user explicitly specified -momit-leaf-frame-pointer for the
9298 caller and callee and they don't match up, reject inlining. */
9299 if (!aarch64_tribools_ok_for_inlining_p (
9300 caller_opts->x_flag_omit_leaf_frame_pointer,
9301 callee_opts->x_flag_omit_leaf_frame_pointer,
9302 2, 1))
9303 return false;
9304
9305 /* If the callee has specific tuning overrides, respect them. */
9306 if (callee_opts->x_aarch64_override_tune_string != NULL
9307 && caller_opts->x_aarch64_override_tune_string == NULL)
9308 return false;
9309
9310 /* If the user specified tuning override strings for the
9311 caller and callee and they don't match up, reject inlining.
9312 We just do a string compare here, we don't analyze the meaning
9313 of the string, as it would be too costly for little gain. */
9314 if (callee_opts->x_aarch64_override_tune_string
9315 && caller_opts->x_aarch64_override_tune_string
9316 && (strcmp (callee_opts->x_aarch64_override_tune_string,
9317 caller_opts->x_aarch64_override_tune_string) != 0))
9318 return false;
9319
9320 return true;
9321}
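/* Illustrative example of the ISA-subset rule above (the functions and
   options are assumptions, not taken from this file): if a translation
   unit is compiled with plain -march=armv8-a and contains

     __attribute__ ((target ("+crc"))) static inline int callee (int x);
     int caller (int x) { return callee (x); }

   then the callee's ISA flags include CRC while the caller's do not, so
   the subset check fails and the call is not inlined unless the caller
   also enables CRC.  */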
9322
43e9d192
IB
9323/* Return true if SYMBOL_REF X binds locally. */
9324
9325static bool
9326aarch64_symbol_binds_local_p (const_rtx x)
9327{
9328 return (SYMBOL_REF_DECL (x)
9329 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
9330 : SYMBOL_REF_LOCAL_P (x));
9331}
9332
9333/* Return true if SYMBOL_REF X is thread local */
9334static bool
9335aarch64_tls_symbol_p (rtx x)
9336{
9337 if (! TARGET_HAVE_TLS)
9338 return false;
9339
9340 if (GET_CODE (x) != SYMBOL_REF)
9341 return false;
9342
9343 return SYMBOL_REF_TLS_MODEL (x) != 0;
9344}
9345
9346/* Classify a TLS symbol into one of the TLS kinds. */
9347enum aarch64_symbol_type
9348aarch64_classify_tls_symbol (rtx x)
9349{
9350 enum tls_model tls_kind = tls_symbolic_operand_type (x);
9351
9352 switch (tls_kind)
9353 {
9354 case TLS_MODEL_GLOBAL_DYNAMIC:
9355 case TLS_MODEL_LOCAL_DYNAMIC:
9356 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
9357
9358 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
9359 switch (aarch64_cmodel)
9360 {
9361 case AARCH64_CMODEL_TINY:
9362 case AARCH64_CMODEL_TINY_PIC:
9363 return SYMBOL_TINY_TLSIE;
9364 default:
79496620 9365 return SYMBOL_SMALL_TLSIE;
5ae7caad 9366 }
43e9d192
IB
9367
9368 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
9369 if (aarch64_tls_size == 12)
9370 return SYMBOL_TLSLE12;
9371 else if (aarch64_tls_size == 24)
9372 return SYMBOL_TLSLE24;
9373 else if (aarch64_tls_size == 32)
9374 return SYMBOL_TLSLE32;
9375 else if (aarch64_tls_size == 48)
9376 return SYMBOL_TLSLE48;
9377 else
9378 gcc_unreachable ();
43e9d192
IB
9379
9380 case TLS_MODEL_EMULATED:
9381 case TLS_MODEL_NONE:
9382 return SYMBOL_FORCE_TO_MEM;
9383
9384 default:
9385 gcc_unreachable ();
9386 }
9387}
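/* For example (assuming the usual command-line spellings): a local-exec
   __thread variable compiled with -mtls-size=24 is classified as
   SYMBOL_TLSLE24, while an initial-exec access under the small code
   model yields SYMBOL_SMALL_TLSIE.  */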
9388
9389/* Return the method that should be used to access SYMBOL_REF or
a6e0bfa7 9390 LABEL_REF X. */
17f4d4bf 9391
43e9d192 9392enum aarch64_symbol_type
a6e0bfa7 9393aarch64_classify_symbol (rtx x, rtx offset)
43e9d192
IB
9394{
9395 if (GET_CODE (x) == LABEL_REF)
9396 {
9397 switch (aarch64_cmodel)
9398 {
9399 case AARCH64_CMODEL_LARGE:
9400 return SYMBOL_FORCE_TO_MEM;
9401
9402 case AARCH64_CMODEL_TINY_PIC:
9403 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
9404 return SYMBOL_TINY_ABSOLUTE;
9405
1b1e81f8 9406 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
9407 case AARCH64_CMODEL_SMALL_PIC:
9408 case AARCH64_CMODEL_SMALL:
9409 return SYMBOL_SMALL_ABSOLUTE;
9410
9411 default:
9412 gcc_unreachable ();
9413 }
9414 }
9415
17f4d4bf 9416 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 9417 {
43e9d192
IB
9418 if (aarch64_tls_symbol_p (x))
9419 return aarch64_classify_tls_symbol (x);
9420
17f4d4bf
CSS
9421 switch (aarch64_cmodel)
9422 {
9423 case AARCH64_CMODEL_TINY:
15f6e0da 9424 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
9425 the offset does not cause overflow of the final address. But
9426 we have no way of knowing the address of symbol at compile time
9427 so we can't accurately say if the distance between the PC and
9428 symbol + offset is outside the addressable range of +/-1M in the
9429 TINY code model. So we rely on images not being greater than
9430 1M and cap the offset at 1M and anything beyond 1M will have to
15f6e0da
RR
9431 be loaded using an alternative mechanism. Furthermore if the
9432 symbol is a weak reference to something that isn't known to
9433 resolve to a symbol in this module, then force to memory. */
9434 if ((SYMBOL_REF_WEAK (x)
9435 && !aarch64_symbol_binds_local_p (x))
f8b756b7 9436 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
a5350ddc
CSS
9437 return SYMBOL_FORCE_TO_MEM;
9438 return SYMBOL_TINY_ABSOLUTE;
9439
17f4d4bf 9440 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
9441 /* Same reasoning as the tiny code model, but the offset cap here is
9442 4G. */
15f6e0da
RR
9443 if ((SYMBOL_REF_WEAK (x)
9444 && !aarch64_symbol_binds_local_p (x))
3ff5d1f0
TB
9445 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
9446 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
9447 return SYMBOL_FORCE_TO_MEM;
9448 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 9449
17f4d4bf 9450 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 9451 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 9452 return SYMBOL_TINY_GOT;
38e6c9a6
MS
9453 return SYMBOL_TINY_ABSOLUTE;
9454
1b1e81f8 9455 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
9456 case AARCH64_CMODEL_SMALL_PIC:
9457 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
9458 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
9459 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 9460 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 9461
9ee6540a
WD
9462 case AARCH64_CMODEL_LARGE:
9463 /* This is alright even in PIC code as the constant
9464 pool reference is always PC relative and within
9465 the same translation unit. */
9466 if (CONSTANT_POOL_ADDRESS_P (x))
9467 return SYMBOL_SMALL_ABSOLUTE;
9468 else
9469 return SYMBOL_FORCE_TO_MEM;
9470
17f4d4bf
CSS
9471 default:
9472 gcc_unreachable ();
9473 }
43e9d192 9474 }
17f4d4bf 9475
43e9d192
IB
9476 /* By default push everything into the constant pool. */
9477 return SYMBOL_FORCE_TO_MEM;
9478}
9479
43e9d192
IB
9480bool
9481aarch64_constant_address_p (rtx x)
9482{
9483 return (CONSTANT_P (x) && memory_address_p (DImode, x));
9484}
9485
9486bool
9487aarch64_legitimate_pic_operand_p (rtx x)
9488{
9489 if (GET_CODE (x) == SYMBOL_REF
9490 || (GET_CODE (x) == CONST
9491 && GET_CODE (XEXP (x, 0)) == PLUS
9492 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
9493 return false;
9494
9495 return true;
9496}
9497
3520f7cc
JG
9498/* Return true if X holds either a quarter-precision or
9499 floating-point +0.0 constant. */
9500static bool
ef4bddc2 9501aarch64_valid_floating_const (machine_mode mode, rtx x)
3520f7cc
JG
9502{
9503 if (!CONST_DOUBLE_P (x))
9504 return false;
9505
6a0f8c01
JW
9506 if (aarch64_float_const_zero_rtx_p (x))
9507 return true;
9508
9509 /* We only handle moving 0.0 to a TFmode register. */
3520f7cc
JG
9510 if (!(mode == SFmode || mode == DFmode))
9511 return false;
9512
3520f7cc
JG
9513 return aarch64_float_const_representable_p (x);
9514}
9515
43e9d192 9516static bool
ef4bddc2 9517aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192
IB
9518{
9519 /* Do not allow vector struct mode constants. We could support
9520 0 and -1 easily, but they need support in aarch64-simd.md. */
9521 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
9522 return false;
9523
9524 /* This could probably go away because
9525 we now decompose CONST_INTs according to expand_mov_immediate. */
9526 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 9527 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
9528 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
9529 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
9530
9531 if (GET_CODE (x) == HIGH
9532 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
9533 return true;
9534
9535 return aarch64_constant_address_p (x);
9536}
9537
a5bc806c 9538rtx
43e9d192
IB
9539aarch64_load_tp (rtx target)
9540{
9541 if (!target
9542 || GET_MODE (target) != Pmode
9543 || !register_operand (target, Pmode))
9544 target = gen_reg_rtx (Pmode);
9545
9546 /* Can return in any reg. */
9547 emit_insn (gen_aarch64_load_tp_hard (target));
9548 return target;
9549}
9550
43e9d192
IB
9551/* On AAPCS systems, this is the "struct __va_list". */
9552static GTY(()) tree va_list_type;
9553
9554/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
9555 Return the type to use as __builtin_va_list.
9556
9557 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
9558
9559 struct __va_list
9560 {
9561 void *__stack;
9562 void *__gr_top;
9563 void *__vr_top;
9564 int __gr_offs;
9565 int __vr_offs;
9566 }; */
9567
9568static tree
9569aarch64_build_builtin_va_list (void)
9570{
9571 tree va_list_name;
9572 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
9573
9574 /* Create the type. */
9575 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
9576 /* Give it the required name. */
9577 va_list_name = build_decl (BUILTINS_LOCATION,
9578 TYPE_DECL,
9579 get_identifier ("__va_list"),
9580 va_list_type);
9581 DECL_ARTIFICIAL (va_list_name) = 1;
9582 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 9583 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
9584
9585 /* Create the fields. */
9586 f_stack = build_decl (BUILTINS_LOCATION,
9587 FIELD_DECL, get_identifier ("__stack"),
9588 ptr_type_node);
9589 f_grtop = build_decl (BUILTINS_LOCATION,
9590 FIELD_DECL, get_identifier ("__gr_top"),
9591 ptr_type_node);
9592 f_vrtop = build_decl (BUILTINS_LOCATION,
9593 FIELD_DECL, get_identifier ("__vr_top"),
9594 ptr_type_node);
9595 f_groff = build_decl (BUILTINS_LOCATION,
9596 FIELD_DECL, get_identifier ("__gr_offs"),
9597 integer_type_node);
9598 f_vroff = build_decl (BUILTINS_LOCATION,
9599 FIELD_DECL, get_identifier ("__vr_offs"),
9600 integer_type_node);
9601
88e3bdd1 9602 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
9603 NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
9604 purpose to identify whether the code is updating va_list internal
9605 offset fields through irregular way. */
9606 va_list_gpr_counter_field = f_groff;
9607 va_list_fpr_counter_field = f_vroff;
9608
43e9d192
IB
9609 DECL_ARTIFICIAL (f_stack) = 1;
9610 DECL_ARTIFICIAL (f_grtop) = 1;
9611 DECL_ARTIFICIAL (f_vrtop) = 1;
9612 DECL_ARTIFICIAL (f_groff) = 1;
9613 DECL_ARTIFICIAL (f_vroff) = 1;
9614
9615 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
9616 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
9617 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
9618 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
9619 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
9620
9621 TYPE_FIELDS (va_list_type) = f_stack;
9622 DECL_CHAIN (f_stack) = f_grtop;
9623 DECL_CHAIN (f_grtop) = f_vrtop;
9624 DECL_CHAIN (f_vrtop) = f_groff;
9625 DECL_CHAIN (f_groff) = f_vroff;
9626
9627 /* Compute its layout. */
9628 layout_type (va_list_type);
9629
9630 return va_list_type;
9631}
9632
9633/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
9634static void
9635aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
9636{
9637 const CUMULATIVE_ARGS *cum;
9638 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
9639 tree stack, grtop, vrtop, groff, vroff;
9640 tree t;
88e3bdd1
JW
9641 int gr_save_area_size = cfun->va_list_gpr_size;
9642 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
9643 int vr_offset;
9644
9645 cum = &crtl->args.info;
88e3bdd1
JW
9646 if (cfun->va_list_gpr_size)
9647 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
9648 cfun->va_list_gpr_size);
9649 if (cfun->va_list_fpr_size)
9650 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
9651 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 9652
d5726973 9653 if (!TARGET_FLOAT)
43e9d192 9654 {
261fb553 9655 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
9656 vr_save_area_size = 0;
9657 }
9658
9659 f_stack = TYPE_FIELDS (va_list_type_node);
9660 f_grtop = DECL_CHAIN (f_stack);
9661 f_vrtop = DECL_CHAIN (f_grtop);
9662 f_groff = DECL_CHAIN (f_vrtop);
9663 f_vroff = DECL_CHAIN (f_groff);
9664
9665 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
9666 NULL_TREE);
9667 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
9668 NULL_TREE);
9669 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
9670 NULL_TREE);
9671 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
9672 NULL_TREE);
9673 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
9674 NULL_TREE);
9675
9676 /* Emit code to initialize STACK, which points to the next varargs stack
9677 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
9678 by named arguments. STACK is 8-byte aligned. */
9679 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
9680 if (cum->aapcs_stack_size > 0)
9681 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
9682 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
9683 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9684
9685 /* Emit code to initialize GRTOP, the top of the GR save area.
9686 virtual_incoming_args_rtx should have been 16 byte aligned. */
9687 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
9688 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
9689 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9690
9691 /* Emit code to initialize VRTOP, the top of the VR save area.
9692 This address is gr_save_area_bytes below GRTOP, rounded
9693 down to the next 16-byte boundary. */
9694 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
9695 vr_offset = ROUND_UP (gr_save_area_size,
9696 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
9697
9698 if (vr_offset)
9699 t = fold_build_pointer_plus_hwi (t, -vr_offset);
9700 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
9701 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9702
9703 /* Emit code to initialize GROFF, the offset from GRTOP of the
9704 next GPR argument. */
9705 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
9706 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
9707 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9708
9709 /* Likewise emit code to initialize VROFF, the offset from VRTOP
9710 of the next VR argument. */
9711 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
9712 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
9713 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9714}
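/* For illustration (the function below is an assumption, not part of
   this file): given a variadic function such as

     int sum (int n, ...) { va_list ap; va_start (ap, n); ... }

   the expansion above sets ap.__stack to the first anonymous argument
   passed on the stack, ap.__gr_top/__vr_top to the tops of the general
   and FP/SIMD register save areas, and ap.__gr_offs/__vr_offs to the
   negated sizes of the parts of those areas still holding unread
   anonymous register arguments.  */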
9715
9716/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
9717
9718static tree
9719aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
9720 gimple_seq *post_p ATTRIBUTE_UNUSED)
9721{
9722 tree addr;
9723 bool indirect_p;
9724 bool is_ha; /* is HFA or HVA. */
9725 bool dw_align; /* double-word align. */
ef4bddc2 9726 machine_mode ag_mode = VOIDmode;
43e9d192 9727 int nregs;
ef4bddc2 9728 machine_mode mode;
43e9d192
IB
9729
9730 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
9731 tree stack, f_top, f_off, off, arg, roundup, on_stack;
9732 HOST_WIDE_INT size, rsize, adjust, align;
9733 tree t, u, cond1, cond2;
9734
9735 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9736 if (indirect_p)
9737 type = build_pointer_type (type);
9738
9739 mode = TYPE_MODE (type);
9740
9741 f_stack = TYPE_FIELDS (va_list_type_node);
9742 f_grtop = DECL_CHAIN (f_stack);
9743 f_vrtop = DECL_CHAIN (f_grtop);
9744 f_groff = DECL_CHAIN (f_vrtop);
9745 f_vroff = DECL_CHAIN (f_groff);
9746
9747 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
9748 f_stack, NULL_TREE);
9749 size = int_size_in_bytes (type);
9750 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
9751
9752 dw_align = false;
9753 adjust = 0;
9754 if (aarch64_vfp_is_call_or_return_candidate (mode,
9755 type,
9756 &ag_mode,
9757 &nregs,
9758 &is_ha))
9759 {
9760 /* TYPE passed in fp/simd registers. */
d5726973 9761 if (!TARGET_FLOAT)
261fb553 9762 aarch64_err_no_fpadvsimd (mode, "varargs");
43e9d192
IB
9763
9764 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
9765 unshare_expr (valist), f_vrtop, NULL_TREE);
9766 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
9767 unshare_expr (valist), f_vroff, NULL_TREE);
9768
9769 rsize = nregs * UNITS_PER_VREG;
9770
9771 if (is_ha)
9772 {
9773 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
9774 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
9775 }
9776 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
9777 && size < UNITS_PER_VREG)
9778 {
9779 adjust = UNITS_PER_VREG - size;
9780 }
9781 }
9782 else
9783 {
9784 /* TYPE passed in general registers. */
9785 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
9786 unshare_expr (valist), f_grtop, NULL_TREE);
9787 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
9788 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 9789 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
9790 nregs = rsize / UNITS_PER_WORD;
9791
9792 if (align > 8)
9793 dw_align = true;
9794
9795 if (BLOCK_REG_PADDING (mode, type, 1) == downward
9796 && size < UNITS_PER_WORD)
9797 {
9798 adjust = UNITS_PER_WORD - size;
9799 }
9800 }
9801
9802 /* Get a local temporary for the field value. */
9803 off = get_initialized_tmp_var (f_off, pre_p, NULL);
9804
9805 /* Emit code to branch if off >= 0. */
9806 t = build2 (GE_EXPR, boolean_type_node, off,
9807 build_int_cst (TREE_TYPE (off), 0));
9808 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
9809
9810 if (dw_align)
9811 {
9812 /* Emit: offs = (offs + 15) & -16. */
9813 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
9814 build_int_cst (TREE_TYPE (off), 15));
9815 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
9816 build_int_cst (TREE_TYPE (off), -16));
9817 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
9818 }
9819 else
9820 roundup = NULL;
9821
9822 /* Update ap.__[g|v]r_offs */
9823 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
9824 build_int_cst (TREE_TYPE (off), rsize));
9825 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
9826
9827 /* String up. */
9828 if (roundup)
9829 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
9830
9831 /* [cond2] if (ap.__[g|v]r_offs > 0) */
9832 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
9833 build_int_cst (TREE_TYPE (f_off), 0));
9834 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
9835
9836 /* String up: make sure the assignment happens before the use. */
9837 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
9838 COND_EXPR_ELSE (cond1) = t;
9839
9840 /* Prepare the trees handling the argument that is passed on the stack;
9841 the top level node will store in ON_STACK. */
9842 arg = get_initialized_tmp_var (stack, pre_p, NULL);
9843 if (align > 8)
9844 {
9845 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
9846 t = fold_convert (intDI_type_node, arg);
9847 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
9848 build_int_cst (TREE_TYPE (t), 15));
9849 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9850 build_int_cst (TREE_TYPE (t), -16));
9851 t = fold_convert (TREE_TYPE (arg), t);
9852 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
9853 }
9854 else
9855 roundup = NULL;
9856 /* Advance ap.__stack */
9857 t = fold_convert (intDI_type_node, arg);
9858 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
9859 build_int_cst (TREE_TYPE (t), size + 7));
9860 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9861 build_int_cst (TREE_TYPE (t), -8));
9862 t = fold_convert (TREE_TYPE (arg), t);
9863 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
9864 /* String up roundup and advance. */
9865 if (roundup)
9866 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
9867 /* String up with arg */
9868 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
9869 /* Big-endianness related address adjustment. */
9870 if (BLOCK_REG_PADDING (mode, type, 1) == downward
9871 && size < UNITS_PER_WORD)
9872 {
9873 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
9874 size_int (UNITS_PER_WORD - size));
9875 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
9876 }
9877
9878 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
9879 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
9880
9881 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
9882 t = off;
9883 if (adjust)
9884 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
9885 build_int_cst (TREE_TYPE (off), adjust));
9886
9887 t = fold_convert (sizetype, t);
9888 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
9889
9890 if (is_ha)
9891 {
9892 /* type ha; // treat as "struct {ftype field[n];}"
9893 ... [computing offs]
9894 for (i = 0; i <nregs; ++i, offs += 16)
9895 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
9896 return ha; */
9897 int i;
9898 tree tmp_ha, field_t, field_ptr_t;
9899
9900 /* Declare a local variable. */
9901 tmp_ha = create_tmp_var_raw (type, "ha");
9902 gimple_add_tmp_var (tmp_ha);
9903
9904 /* Establish the base type. */
9905 switch (ag_mode)
9906 {
9907 case SFmode:
9908 field_t = float_type_node;
9909 field_ptr_t = float_ptr_type_node;
9910 break;
9911 case DFmode:
9912 field_t = double_type_node;
9913 field_ptr_t = double_ptr_type_node;
9914 break;
9915 case TFmode:
9916 field_t = long_double_type_node;
9917 field_ptr_t = long_double_ptr_type_node;
9918 break;
9919/* The half precision and quad precision are not fully supported yet. Enable
9920 the following code after the support is complete. Need to find the correct
9921 type node for __fp16 *. */
9922#if 0
9923 case HFmode:
9924 field_t = float_type_node;
9925 field_ptr_t = float_ptr_type_node;
9926 break;
9927#endif
9928 case V2SImode:
9929 case V4SImode:
9930 {
9931 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
9932 field_t = build_vector_type_for_mode (innertype, ag_mode);
9933 field_ptr_t = build_pointer_type (field_t);
9934 }
9935 break;
9936 default:
9937 gcc_assert (0);
9938 }
9939
9940 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
9941 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
9942 addr = t;
9943 t = fold_convert (field_ptr_t, addr);
9944 t = build2 (MODIFY_EXPR, field_t,
9945 build1 (INDIRECT_REF, field_t, tmp_ha),
9946 build1 (INDIRECT_REF, field_t, t));
9947
9948 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
9949 for (i = 1; i < nregs; ++i)
9950 {
9951 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
9952 u = fold_convert (field_ptr_t, addr);
9953 u = build2 (MODIFY_EXPR, field_t,
9954 build2 (MEM_REF, field_t, tmp_ha,
9955 build_int_cst (field_ptr_t,
9956 (i *
9957 int_size_in_bytes (field_t)))),
9958 build1 (INDIRECT_REF, field_t, u));
9959 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
9960 }
9961
9962 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
9963 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
9964 }
9965
9966 COND_EXPR_ELSE (cond2) = t;
9967 addr = fold_convert (build_pointer_type (type), cond1);
9968 addr = build_va_arg_indirect_ref (addr);
9969
9970 if (indirect_p)
9971 addr = build_va_arg_indirect_ref (addr);
9972
9973 return addr;
9974}
9975
9976/* Implement TARGET_SETUP_INCOMING_VARARGS. */
9977
9978static void
ef4bddc2 9979aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
9980 tree type, int *pretend_size ATTRIBUTE_UNUSED,
9981 int no_rtl)
9982{
9983 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9984 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
9985 int gr_saved = cfun->va_list_gpr_size;
9986 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
9987
9988 /* The caller has advanced CUM up to, but not beyond, the last named
9989 argument. Advance a local copy of CUM past the last "real" named
9990 argument, to find out how many registers are left over. */
9991 local_cum = *cum;
9992 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
9993
88e3bdd1
JW
9994 /* Find out how many registers we need to save.
9995 Honor the tree-stdarg analysis results. */
9996 if (cfun->va_list_gpr_size)
9997 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
9998 cfun->va_list_gpr_size / UNITS_PER_WORD);
9999 if (cfun->va_list_fpr_size)
10000 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
10001 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 10002
d5726973 10003 if (!TARGET_FLOAT)
43e9d192 10004 {
261fb553 10005 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
10006 vr_saved = 0;
10007 }
10008
10009 if (!no_rtl)
10010 {
10011 if (gr_saved > 0)
10012 {
10013 rtx ptr, mem;
10014
10015 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
10016 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
10017 - gr_saved * UNITS_PER_WORD);
10018 mem = gen_frame_mem (BLKmode, ptr);
10019 set_mem_alias_set (mem, get_varargs_alias_set ());
10020
10021 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
10022 mem, gr_saved);
10023 }
10024 if (vr_saved > 0)
10025 {
10026 /* We can't use move_block_from_reg, because it will use
10027 the wrong mode, storing D regs only. */
ef4bddc2 10028 machine_mode mode = TImode;
88e3bdd1 10029 int off, i, vr_start;
43e9d192
IB
10030
10031 /* Set OFF to the offset from virtual_incoming_args_rtx of
10032 the first vector register. The VR save area lies below
10033 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
10034 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
10035 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
10036 off -= vr_saved * UNITS_PER_VREG;
10037
88e3bdd1
JW
10038 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
10039 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
10040 {
10041 rtx ptr, mem;
10042
10043 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
10044 mem = gen_frame_mem (mode, ptr);
10045 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 10046 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
10047 off += UNITS_PER_VREG;
10048 }
10049 }
10050 }
10051
10052 /* We don't save the size into *PRETEND_SIZE because we want to avoid
10053 any complication of having crtl->args.pretend_args_size changed. */
8799637a 10054 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
10055 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
10056 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
10057 + vr_saved * UNITS_PER_VREG);
10058}
10059
10060static void
10061aarch64_conditional_register_usage (void)
10062{
10063 int i;
10064 if (!TARGET_FLOAT)
10065 {
10066 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
10067 {
10068 fixed_regs[i] = 1;
10069 call_used_regs[i] = 1;
10070 }
10071 }
10072}
10073
10074/* Walk down the type tree of TYPE counting consecutive base elements.
10075 If *MODEP is VOIDmode, then set it to the first valid floating point
10076 type. If a non-floating point type is found, or if a floating point
10077 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
10078 otherwise return the count in the sub-tree. */
10079static int
ef4bddc2 10080aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 10081{
ef4bddc2 10082 machine_mode mode;
43e9d192
IB
10083 HOST_WIDE_INT size;
10084
10085 switch (TREE_CODE (type))
10086 {
10087 case REAL_TYPE:
10088 mode = TYPE_MODE (type);
10089 if (mode != DFmode && mode != SFmode && mode != TFmode)
10090 return -1;
10091
10092 if (*modep == VOIDmode)
10093 *modep = mode;
10094
10095 if (*modep == mode)
10096 return 1;
10097
10098 break;
10099
10100 case COMPLEX_TYPE:
10101 mode = TYPE_MODE (TREE_TYPE (type));
10102 if (mode != DFmode && mode != SFmode && mode != TFmode)
10103 return -1;
10104
10105 if (*modep == VOIDmode)
10106 *modep = mode;
10107
10108 if (*modep == mode)
10109 return 2;
10110
10111 break;
10112
10113 case VECTOR_TYPE:
10114 /* Use V2SImode and V4SImode as representatives of all 64-bit
10115 and 128-bit vector types. */
10116 size = int_size_in_bytes (type);
10117 switch (size)
10118 {
10119 case 8:
10120 mode = V2SImode;
10121 break;
10122 case 16:
10123 mode = V4SImode;
10124 break;
10125 default:
10126 return -1;
10127 }
10128
10129 if (*modep == VOIDmode)
10130 *modep = mode;
10131
10132 /* Vector modes are considered to be opaque: two vectors are
10133 equivalent for the purposes of being homogeneous aggregates
10134 if they are the same size. */
10135 if (*modep == mode)
10136 return 1;
10137
10138 break;
10139
10140 case ARRAY_TYPE:
10141 {
10142 int count;
10143 tree index = TYPE_DOMAIN (type);
10144
807e902e
KZ
10145 /* Can't handle incomplete types nor sizes that are not
10146 fixed. */
10147 if (!COMPLETE_TYPE_P (type)
10148 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10149 return -1;
10150
10151 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
10152 if (count == -1
10153 || !index
10154 || !TYPE_MAX_VALUE (index)
cc269bb6 10155 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 10156 || !TYPE_MIN_VALUE (index)
cc269bb6 10157 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
10158 || count < 0)
10159 return -1;
10160
ae7e9ddd
RS
10161 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10162 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
10163
10164 /* There must be no padding. */
807e902e 10165 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
10166 return -1;
10167
10168 return count;
10169 }
10170
10171 case RECORD_TYPE:
10172 {
10173 int count = 0;
10174 int sub_count;
10175 tree field;
10176
807e902e
KZ
10177 /* Can't handle incomplete types nor sizes that are not
10178 fixed. */
10179 if (!COMPLETE_TYPE_P (type)
10180 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10181 return -1;
10182
10183 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10184 {
10185 if (TREE_CODE (field) != FIELD_DECL)
10186 continue;
10187
10188 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
10189 if (sub_count < 0)
10190 return -1;
10191 count += sub_count;
10192 }
10193
10194 /* There must be no padding. */
807e902e 10195 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
10196 return -1;
10197
10198 return count;
10199 }
10200
10201 case UNION_TYPE:
10202 case QUAL_UNION_TYPE:
10203 {
10204 /* These aren't very interesting except in a degenerate case. */
10205 int count = 0;
10206 int sub_count;
10207 tree field;
10208
807e902e
KZ
10209 /* Can't handle incomplete types nor sizes that are not
10210 fixed. */
10211 if (!COMPLETE_TYPE_P (type)
10212 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10213 return -1;
10214
10215 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10216 {
10217 if (TREE_CODE (field) != FIELD_DECL)
10218 continue;
10219
10220 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
10221 if (sub_count < 0)
10222 return -1;
10223 count = count > sub_count ? count : sub_count;
10224 }
10225
10226 /* There must be no padding. */
807e902e 10227 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
10228 return -1;
10229
10230 return count;
10231 }
10232
10233 default:
10234 break;
10235 }
10236
10237 return -1;
10238}
10239
b6ec6215
KT
10240/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
10241 type as described in AAPCS64 \S 4.1.2.
10242
10243 See the comment above aarch64_composite_type_p for the notes on MODE. */
10244
10245static bool
10246aarch64_short_vector_p (const_tree type,
10247 machine_mode mode)
10248{
10249 HOST_WIDE_INT size = -1;
10250
10251 if (type && TREE_CODE (type) == VECTOR_TYPE)
10252 size = int_size_in_bytes (type);
10253 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
10254 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
10255 size = GET_MODE_SIZE (mode);
10256
10257 return (size == 8 || size == 16);
10258}
10259
43e9d192
IB
10260/* Return TRUE if the type, as described by TYPE and MODE, is a composite
10261 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
10262 array types. The C99 floating-point complex types are also considered
10263 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
10264 types, which are GCC extensions and out of the scope of AAPCS64, are
10265 treated as composite types here as well.
10266
10267 Note that MODE itself is not sufficient in determining whether a type
10268 is such a composite type or not. This is because
10269 stor-layout.c:compute_record_mode may have already changed the MODE
10270 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
10271 structure with only one field may have its MODE set to the mode of the
10272 field. Also an integer mode whose size matches the size of the
10273 RECORD_TYPE type may be used to substitute the original mode
10274 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
10275 solely relied on. */
10276
10277static bool
10278aarch64_composite_type_p (const_tree type,
ef4bddc2 10279 machine_mode mode)
43e9d192 10280{
b6ec6215
KT
10281 if (aarch64_short_vector_p (type, mode))
10282 return false;
10283
43e9d192
IB
10284 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
10285 return true;
10286
10287 if (mode == BLKmode
10288 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
10289 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
10290 return true;
10291
10292 return false;
10293}
10294
43e9d192
IB
10295/* Return TRUE if an argument, whose type is described by TYPE and MODE,
10296 shall be passed or returned in simd/fp register(s) (providing these
10297 parameter passing registers are available).
10298
10299 Upon successful return, *COUNT returns the number of needed registers,
10300 *BASE_MODE returns the mode of the individual register and when IS_HA
10301 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
10302 floating-point aggregate or a homogeneous short-vector aggregate. */
10303
10304static bool
ef4bddc2 10305aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 10306 const_tree type,
ef4bddc2 10307 machine_mode *base_mode,
43e9d192
IB
10308 int *count,
10309 bool *is_ha)
10310{
ef4bddc2 10311 machine_mode new_mode = VOIDmode;
43e9d192
IB
10312 bool composite_p = aarch64_composite_type_p (type, mode);
10313
10314 if (is_ha != NULL) *is_ha = false;
10315
10316 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
10317 || aarch64_short_vector_p (type, mode))
10318 {
10319 *count = 1;
10320 new_mode = mode;
10321 }
10322 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10323 {
10324 if (is_ha != NULL) *is_ha = true;
10325 *count = 2;
10326 new_mode = GET_MODE_INNER (mode);
10327 }
10328 else if (type && composite_p)
10329 {
10330 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
10331
10332 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
10333 {
10334 if (is_ha != NULL) *is_ha = true;
10335 *count = ag_count;
10336 }
10337 else
10338 return false;
10339 }
10340 else
10341 return false;
10342
10343 *base_mode = new_mode;
10344 return true;
10345}
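/* Illustrative argument types (assumptions made up for the example):

     struct hfa { double x, y, z; };    // homogeneous FP aggregate
     struct mix { double d; int i; };   // not homogeneous

   For struct hfa the function returns true with *count = 3,
   *base_mode = DFmode and *is_ha = true; for struct mix the integer
   field makes aapcs_vfp_sub_candidate return -1, so the function
   returns false and the argument is handled by the ordinary AAPCS64
   rules instead.  */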
10346
10347/* Implement TARGET_STRUCT_VALUE_RTX. */
10348
10349static rtx
10350aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
10351 int incoming ATTRIBUTE_UNUSED)
10352{
10353 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
10354}
10355
10356/* Implements target hook vector_mode_supported_p. */
10357static bool
ef4bddc2 10358aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192
IB
10359{
10360 if (TARGET_SIMD
10361 && (mode == V4SImode || mode == V8HImode
10362 || mode == V16QImode || mode == V2DImode
10363 || mode == V2SImode || mode == V4HImode
10364 || mode == V8QImode || mode == V2SFmode
ad7d90cc 10365 || mode == V4SFmode || mode == V2DFmode
71a11456 10366 || mode == V4HFmode || mode == V8HFmode
ad7d90cc 10367 || mode == V1DFmode))
43e9d192
IB
10368 return true;
10369
10370 return false;
10371}
10372
b7342d25
IB
10373/* Return appropriate SIMD container
10374 for MODE within a vector of WIDTH bits. */
ef4bddc2
RS
10375static machine_mode
10376aarch64_simd_container_mode (machine_mode mode, unsigned width)
43e9d192 10377{
b7342d25 10378 gcc_assert (width == 64 || width == 128);
43e9d192 10379 if (TARGET_SIMD)
b7342d25
IB
10380 {
10381 if (width == 128)
10382 switch (mode)
10383 {
10384 case DFmode:
10385 return V2DFmode;
10386 case SFmode:
10387 return V4SFmode;
10388 case SImode:
10389 return V4SImode;
10390 case HImode:
10391 return V8HImode;
10392 case QImode:
10393 return V16QImode;
10394 case DImode:
10395 return V2DImode;
10396 default:
10397 break;
10398 }
10399 else
10400 switch (mode)
10401 {
10402 case SFmode:
10403 return V2SFmode;
10404 case SImode:
10405 return V2SImode;
10406 case HImode:
10407 return V4HImode;
10408 case QImode:
10409 return V8QImode;
10410 default:
10411 break;
10412 }
10413 }
43e9d192
IB
10414 return word_mode;
10415}
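/* For example, aarch64_simd_container_mode (SFmode, 128) is V4SFmode and
   aarch64_simd_container_mode (HImode, 64) is V4HImode; if !TARGET_SIMD
   the function falls back to word_mode.  */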
10416
b7342d25 10417/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2
RS
10418static machine_mode
10419aarch64_preferred_simd_mode (machine_mode mode)
b7342d25
IB
10420{
10421 return aarch64_simd_container_mode (mode, 128);
10422}
10423
3b357264
JG
10424/* Return the bitmask of possible vector sizes for the vectorizer
10425 to iterate over. */
10426static unsigned int
10427aarch64_autovectorize_vector_sizes (void)
10428{
10429 return (16 | 8);
10430}
10431
ac2b960f
YZ
10432/* Implement TARGET_MANGLE_TYPE. */
10433
6f549691 10434static const char *
ac2b960f
YZ
10435aarch64_mangle_type (const_tree type)
10436{
10437 /* The AArch64 ABI documents say that "__va_list" has to be
10438 mangled as if it is in the "std" namespace. */
10439 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
10440 return "St9__va_list";
10441
c2ec330c
AL
10442 /* Half-precision float. */
10443 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
10444 return "Dh";
10445
f9d53c27
TB
10446 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
10447 builtin types. */
10448 if (TYPE_NAME (type) != NULL)
10449 return aarch64_mangle_builtin_type (type);
c6fc9e43 10450
ac2b960f
YZ
10451 /* Use the default mangling. */
10452 return NULL;
10453}
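/* For example, a parameter of type __builtin_va_list is mangled as
   "St9__va_list" and a __fp16 parameter as "Dh"; anything else falls
   through to the language-independent default mangling.  */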
10454
8baff86e
KT
10455
10456/* Return true if the rtx_insn contains a MEM RTX somewhere
10457 in it. */
75cf1494
KT
10458
10459static bool
8baff86e 10460has_memory_op (rtx_insn *mem_insn)
75cf1494 10461{
8baff86e
KT
10462 subrtx_iterator::array_type array;
10463 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
10464 if (MEM_P (*iter))
10465 return true;
10466
10467 return false;
75cf1494
KT
10468}
10469
10470/* Find the first rtx_insn before insn that will generate an assembly
10471 instruction. */
10472
10473static rtx_insn *
10474aarch64_prev_real_insn (rtx_insn *insn)
10475{
10476 if (!insn)
10477 return NULL;
10478
10479 do
10480 {
10481 insn = prev_real_insn (insn);
10482 }
10483 while (insn && recog_memoized (insn) < 0);
10484
10485 return insn;
10486}
10487
10488static bool
10489is_madd_op (enum attr_type t1)
10490{
10491 unsigned int i;
10492 /* A number of these may be AArch32 only. */
10493 enum attr_type mlatypes[] = {
10494 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
10495 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
10496 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
10497 };
10498
10499 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
10500 {
10501 if (t1 == mlatypes[i])
10502 return true;
10503 }
10504
10505 return false;
10506}
10507
10508/* Check if there is a register dependency between a load and the insn
10509 for which we hold recog_data. */
10510
10511static bool
10512dep_between_memop_and_curr (rtx memop)
10513{
10514 rtx load_reg;
10515 int opno;
10516
8baff86e 10517 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
10518
10519 if (!REG_P (SET_DEST (memop)))
10520 return false;
10521
10522 load_reg = SET_DEST (memop);
8baff86e 10523 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
10524 {
10525 rtx operand = recog_data.operand[opno];
10526 if (REG_P (operand)
10527 && reg_overlap_mentioned_p (load_reg, operand))
10528 return true;
10529
10530 }
10531 return false;
10532}
10533
8baff86e
KT
10534
10535/* When working around the Cortex-A53 erratum 835769,
10536 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
10537 instruction and has a preceding memory instruction such that a NOP
10538 should be inserted between them. */
10539
75cf1494
KT
10540bool
10541aarch64_madd_needs_nop (rtx_insn* insn)
10542{
10543 enum attr_type attr_type;
10544 rtx_insn *prev;
10545 rtx body;
10546
b32c1043 10547 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
10548 return false;
10549
e322d6e3 10550 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
10551 return false;
10552
10553 attr_type = get_attr_type (insn);
10554 if (!is_madd_op (attr_type))
10555 return false;
10556
10557 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
10558 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
10559 Restore recog state to INSN to avoid state corruption. */
10560 extract_constrain_insn_cached (insn);
10561
8baff86e 10562 if (!prev || !has_memory_op (prev))
75cf1494
KT
10563 return false;
10564
10565 body = single_set (prev);
10566
10567 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
10568 it and the DImode madd, emit a NOP between them. If body is NULL then we
10569 have a complex memory operation, probably a load/store pair.
10570 Be conservative for now and emit a NOP. */
10571 if (GET_MODE (recog_data.operand[0]) == DImode
10572 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
10573 return true;
10574
10575 return false;
10576
10577}
10578
8baff86e
KT
10579
10580/* Implement FINAL_PRESCAN_INSN. */
10581
75cf1494
KT
10582void
10583aarch64_final_prescan_insn (rtx_insn *insn)
10584{
10585 if (aarch64_madd_needs_nop (insn))
10586 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
10587}
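/* When the erratum 835769 workaround fires, the emitted assembly looks
   like the following sketch (register choices are arbitrary):

     ldr   x1, [x2]
     nop   // between mem op and mult-accumulate
     madd  x0, x3, x4, x5
 */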
10588
10589
43e9d192 10590/* Return the equivalent letter for size. */
81c2dfb9 10591static char
43e9d192
IB
10592sizetochar (int size)
10593{
10594 switch (size)
10595 {
10596 case 64: return 'd';
10597 case 32: return 's';
10598 case 16: return 'h';
10599 case 8 : return 'b';
10600 default: gcc_unreachable ();
10601 }
10602}
10603
3520f7cc
JG
10604/* Return true iff x is a uniform vector of floating-point
10605 constants, and the constant can be represented in
10606 quarter-precision form. Note, as aarch64_float_const_representable_p
10607 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
10608static bool
10609aarch64_vect_float_const_representable_p (rtx x)
10610{
92695fbb
RS
10611 rtx elt;
10612 return (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
10613 && const_vec_duplicate_p (x, &elt)
10614 && aarch64_float_const_representable_p (elt));
3520f7cc
JG
10615}
10616
d8edd899 10617/* Return true for valid and false for invalid. */
3ea63f60 10618bool
ef4bddc2 10619aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
48063b9d 10620 struct simd_immediate_info *info)
43e9d192
IB
10621{
10622#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
10623 matches = 1; \
10624 for (i = 0; i < idx; i += (STRIDE)) \
10625 if (!(TEST)) \
10626 matches = 0; \
10627 if (matches) \
10628 { \
10629 immtype = (CLASS); \
10630 elsize = (ELSIZE); \
43e9d192
IB
10631 eshift = (SHIFT); \
10632 emvn = (NEG); \
10633 break; \
10634 }
10635
10636 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
cb5ca315 10637 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
43e9d192 10638 unsigned char bytes[16];
43e9d192
IB
10639 int immtype = -1, matches;
10640 unsigned int invmask = inverse ? 0xff : 0;
10641 int eshift, emvn;
10642
43e9d192 10643 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 10644 {
81c2dfb9
IB
10645 if (! (aarch64_simd_imm_zero_p (op, mode)
10646 || aarch64_vect_float_const_representable_p (op)))
d8edd899 10647 return false;
3520f7cc 10648
48063b9d
IB
10649 if (info)
10650 {
10651 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 10652 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
10653 info->mvn = false;
10654 info->shift = 0;
10655 }
3520f7cc 10656
d8edd899 10657 return true;
3520f7cc 10658 }
43e9d192
IB
10659
10660 /* Splat vector constant out into a byte vector. */
10661 for (i = 0; i < n_elts; i++)
10662 {
4b1e108c
AL
10663 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
10664 it must be laid out in the vector register in reverse order. */
10665 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192 10666 unsigned HOST_WIDE_INT elpart;
43e9d192 10667
ee78df47
KT
10668 gcc_assert (CONST_INT_P (el));
10669 elpart = INTVAL (el);
10670
10671 for (unsigned int byte = 0; byte < innersize; byte++)
10672 {
10673 bytes[idx++] = (elpart & 0xff) ^ invmask;
10674 elpart >>= BITS_PER_UNIT;
10675 }
43e9d192 10676
43e9d192
IB
10677 }
10678
10679 /* Sanity check. */
10680 gcc_assert (idx == GET_MODE_SIZE (mode));
10681
10682 do
10683 {
10684 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
10685 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
10686
10687 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
10688 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
10689
10690 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
10691 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
10692
10693 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
10694 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
10695
10696 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
10697
10698 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
10699
10700 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
10701 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
10702
10703 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
10704 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
10705
10706 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
10707 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
10708
10709 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
10710 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
10711
10712 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
10713
10714 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
10715
10716 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 10717 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
10718
10719 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 10720 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
10721
10722 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 10723 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
10724
10725 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 10726 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
10727
10728 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
10729
10730 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
10731 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
10732 }
10733 while (0);
10734
e4f0f84d 10735 if (immtype == -1)
d8edd899 10736 return false;
43e9d192 10737
48063b9d 10738 if (info)
43e9d192 10739 {
48063b9d 10740 info->element_width = elsize;
48063b9d
IB
10741 info->mvn = emvn != 0;
10742 info->shift = eshift;
10743
43e9d192
IB
10744 unsigned HOST_WIDE_INT imm = 0;
10745
e4f0f84d
TB
10746 if (immtype >= 12 && immtype <= 15)
10747 info->msl = true;
10748
43e9d192
IB
10749 /* Un-invert bytes of recognized vector, if necessary. */
10750 if (invmask != 0)
10751 for (i = 0; i < idx; i++)
10752 bytes[i] ^= invmask;
10753
10754 if (immtype == 17)
10755 {
10756 /* FIXME: Broken on 32-bit H_W_I hosts. */
10757 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
10758
10759 for (i = 0; i < 8; i++)
10760 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
10761 << (i * BITS_PER_UNIT);
10762
43e9d192 10763
48063b9d
IB
10764 info->value = GEN_INT (imm);
10765 }
10766 else
10767 {
10768 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
10769 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
10770
10771 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
10772 generic constants. */
10773 if (info->mvn)
43e9d192 10774 imm = ~imm;
48063b9d
IB
10775 imm = (imm >> info->shift) & 0xff;
10776 info->value = GEN_INT (imm);
10777 }
43e9d192
IB
10778 }
10779
48063b9d 10780 return true;
43e9d192
IB
10781#undef CHECK
10782}
10783
43e9d192
IB
 10784/* Check if immediate shift constants are within range. */
10785bool
ef4bddc2 10786aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
10787{
10788 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
10789 if (left)
ddeabd3e 10790 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 10791 else
ddeabd3e 10792 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
10793}
10794
3520f7cc
JG
10795/* Return true if X is a uniform vector where all elements
10796 are either the floating-point constant 0.0 or the
10797 integer constant 0. */
43e9d192 10798bool
ef4bddc2 10799aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
43e9d192 10800{
3520f7cc 10801 return x == CONST0_RTX (mode);
43e9d192
IB
10802}
10803
7325d85a
KT
10804
10805/* Return the bitmask CONST_INT to select the bits required by a zero extract
10806 operation of width WIDTH at bit position POS. */
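/* For example, WIDTH = 8 and POS = 16 yield the CONST_INT 0xff0000.  */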
10807
10808rtx
10809aarch64_mask_from_zextract_ops (rtx width, rtx pos)
10810{
10811 gcc_assert (CONST_INT_P (width));
10812 gcc_assert (CONST_INT_P (pos));
10813
10814 unsigned HOST_WIDE_INT mask
10815 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
10816 return GEN_INT (mask << UINTVAL (pos));
10817}
10818
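/* Return true if every byte of the 64-bit value X is either 0x00 or 0xff,
   the form accepted as an immediate by the 64-bit-element variant of MOVI.  */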
43e9d192 10819bool
ef4bddc2 10820aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
43e9d192
IB
10821{
10822 HOST_WIDE_INT imm = INTVAL (x);
10823 int i;
10824
10825 for (i = 0; i < 8; i++)
10826 {
10827 unsigned int byte = imm & 0xff;
10828 if (byte != 0xff && byte != 0)
10829 return false;
10830 imm >>= 8;
10831 }
10832
10833 return true;
10834}
10835
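/* Return true if X is a legitimate constant operand for a move in mode MODE:
   the HIGH part of a valid symbolic reference, any CONST_INT, a DImode
   SYMBOL_REF that is a constant address, or an expression classified as
   SYMBOL_TINY_ABSOLUTE.  */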
83f8c414 10836bool
a6e0bfa7 10837aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 10838{
83f8c414
CSS
10839 if (GET_CODE (x) == HIGH
10840 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
10841 return true;
10842
82614948 10843 if (CONST_INT_P (x))
83f8c414
CSS
10844 return true;
10845
10846 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
10847 return true;
10848
a6e0bfa7 10849 return aarch64_classify_symbolic_expression (x)
a5350ddc 10850 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
10851}
10852
43e9d192
IB
10853/* Return a const_int vector of VAL. */
10854rtx
ef4bddc2 10855aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
43e9d192
IB
10856{
10857 int nunits = GET_MODE_NUNITS (mode);
10858 rtvec v = rtvec_alloc (nunits);
10859 int i;
10860
10861 for (i=0; i < nunits; i++)
10862 RTVEC_ELT (v, i) = GEN_INT (val);
10863
10864 return gen_rtx_CONST_VECTOR (mode, v);
10865}
10866
051d0e2f
SN
10867/* Check OP is a legal scalar immediate for the MOVI instruction. */
10868
10869bool
ef4bddc2 10870aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
051d0e2f 10871{
ef4bddc2 10872 machine_mode vmode;
051d0e2f
SN
10873
10874 gcc_assert (!VECTOR_MODE_P (mode));
10875 vmode = aarch64_preferred_simd_mode (mode);
10876 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 10877 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
10878}
10879
988fa693
JG
10880/* Construct and return a PARALLEL RTX vector with elements numbering the
10881 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
10882 the vector - from the perspective of the architecture. This does not
10883 line up with GCC's perspective on lane numbers, so we end up with
10884 different masks depending on our target endian-ness. The diagram
10885 below may help. We must draw the distinction when building masks
10886 which select one half of the vector. An instruction selecting
10887 architectural low-lanes for a big-endian target, must be described using
10888 a mask selecting GCC high-lanes.
10889
10890 Big-Endian Little-Endian
10891
10892GCC 0 1 2 3 3 2 1 0
10893 | x | x | x | x | | x | x | x | x |
10894Architecture 3 2 1 0 3 2 1 0
10895
10896Low Mask: { 2, 3 } { 0, 1 }
10897High Mask: { 0, 1 } { 2, 3 }
10898*/
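/* For example, for V4SImode and HIGH == true this returns { 2, 3 } on a
   little-endian target and { 0, 1 } on a big-endian target, matching the
   "High Mask" row of the diagram above.  */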
10899
43e9d192 10900rtx
ef4bddc2 10901aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
43e9d192
IB
10902{
10903 int nunits = GET_MODE_NUNITS (mode);
10904 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
10905 int high_base = nunits / 2;
10906 int low_base = 0;
10907 int base;
43e9d192
IB
10908 rtx t1;
10909 int i;
10910
988fa693
JG
10911 if (BYTES_BIG_ENDIAN)
10912 base = high ? low_base : high_base;
10913 else
10914 base = high ? high_base : low_base;
10915
10916 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
10917 RTVEC_ELT (v, i) = GEN_INT (base + i);
10918
10919 t1 = gen_rtx_PARALLEL (mode, v);
10920 return t1;
10921}
10922
988fa693
JG
10923/* Check OP for validity as a PARALLEL RTX vector with elements
10924 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
10925 from the perspective of the architecture. See the diagram above
10926 aarch64_simd_vect_par_cnst_half for more details. */
10927
10928bool
ef4bddc2 10929aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
10930 bool high)
10931{
10932 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
10933 HOST_WIDE_INT count_op = XVECLEN (op, 0);
10934 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
10935 int i = 0;
10936
10937 if (!VECTOR_MODE_P (mode))
10938 return false;
10939
10940 if (count_op != count_ideal)
10941 return false;
10942
10943 for (i = 0; i < count_ideal; i++)
10944 {
10945 rtx elt_op = XVECEXP (op, 0, i);
10946 rtx elt_ideal = XVECEXP (ideal, 0, i);
10947
4aa81c2e 10948 if (!CONST_INT_P (elt_op)
988fa693
JG
10949 || INTVAL (elt_ideal) != INTVAL (elt_op))
10950 return false;
10951 }
10952 return true;
10953}
10954
43e9d192
IB
10955/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
10956 HIGH (exclusive). */
10957void
46ed6024
CB
10958aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
10959 const_tree exp)
43e9d192
IB
10960{
10961 HOST_WIDE_INT lane;
4aa81c2e 10962 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
10963 lane = INTVAL (operand);
10964
10965 if (lane < low || lane >= high)
46ed6024
CB
10966 {
10967 if (exp)
cf0c27ef 10968 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 10969 else
cf0c27ef 10970 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 10971 }
43e9d192
IB
10972}
10973
43e9d192
IB
10974/* Return TRUE if OP is a valid vector addressing mode. */
10975bool
10976aarch64_simd_mem_operand_p (rtx op)
10977{
10978 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 10979 || REG_P (XEXP (op, 0)));
43e9d192
IB
10980}
10981
2d8c6dc1
AH
10982/* Emit a register copy from operand to operand, taking care not to
10983 early-clobber source registers in the process.
43e9d192 10984
2d8c6dc1
AH
10985 COUNT is the number of components into which the copy needs to be
10986 decomposed. */
43e9d192 10987void
2d8c6dc1
AH
10988aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
10989 unsigned int count)
43e9d192
IB
10990{
10991 unsigned int i;
2d8c6dc1
AH
10992 int rdest = REGNO (operands[0]);
10993 int rsrc = REGNO (operands[1]);
43e9d192
IB
10994
10995 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
10996 || rdest < rsrc)
10997 for (i = 0; i < count; i++)
10998 emit_move_insn (gen_rtx_REG (mode, rdest + i),
10999 gen_rtx_REG (mode, rsrc + i));
43e9d192 11000 else
2d8c6dc1
AH
11001 for (i = 0; i < count; i++)
11002 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
11003 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
11004}
11005
668046d1 11006/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 11007 one of VSTRUCT modes: OI, CI, or XI. */
668046d1
DS
11008int
11009aarch64_simd_attr_length_rglist (enum machine_mode mode)
11010{
11011 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
11012}
11013
db0253a4
TB
11014/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
11015 alignment of a vector to 128 bits. */
11016static HOST_WIDE_INT
11017aarch64_simd_vector_alignment (const_tree type)
11018{
9439e9a1 11019 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
11020 return MIN (align, 128);
11021}
11022
11023/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
11024static bool
11025aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
11026{
11027 if (is_packed)
11028 return false;
11029
11030 /* We guarantee alignment for vectors up to 128-bits. */
11031 if (tree_int_cst_compare (TYPE_SIZE (type),
11032 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
11033 return false;
11034
11035 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
11036 return true;
11037}
11038
4369c11e
TB
11039/* If VALS is a vector constant that can be loaded into a register
11040 using DUP, generate instructions to do so and return an RTX to
11041 assign to the register. Otherwise return NULL_RTX. */
11042static rtx
11043aarch64_simd_dup_constant (rtx vals)
11044{
ef4bddc2
RS
11045 machine_mode mode = GET_MODE (vals);
11046 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 11047 rtx x;
4369c11e 11048
92695fbb 11049 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
11050 return NULL_RTX;
11051
11052 /* We can load this constant by using DUP and a constant in a
11053 single ARM register. This will be cheaper than a vector
11054 load. */
92695fbb 11055 x = copy_to_mode_reg (inner_mode, x);
4369c11e
TB
11056 return gen_rtx_VEC_DUPLICATE (mode, x);
11057}
11058
11059
11060/* Generate code to load VALS, which is a PARALLEL containing only
11061 constants (for vec_init) or CONST_VECTOR, efficiently into a
11062 register. Returns an RTX to copy into the register, or NULL_RTX
 11063 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
1df3f464 11064static rtx
4369c11e
TB
11065aarch64_simd_make_constant (rtx vals)
11066{
ef4bddc2 11067 machine_mode mode = GET_MODE (vals);
4369c11e
TB
11068 rtx const_dup;
11069 rtx const_vec = NULL_RTX;
11070 int n_elts = GET_MODE_NUNITS (mode);
11071 int n_const = 0;
11072 int i;
11073
11074 if (GET_CODE (vals) == CONST_VECTOR)
11075 const_vec = vals;
11076 else if (GET_CODE (vals) == PARALLEL)
11077 {
11078 /* A CONST_VECTOR must contain only CONST_INTs and
11079 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11080 Only store valid constants in a CONST_VECTOR. */
11081 for (i = 0; i < n_elts; ++i)
11082 {
11083 rtx x = XVECEXP (vals, 0, i);
11084 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11085 n_const++;
11086 }
11087 if (n_const == n_elts)
11088 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
11089 }
11090 else
11091 gcc_unreachable ();
11092
11093 if (const_vec != NULL_RTX
48063b9d 11094 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
11095 /* Load using MOVI/MVNI. */
11096 return const_vec;
11097 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
11098 /* Loaded using DUP. */
11099 return const_dup;
11100 else if (const_vec != NULL_RTX)
 11101 /* Load from constant pool. We cannot take advantage of single-cycle
11102 LD1 because we need a PC-relative addressing mode. */
11103 return const_vec;
11104 else
11105 /* A PARALLEL containing something not valid inside CONST_VECTOR.
 11106 We cannot construct an initializer. */
11107 return NULL_RTX;
11108}
11109
35a093b6
JG
11110/* Expand a vector initialisation sequence, such that TARGET is
11111 initialised to contain VALS. */
11112
4369c11e
TB
11113void
11114aarch64_expand_vector_init (rtx target, rtx vals)
11115{
ef4bddc2
RS
11116 machine_mode mode = GET_MODE (target);
11117 machine_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 11118 /* The number of vector elements. */
4369c11e 11119 int n_elts = GET_MODE_NUNITS (mode);
35a093b6 11120 /* The number of vector elements which are not constant. */
8b66a2d4
AL
11121 int n_var = 0;
11122 rtx any_const = NULL_RTX;
35a093b6
JG
11123 /* The first element of vals. */
11124 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 11125 bool all_same = true;
4369c11e 11126
35a093b6 11127 /* Count the number of variable elements to initialise. */
8b66a2d4 11128 for (int i = 0; i < n_elts; ++i)
4369c11e 11129 {
8b66a2d4 11130 rtx x = XVECEXP (vals, 0, i);
35a093b6 11131 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
11132 ++n_var;
11133 else
11134 any_const = x;
4369c11e 11135
35a093b6 11136 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
11137 }
11138
35a093b6
JG
11139 /* No variable elements, hand off to aarch64_simd_make_constant which knows
11140 how best to handle this. */
4369c11e
TB
11141 if (n_var == 0)
11142 {
11143 rtx constant = aarch64_simd_make_constant (vals);
11144 if (constant != NULL_RTX)
11145 {
11146 emit_move_insn (target, constant);
11147 return;
11148 }
11149 }
11150
11151 /* Splat a single non-constant element if we can. */
11152 if (all_same)
11153 {
35a093b6 11154 rtx x = copy_to_mode_reg (inner_mode, v0);
4369c11e
TB
11155 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
11156 return;
11157 }
11158
35a093b6
JG
11159 /* Initialise a vector which is part-variable. We want to first try
11160 to build those lanes which are constant in the most efficient way we
11161 can. */
11162 if (n_var != n_elts)
4369c11e
TB
11163 {
11164 rtx copy = copy_rtx (vals);
4369c11e 11165
8b66a2d4
AL
11166 /* Load constant part of vector. We really don't care what goes into the
11167 parts we will overwrite, but we're more likely to be able to load the
11168 constant efficiently if it has fewer, larger, repeating parts
11169 (see aarch64_simd_valid_immediate). */
11170 for (int i = 0; i < n_elts; i++)
11171 {
11172 rtx x = XVECEXP (vals, 0, i);
11173 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11174 continue;
11175 rtx subst = any_const;
11176 for (int bit = n_elts / 2; bit > 0; bit /= 2)
11177 {
11178 /* Look in the copied vector, as more elements are const. */
11179 rtx test = XVECEXP (copy, 0, i ^ bit);
11180 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
11181 {
11182 subst = test;
11183 break;
11184 }
11185 }
11186 XVECEXP (copy, 0, i) = subst;
11187 }
4369c11e 11188 aarch64_expand_vector_init (target, copy);
35a093b6 11189 }
4369c11e 11190
35a093b6 11191 /* Insert the variable lanes directly. */
8b66a2d4 11192
35a093b6
JG
11193 enum insn_code icode = optab_handler (vec_set_optab, mode);
11194 gcc_assert (icode != CODE_FOR_nothing);
4369c11e 11195
8b66a2d4 11196 for (int i = 0; i < n_elts; i++)
35a093b6
JG
11197 {
11198 rtx x = XVECEXP (vals, 0, i);
11199 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11200 continue;
11201 x = copy_to_mode_reg (inner_mode, x);
11202 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
11203 }
4369c11e
TB
11204}
11205
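/* Return the mask to which shift counts in MODE are implicitly truncated
   (GET_MODE_BITSIZE (MODE) - 1), or 0 when shift counts are not truncated,
   as is the case for the vector and vector-structure modes.  */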
43e9d192 11206static unsigned HOST_WIDE_INT
ef4bddc2 11207aarch64_shift_truncation_mask (machine_mode mode)
43e9d192
IB
11208{
11209 return
ac59ad4e
KT
11210 (!SHIFT_COUNT_TRUNCATED
11211 || aarch64_vector_mode_supported_p (mode)
43e9d192
IB
11212 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
11213}
11214
43e9d192
IB
11215/* Select a format to encode pointers in exception handling data. */
11216int
11217aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
11218{
11219 int type;
11220 switch (aarch64_cmodel)
11221 {
11222 case AARCH64_CMODEL_TINY:
11223 case AARCH64_CMODEL_TINY_PIC:
11224 case AARCH64_CMODEL_SMALL:
11225 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 11226 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
11227 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
11228 for everything. */
11229 type = DW_EH_PE_sdata4;
11230 break;
11231 default:
11232 /* No assumptions here. 8-byte relocs required. */
11233 type = DW_EH_PE_sdata8;
11234 break;
11235 }
11236 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
11237}
11238
e1c1ecb0
KT
11239/* The last .arch and .tune assembly strings that we printed. */
11240static std::string aarch64_last_printed_arch_string;
11241static std::string aarch64_last_printed_tune_string;
11242
361fb3ee
KT
11243/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
11244 by the function fndecl. */
11245
11246void
11247aarch64_declare_function_name (FILE *stream, const char* name,
11248 tree fndecl)
11249{
11250 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
11251
11252 struct cl_target_option *targ_options;
11253 if (target_parts)
11254 targ_options = TREE_TARGET_OPTION (target_parts);
11255 else
11256 targ_options = TREE_TARGET_OPTION (target_option_current_node);
11257 gcc_assert (targ_options);
11258
11259 const struct processor *this_arch
11260 = aarch64_get_arch (targ_options->x_explicit_arch);
11261
054b4005
JG
11262 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
11263 std::string extension
04a99ebe
JG
11264 = aarch64_get_extension_string_for_isa_flags (isa_flags,
11265 this_arch->flags);
e1c1ecb0
KT
11266 /* Only update the assembler .arch string if it is distinct from the last
11267 such string we printed. */
11268 std::string to_print = this_arch->name + extension;
11269 if (to_print != aarch64_last_printed_arch_string)
11270 {
11271 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
11272 aarch64_last_printed_arch_string = to_print;
11273 }
361fb3ee
KT
11274
11275 /* Print the cpu name we're tuning for in the comments, might be
e1c1ecb0
KT
11276 useful to readers of the generated asm. Do it only when it changes
11277 from function to function and verbose assembly is requested. */
361fb3ee
KT
11278 const struct processor *this_tune
11279 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
11280
e1c1ecb0
KT
11281 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
11282 {
11283 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
11284 this_tune->name);
11285 aarch64_last_printed_tune_string = this_tune->name;
11286 }
361fb3ee
KT
11287
11288 /* Don't forget the type directive for ELF. */
11289 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
11290 ASM_OUTPUT_LABEL (stream, name);
11291}
11292
e1c1ecb0
KT
11293/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
11294
11295static void
11296aarch64_start_file (void)
11297{
11298 struct cl_target_option *default_options
11299 = TREE_TARGET_OPTION (target_option_default_node);
11300
11301 const struct processor *default_arch
11302 = aarch64_get_arch (default_options->x_explicit_arch);
11303 unsigned long default_isa_flags = default_options->x_aarch64_isa_flags;
11304 std::string extension
04a99ebe
JG
11305 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
11306 default_arch->flags);
e1c1ecb0
KT
11307
11308 aarch64_last_printed_arch_string = default_arch->name + extension;
11309 aarch64_last_printed_tune_string = "";
11310 asm_fprintf (asm_out_file, "\t.arch %s\n",
11311 aarch64_last_printed_arch_string.c_str ());
11312
11313 default_file_start ();
11314}
11315
0462169c
SN
11316/* Emit load exclusive. */
11317
11318static void
ef4bddc2 11319aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
11320 rtx mem, rtx model_rtx)
11321{
11322 rtx (*gen) (rtx, rtx, rtx);
11323
11324 switch (mode)
11325 {
11326 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
11327 case HImode: gen = gen_aarch64_load_exclusivehi; break;
11328 case SImode: gen = gen_aarch64_load_exclusivesi; break;
11329 case DImode: gen = gen_aarch64_load_exclusivedi; break;
11330 default:
11331 gcc_unreachable ();
11332 }
11333
11334 emit_insn (gen (rval, mem, model_rtx));
11335}
11336
11337/* Emit store exclusive. */
11338
11339static void
ef4bddc2 11340aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
11341 rtx rval, rtx mem, rtx model_rtx)
11342{
11343 rtx (*gen) (rtx, rtx, rtx, rtx);
11344
11345 switch (mode)
11346 {
11347 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
11348 case HImode: gen = gen_aarch64_store_exclusivehi; break;
11349 case SImode: gen = gen_aarch64_store_exclusivesi; break;
11350 case DImode: gen = gen_aarch64_store_exclusivedi; break;
11351 default:
11352 gcc_unreachable ();
11353 }
11354
11355 emit_insn (gen (bval, rval, mem, model_rtx));
11356}
11357
 11358/* Emit the jump pattern INSN and mark it as very unlikely to be taken. */
11359
11360static void
11361aarch64_emit_unlikely_jump (rtx insn)
11362{
e5af9ddd 11363 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c
SN
11364
11365 insn = emit_jump_insn (insn);
e5af9ddd 11366 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
0462169c
SN
11367}
11368
11369/* Expand a compare and swap pattern. */
11370
11371void
11372aarch64_expand_compare_and_swap (rtx operands[])
11373{
11374 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 11375 machine_mode mode, cmp_mode;
b0770c0f
MW
11376 typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
11377 int idx;
11378 gen_cas_fn gen;
11379 const gen_cas_fn split_cas[] =
11380 {
11381 gen_aarch64_compare_and_swapqi,
11382 gen_aarch64_compare_and_swaphi,
11383 gen_aarch64_compare_and_swapsi,
11384 gen_aarch64_compare_and_swapdi
11385 };
11386 const gen_cas_fn atomic_cas[] =
11387 {
11388 gen_aarch64_compare_and_swapqi_lse,
11389 gen_aarch64_compare_and_swaphi_lse,
11390 gen_aarch64_compare_and_swapsi_lse,
11391 gen_aarch64_compare_and_swapdi_lse
11392 };
0462169c
SN
11393
11394 bval = operands[0];
11395 rval = operands[1];
11396 mem = operands[2];
11397 oldval = operands[3];
11398 newval = operands[4];
11399 is_weak = operands[5];
11400 mod_s = operands[6];
11401 mod_f = operands[7];
11402 mode = GET_MODE (mem);
11403 cmp_mode = mode;
11404
11405 /* Normally the succ memory model must be stronger than fail, but in the
11406 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
11407 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
11408
46b35980
AM
11409 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
11410 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
11411 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
11412
11413 switch (mode)
11414 {
11415 case QImode:
11416 case HImode:
11417 /* For short modes, we're going to perform the comparison in SImode,
11418 so do the zero-extension now. */
11419 cmp_mode = SImode;
11420 rval = gen_reg_rtx (SImode);
11421 oldval = convert_modes (SImode, mode, oldval, true);
11422 /* Fall through. */
11423
11424 case SImode:
11425 case DImode:
11426 /* Force the value into a register if needed. */
11427 if (!aarch64_plus_operand (oldval, mode))
11428 oldval = force_reg (cmp_mode, oldval);
11429 break;
11430
11431 default:
11432 gcc_unreachable ();
11433 }
11434
11435 switch (mode)
11436 {
b0770c0f
MW
11437 case QImode: idx = 0; break;
11438 case HImode: idx = 1; break;
11439 case SImode: idx = 2; break;
11440 case DImode: idx = 3; break;
0462169c
SN
11441 default:
11442 gcc_unreachable ();
11443 }
b0770c0f
MW
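  /* With LSE (the ARMv8.1-A atomic instructions) a single CAS instruction
     implements the whole compare-and-swap; otherwise fall back to a
     load-exclusive/store-exclusive loop (see aarch64_split_compare_and_swap).  */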
11444 if (TARGET_LSE)
11445 gen = atomic_cas[idx];
11446 else
11447 gen = split_cas[idx];
0462169c
SN
11448
11449 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
11450
11451 if (mode == QImode || mode == HImode)
11452 emit_move_insn (operands[1], gen_lowpart (mode, rval));
11453
11454 x = gen_rtx_REG (CCmode, CC_REGNUM);
11455 x = gen_rtx_EQ (SImode, x, const0_rtx);
f7df4a84 11456 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
11457}
11458
641c2f8b
MW
 11459/* Test whether the target supports using an atomic load-operate instruction
 11460 for operation CODE. Returns FALSE if the operation isn't supported by
 11461 the architecture, i.e. if the LSE atomic instructions are unavailable
 11462 or CODE is not one of the operations they can implement (SET, AND,
 11463 IOR, XOR, MINUS, PLUS). */
11464
11465bool
11466aarch64_atomic_ldop_supported_p (enum rtx_code code)
11467{
11468 if (!TARGET_LSE)
11469 return false;
11470
11471 switch (code)
11472 {
11473 case SET:
11474 case AND:
11475 case IOR:
11476 case XOR:
11477 case MINUS:
11478 case PLUS:
11479 return true;
11480 default:
11481 return false;
11482 }
11483}
11484
f70fb3b6
MW
11485/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
11486 sequence implementing an atomic operation. */
11487
11488static void
11489aarch64_emit_post_barrier (enum memmodel model)
11490{
11491 const enum memmodel base_model = memmodel_base (model);
11492
11493 if (is_mm_sync (model)
11494 && (base_model == MEMMODEL_ACQUIRE
11495 || base_model == MEMMODEL_ACQ_REL
11496 || base_model == MEMMODEL_SEQ_CST))
11497 {
11498 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
11499 }
11500}
11501
b0770c0f
MW
11502/* Emit an atomic compare-and-swap operation. RVAL is the destination register
11503 for the data in memory. EXPECTED is the value expected to be in memory.
11504 DESIRED is the value to store to memory. MEM is the memory location. MODEL
11505 is the memory ordering to use. */
11506
11507void
11508aarch64_gen_atomic_cas (rtx rval, rtx mem,
11509 rtx expected, rtx desired,
11510 rtx model)
11511{
11512 rtx (*gen) (rtx, rtx, rtx, rtx);
11513 machine_mode mode;
11514
11515 mode = GET_MODE (mem);
11516
11517 switch (mode)
11518 {
11519 case QImode: gen = gen_aarch64_atomic_casqi; break;
11520 case HImode: gen = gen_aarch64_atomic_cashi; break;
11521 case SImode: gen = gen_aarch64_atomic_cassi; break;
11522 case DImode: gen = gen_aarch64_atomic_casdi; break;
11523 default:
11524 gcc_unreachable ();
11525 }
11526
11527 /* Move the expected value into the CAS destination register. */
11528 emit_insn (gen_rtx_SET (rval, expected));
11529
11530 /* Emit the CAS. */
11531 emit_insn (gen (rval, mem, desired, model));
11532
11533 /* Compare the expected value with the value loaded by the CAS, to establish
11534 whether the swap was made. */
11535 aarch64_gen_compare_reg (EQ, rval, expected);
11536}
11537
0462169c
SN
11538/* Split a compare and swap pattern. */
11539
11540void
11541aarch64_split_compare_and_swap (rtx operands[])
11542{
11543 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 11544 machine_mode mode;
0462169c 11545 bool is_weak;
5d8a22a5
DM
11546 rtx_code_label *label1, *label2;
11547 rtx x, cond;
ab876106
MW
11548 enum memmodel model;
11549 rtx model_rtx;
0462169c
SN
11550
11551 rval = operands[0];
11552 mem = operands[1];
11553 oldval = operands[2];
11554 newval = operands[3];
11555 is_weak = (operands[4] != const0_rtx);
ab876106 11556 model_rtx = operands[5];
0462169c
SN
11557 scratch = operands[7];
11558 mode = GET_MODE (mem);
ab876106 11559 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 11560
5d8a22a5 11561 label1 = NULL;
0462169c
SN
11562 if (!is_weak)
11563 {
11564 label1 = gen_label_rtx ();
11565 emit_label (label1);
11566 }
11567 label2 = gen_label_rtx ();
11568
ab876106
MW
11569 /* The initial load can be relaxed for a __sync operation since a final
11570 barrier will be emitted to stop code hoisting. */
11571 if (is_mm_sync (model))
11572 aarch64_emit_load_exclusive (mode, rval, mem,
11573 GEN_INT (MEMMODEL_RELAXED));
11574 else
11575 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c
SN
11576
11577 cond = aarch64_gen_compare_reg (NE, rval, oldval);
11578 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
11579 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
11580 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
f7df4a84 11581 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c 11582
ab876106 11583 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
11584
11585 if (!is_weak)
11586 {
11587 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
11588 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
11589 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 11590 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
11591 }
11592 else
11593 {
11594 cond = gen_rtx_REG (CCmode, CC_REGNUM);
11595 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 11596 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
11597 }
11598
11599 emit_label (label2);
ab876106
MW
11600
11601 /* Emit any final barrier needed for a __sync operation. */
11602 if (is_mm_sync (model))
11603 aarch64_emit_post_barrier (model);
0462169c
SN
11604}
11605
68729b06
MW
11606/* Emit a BIC instruction. */
11607
11608static void
11609aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
11610{
11611 rtx shift_rtx = GEN_INT (shift);
11612 rtx (*gen) (rtx, rtx, rtx, rtx);
11613
11614 switch (mode)
11615 {
11616 case SImode: gen = gen_and_one_cmpl_lshrsi3; break;
11617 case DImode: gen = gen_and_one_cmpl_lshrdi3; break;
11618 default:
11619 gcc_unreachable ();
11620 }
11621
11622 emit_insn (gen (dst, s2, shift_rtx, s1));
11623}
11624
9cd7b720
MW
11625/* Emit an atomic swap. */
11626
11627static void
11628aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
11629 rtx mem, rtx model)
11630{
11631 rtx (*gen) (rtx, rtx, rtx, rtx);
11632
11633 switch (mode)
11634 {
11635 case QImode: gen = gen_aarch64_atomic_swpqi; break;
11636 case HImode: gen = gen_aarch64_atomic_swphi; break;
11637 case SImode: gen = gen_aarch64_atomic_swpsi; break;
11638 case DImode: gen = gen_aarch64_atomic_swpdi; break;
11639 default:
11640 gcc_unreachable ();
11641 }
11642
11643 emit_insn (gen (dst, mem, value, model));
11644}
11645
641c2f8b
MW
11646/* Operations supported by aarch64_emit_atomic_load_op. */
11647
11648enum aarch64_atomic_load_op_code
11649{
11650 AARCH64_LDOP_PLUS, /* A + B */
11651 AARCH64_LDOP_XOR, /* A ^ B */
11652 AARCH64_LDOP_OR, /* A | B */
11653 AARCH64_LDOP_BIC /* A & ~B */
11654};
11655
11656/* Emit an atomic load-operate. */
11657
11658static void
11659aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
11660 machine_mode mode, rtx dst, rtx src,
11661 rtx mem, rtx model)
11662{
11663 typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
11664 const aarch64_atomic_load_op_fn plus[] =
11665 {
11666 gen_aarch64_atomic_loadaddqi,
11667 gen_aarch64_atomic_loadaddhi,
11668 gen_aarch64_atomic_loadaddsi,
11669 gen_aarch64_atomic_loadadddi
11670 };
11671 const aarch64_atomic_load_op_fn eor[] =
11672 {
11673 gen_aarch64_atomic_loadeorqi,
11674 gen_aarch64_atomic_loadeorhi,
11675 gen_aarch64_atomic_loadeorsi,
11676 gen_aarch64_atomic_loadeordi
11677 };
11678 const aarch64_atomic_load_op_fn ior[] =
11679 {
11680 gen_aarch64_atomic_loadsetqi,
11681 gen_aarch64_atomic_loadsethi,
11682 gen_aarch64_atomic_loadsetsi,
11683 gen_aarch64_atomic_loadsetdi
11684 };
11685 const aarch64_atomic_load_op_fn bic[] =
11686 {
11687 gen_aarch64_atomic_loadclrqi,
11688 gen_aarch64_atomic_loadclrhi,
11689 gen_aarch64_atomic_loadclrsi,
11690 gen_aarch64_atomic_loadclrdi
11691 };
11692 aarch64_atomic_load_op_fn gen;
11693 int idx = 0;
11694
11695 switch (mode)
11696 {
11697 case QImode: idx = 0; break;
11698 case HImode: idx = 1; break;
11699 case SImode: idx = 2; break;
11700 case DImode: idx = 3; break;
11701 default:
11702 gcc_unreachable ();
11703 }
11704
11705 switch (code)
11706 {
11707 case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
11708 case AARCH64_LDOP_XOR: gen = eor[idx]; break;
11709 case AARCH64_LDOP_OR: gen = ior[idx]; break;
11710 case AARCH64_LDOP_BIC: gen = bic[idx]; break;
11711 default:
11712 gcc_unreachable ();
11713 }
11714
11715 emit_insn (gen (dst, mem, src, model));
11716}
11717
11718/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
68729b06
MW
11719 location to store the data read from memory. OUT_RESULT is the location to
11720 store the result of the operation. MEM is the memory location to read and
11721 modify. MODEL_RTX is the memory ordering to use. VALUE is the second
11722 operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
11723 be NULL. */
9cd7b720
MW
11724
11725void
68729b06 11726aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
9cd7b720
MW
11727 rtx mem, rtx value, rtx model_rtx)
11728{
11729 machine_mode mode = GET_MODE (mem);
641c2f8b
MW
11730 machine_mode wmode = (mode == DImode ? DImode : SImode);
11731 const bool short_mode = (mode < SImode);
11732 aarch64_atomic_load_op_code ldop_code;
11733 rtx src;
11734 rtx x;
11735
11736 if (out_data)
11737 out_data = gen_lowpart (mode, out_data);
9cd7b720 11738
68729b06
MW
11739 if (out_result)
11740 out_result = gen_lowpart (mode, out_result);
11741
641c2f8b
MW
11742 /* Make sure the value is in a register, putting it into a destination
11743 register if it needs to be manipulated. */
11744 if (!register_operand (value, mode)
11745 || code == AND || code == MINUS)
11746 {
68729b06 11747 src = out_result ? out_result : out_data;
641c2f8b
MW
11748 emit_move_insn (src, gen_lowpart (mode, value));
11749 }
11750 else
11751 src = value;
11752 gcc_assert (register_operand (src, mode));
9cd7b720 11753
641c2f8b
MW
11754 /* Preprocess the data for the operation as necessary. If the operation is
11755 a SET then emit a swap instruction and finish. */
9cd7b720
MW
11756 switch (code)
11757 {
11758 case SET:
641c2f8b 11759 aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
9cd7b720
MW
11760 return;
11761
641c2f8b
MW
11762 case MINUS:
11763 /* Negate the value and treat it as a PLUS. */
11764 {
11765 rtx neg_src;
11766
11767 /* Resize the value if necessary. */
11768 if (short_mode)
11769 src = gen_lowpart (wmode, src);
11770
11771 neg_src = gen_rtx_NEG (wmode, src);
11772 emit_insn (gen_rtx_SET (src, neg_src));
11773
11774 if (short_mode)
11775 src = gen_lowpart (mode, src);
11776 }
11777 /* Fall-through. */
11778 case PLUS:
11779 ldop_code = AARCH64_LDOP_PLUS;
11780 break;
11781
11782 case IOR:
11783 ldop_code = AARCH64_LDOP_OR;
11784 break;
11785
11786 case XOR:
11787 ldop_code = AARCH64_LDOP_XOR;
11788 break;
11789
11790 case AND:
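      /* LSE has no atomic AND: LDCLR computes *MEM & ~SRC, so complement the
	 source value here and emit the BIC-style load-operate below.  */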
11791 {
11792 rtx not_src;
11793
11794 /* Resize the value if necessary. */
11795 if (short_mode)
11796 src = gen_lowpart (wmode, src);
11797
11798 not_src = gen_rtx_NOT (wmode, src);
11799 emit_insn (gen_rtx_SET (src, not_src));
11800
11801 if (short_mode)
11802 src = gen_lowpart (mode, src);
11803 }
11804 ldop_code = AARCH64_LDOP_BIC;
11805 break;
11806
9cd7b720
MW
11807 default:
11808 /* The operation can't be done with atomic instructions. */
11809 gcc_unreachable ();
11810 }
641c2f8b
MW
11811
11812 aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
68729b06
MW
11813
11814 /* If necessary, calculate the data in memory after the update by redoing the
11815 operation from values in registers. */
11816 if (!out_result)
11817 return;
11818
11819 if (short_mode)
11820 {
11821 src = gen_lowpart (wmode, src);
11822 out_data = gen_lowpart (wmode, out_data);
11823 out_result = gen_lowpart (wmode, out_result);
11824 }
11825
11826 x = NULL_RTX;
11827
11828 switch (code)
11829 {
11830 case MINUS:
11831 case PLUS:
11832 x = gen_rtx_PLUS (wmode, out_data, src);
11833 break;
11834 case IOR:
11835 x = gen_rtx_IOR (wmode, out_data, src);
11836 break;
11837 case XOR:
11838 x = gen_rtx_XOR (wmode, out_data, src);
11839 break;
11840 case AND:
11841 aarch64_emit_bic (wmode, out_result, out_data, src, 0);
11842 return;
11843 default:
11844 gcc_unreachable ();
11845 }
11846
11847 emit_set_insn (out_result, x);
11848
11849 return;
9cd7b720
MW
11850}
11851
0462169c
SN
11852/* Split an atomic operation. */
11853
11854void
11855aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 11856 rtx value, rtx model_rtx, rtx cond)
0462169c 11857{
ef4bddc2
RS
11858 machine_mode mode = GET_MODE (mem);
11859 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
11860 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
11861 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
11862 rtx_code_label *label;
11863 rtx x;
0462169c 11864
9cd7b720 11865 /* Split the atomic operation into a sequence. */
0462169c
SN
11866 label = gen_label_rtx ();
11867 emit_label (label);
11868
11869 if (new_out)
11870 new_out = gen_lowpart (wmode, new_out);
11871 if (old_out)
11872 old_out = gen_lowpart (wmode, old_out);
11873 else
11874 old_out = new_out;
11875 value = simplify_gen_subreg (wmode, value, mode, 0);
11876
f70fb3b6
MW
11877 /* The initial load can be relaxed for a __sync operation since a final
11878 barrier will be emitted to stop code hoisting. */
11879 if (is_sync)
11880 aarch64_emit_load_exclusive (mode, old_out, mem,
11881 GEN_INT (MEMMODEL_RELAXED));
11882 else
11883 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
11884
11885 switch (code)
11886 {
11887 case SET:
11888 new_out = value;
11889 break;
11890
11891 case NOT:
11892 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 11893 emit_insn (gen_rtx_SET (new_out, x));
0462169c 11894 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 11895 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
11896 break;
11897
11898 case MINUS:
11899 if (CONST_INT_P (value))
11900 {
11901 value = GEN_INT (-INTVAL (value));
11902 code = PLUS;
11903 }
11904 /* Fall through. */
11905
11906 default:
11907 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 11908 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
11909 break;
11910 }
11911
11912 aarch64_emit_store_exclusive (mode, cond, mem,
11913 gen_lowpart (mode, new_out), model_rtx);
11914
11915 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
11916 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
11917 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 11918 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
11919
11920 /* Emit any final barrier needed for a __sync operation. */
11921 if (is_sync)
11922 aarch64_emit_post_barrier (model);
0462169c
SN
11923}
11924
c2ec330c
AL
11925static void
11926aarch64_init_libfuncs (void)
11927{
11928 /* Half-precision float operations. The compiler handles all operations
11929 with NULL libfuncs by converting to SFmode. */
11930
11931 /* Conversions. */
11932 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
11933 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
11934
11935 /* Arithmetic. */
11936 set_optab_libfunc (add_optab, HFmode, NULL);
11937 set_optab_libfunc (sdiv_optab, HFmode, NULL);
11938 set_optab_libfunc (smul_optab, HFmode, NULL);
11939 set_optab_libfunc (neg_optab, HFmode, NULL);
11940 set_optab_libfunc (sub_optab, HFmode, NULL);
11941
11942 /* Comparisons. */
11943 set_optab_libfunc (eq_optab, HFmode, NULL);
11944 set_optab_libfunc (ne_optab, HFmode, NULL);
11945 set_optab_libfunc (lt_optab, HFmode, NULL);
11946 set_optab_libfunc (le_optab, HFmode, NULL);
11947 set_optab_libfunc (ge_optab, HFmode, NULL);
11948 set_optab_libfunc (gt_optab, HFmode, NULL);
11949 set_optab_libfunc (unord_optab, HFmode, NULL);
11950}
11951
43e9d192 11952/* Target hook for c_mode_for_suffix. */
ef4bddc2 11953static machine_mode
43e9d192
IB
11954aarch64_c_mode_for_suffix (char suffix)
11955{
11956 if (suffix == 'q')
11957 return TFmode;
11958
11959 return VOIDmode;
11960}
11961
3520f7cc
JG
11962/* We can only represent floating point constants which will fit in
11963 "quarter-precision" values. These values are characterised by
 11964 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
 11965 by:
11966
11967 (-1)^s * (n/16) * 2^r
11968
11969 Where:
11970 's' is the sign bit.
11971 'n' is an integer in the range 16 <= n <= 31.
11972 'r' is an integer in the range -3 <= r <= 4. */
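
/* For example, 1.25 = (20/16) * 2^0 and 0.125 = (16/16) * 2^-3 are
   representable; the representable magnitudes therefore range from 0.125
   (n = 16, r = -3) up to 31.0 (n = 31, r = 4).  */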
11973
11974/* Return true iff X can be represented by a quarter-precision
 11975 floating point immediate operand. Note, we cannot represent 0.0. */
11976bool
11977aarch64_float_const_representable_p (rtx x)
11978{
11979 /* This represents our current view of how many bits
11980 make up the mantissa. */
11981 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 11982 int exponent;
3520f7cc 11983 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 11984 REAL_VALUE_TYPE r, m;
807e902e 11985 bool fail;
3520f7cc
JG
11986
11987 if (!CONST_DOUBLE_P (x))
11988 return false;
11989
c2ec330c
AL
11990 /* We don't support HFmode constants yet. */
11991 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
94bfa2da
TV
11992 return false;
11993
34a72c33 11994 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
11995
11996 /* We cannot represent infinities, NaNs or +/-zero. We won't
11997 know if we have +zero until we analyse the mantissa, but we
11998 can reject the other invalid values. */
11999 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
12000 || REAL_VALUE_MINUS_ZERO (r))
12001 return false;
12002
ba96cdfb 12003 /* Extract exponent. */
3520f7cc
JG
12004 r = real_value_abs (&r);
12005 exponent = REAL_EXP (&r);
12006
12007 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12008 highest (sign) bit, with a fixed binary point at bit point_pos.
12009 m1 holds the low part of the mantissa, m2 the high part.
12010 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
12011 bits for the mantissa, this can fail (low bits will be lost). */
12012 real_ldexp (&m, &r, point_pos - exponent);
807e902e 12013 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
12014
12015 /* If the low part of the mantissa has bits set we cannot represent
12016 the value. */
807e902e 12017 if (w.elt (0) != 0)
3520f7cc
JG
12018 return false;
12019 /* We have rejected the lower HOST_WIDE_INT, so update our
12020 understanding of how many bits lie in the mantissa and
12021 look only at the high HOST_WIDE_INT. */
807e902e 12022 mantissa = w.elt (1);
3520f7cc
JG
12023 point_pos -= HOST_BITS_PER_WIDE_INT;
12024
12025 /* We can only represent values with a mantissa of the form 1.xxxx. */
12026 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12027 if ((mantissa & mask) != 0)
12028 return false;
12029
12030 /* Having filtered unrepresentable values, we may now remove all
12031 but the highest 5 bits. */
12032 mantissa >>= point_pos - 5;
12033
12034 /* We cannot represent the value 0.0, so reject it. This is handled
12035 elsewhere. */
12036 if (mantissa == 0)
12037 return false;
12038
12039 /* Then, as bit 4 is always set, we can mask it off, leaving
12040 the mantissa in the range [0, 15]. */
12041 mantissa &= ~(1 << 4);
12042 gcc_assert (mantissa <= 15);
12043
12044 /* GCC internally does not use IEEE754-like encoding (where normalized
12045 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
12046 Our mantissa values are shifted 4 places to the left relative to
12047 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
12048 by 5 places to correct for GCC's representation. */
12049 exponent = 5 - exponent;
12050
12051 return (exponent >= 0 && exponent <= 7);
12052}
12053
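/* Return the assembly template for moving the AdvSIMD immediate CONST_VECTOR
   into a WIDTH-bit vector register, using MOVI/MVNI or FMOV as appropriate;
   e.g. a V4SImode vector with every element equal to 256 is emitted as a
   MOVI of 0x1 with a left shift of 8.  */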
12054char*
81c2dfb9 12055aarch64_output_simd_mov_immediate (rtx const_vector,
ef4bddc2 12056 machine_mode mode,
3520f7cc
JG
12057 unsigned width)
12058{
3ea63f60 12059 bool is_valid;
3520f7cc 12060 static char templ[40];
3520f7cc 12061 const char *mnemonic;
e4f0f84d 12062 const char *shift_op;
3520f7cc 12063 unsigned int lane_count = 0;
81c2dfb9 12064 char element_char;
3520f7cc 12065
e4f0f84d 12066 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
12067
12068 /* This will return true to show const_vector is legal for use as either
 12069 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
12070 also update INFO to show how the immediate should be generated. */
81c2dfb9 12071 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
12072 gcc_assert (is_valid);
12073
81c2dfb9 12074 element_char = sizetochar (info.element_width);
48063b9d
IB
12075 lane_count = width / info.element_width;
12076
3520f7cc 12077 mode = GET_MODE_INNER (mode);
0d8e1702 12078 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
3520f7cc 12079 {
48063b9d 12080 gcc_assert (info.shift == 0 && ! info.mvn);
0d8e1702
KT
12081 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
12082 move immediate path. */
48063b9d
IB
12083 if (aarch64_float_const_zero_rtx_p (info.value))
12084 info.value = GEN_INT (0);
12085 else
12086 {
83faf7d0 12087 const unsigned int buf_size = 20;
48063b9d 12088 char float_buf[buf_size] = {'\0'};
34a72c33
RS
12089 real_to_decimal_for_mode (float_buf,
12090 CONST_DOUBLE_REAL_VALUE (info.value),
12091 buf_size, buf_size, 1, mode);
48063b9d
IB
12092
12093 if (lane_count == 1)
12094 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
12095 else
12096 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 12097 lane_count, element_char, float_buf);
48063b9d
IB
12098 return templ;
12099 }
3520f7cc 12100 }
3520f7cc 12101
48063b9d 12102 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 12103 shift_op = info.msl ? "msl" : "lsl";
3520f7cc 12104
0d8e1702 12105 gcc_assert (CONST_INT_P (info.value));
3520f7cc 12106 if (lane_count == 1)
48063b9d
IB
12107 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
12108 mnemonic, UINTVAL (info.value));
12109 else if (info.shift)
12110 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
12111 ", %s %d", mnemonic, lane_count, element_char,
12112 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 12113 else
48063b9d 12114 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 12115 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
12116 return templ;
12117}
12118
b7342d25
IB
12119char*
12120aarch64_output_scalar_simd_mov_immediate (rtx immediate,
ef4bddc2 12121 machine_mode mode)
b7342d25 12122{
ef4bddc2 12123 machine_mode vmode;
b7342d25
IB
12124
12125 gcc_assert (!VECTOR_MODE_P (mode));
12126 vmode = aarch64_simd_container_mode (mode, 64);
12127 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
12128 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
12129}
12130
88b08073
JG
12131/* Split operands into moves from op[1] + op[2] into op[0]. */
12132
12133void
12134aarch64_split_combinev16qi (rtx operands[3])
12135{
12136 unsigned int dest = REGNO (operands[0]);
12137 unsigned int src1 = REGNO (operands[1]);
12138 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 12139 machine_mode halfmode = GET_MODE (operands[1]);
88b08073
JG
12140 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
12141 rtx destlo, desthi;
12142
12143 gcc_assert (halfmode == V16QImode);
12144
12145 if (src1 == dest && src2 == dest + halfregs)
12146 {
12147 /* No-op move. Can't split to nothing; emit something. */
12148 emit_note (NOTE_INSN_DELETED);
12149 return;
12150 }
12151
12152 /* Preserve register attributes for variable tracking. */
12153 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
12154 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
12155 GET_MODE_SIZE (halfmode));
12156
12157 /* Special case of reversed high/low parts. */
12158 if (reg_overlap_mentioned_p (operands[2], destlo)
12159 && reg_overlap_mentioned_p (operands[1], desthi))
12160 {
12161 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
12162 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
12163 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
12164 }
12165 else if (!reg_overlap_mentioned_p (operands[2], destlo))
12166 {
12167 /* Try to avoid unnecessary moves if part of the result
12168 is in the right place already. */
12169 if (src1 != dest)
12170 emit_move_insn (destlo, operands[1]);
12171 if (src2 != dest + halfregs)
12172 emit_move_insn (desthi, operands[2]);
12173 }
12174 else
12175 {
12176 if (src2 != dest + halfregs)
12177 emit_move_insn (desthi, operands[2]);
12178 if (src1 != dest)
12179 emit_move_insn (destlo, operands[1]);
12180 }
12181}
12182
12183/* vec_perm support. */
12184
12185#define MAX_VECT_LEN 16
12186
12187struct expand_vec_perm_d
12188{
12189 rtx target, op0, op1;
12190 unsigned char perm[MAX_VECT_LEN];
ef4bddc2 12191 machine_mode vmode;
88b08073
JG
12192 unsigned char nelt;
12193 bool one_vector_p;
12194 bool testing_p;
12195};
12196
12197/* Generate a variable permutation. */
12198
12199static void
12200aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
12201{
ef4bddc2 12202 machine_mode vmode = GET_MODE (target);
88b08073
JG
12203 bool one_vector_p = rtx_equal_p (op0, op1);
12204
12205 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
12206 gcc_checking_assert (GET_MODE (op0) == vmode);
12207 gcc_checking_assert (GET_MODE (op1) == vmode);
12208 gcc_checking_assert (GET_MODE (sel) == vmode);
12209 gcc_checking_assert (TARGET_SIMD);
12210
12211 if (one_vector_p)
12212 {
12213 if (vmode == V8QImode)
12214 {
12215 /* Expand the argument to a V16QI mode by duplicating it. */
12216 rtx pair = gen_reg_rtx (V16QImode);
12217 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
12218 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
12219 }
12220 else
12221 {
12222 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
12223 }
12224 }
12225 else
12226 {
12227 rtx pair;
12228
12229 if (vmode == V8QImode)
12230 {
12231 pair = gen_reg_rtx (V16QImode);
12232 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
12233 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
12234 }
12235 else
12236 {
12237 pair = gen_reg_rtx (OImode);
12238 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
12239 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
12240 }
12241 }
12242}
12243
12244void
12245aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
12246{
ef4bddc2 12247 machine_mode vmode = GET_MODE (target);
c9d1a16a 12248 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 12249 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 12250 rtx mask;
88b08073
JG
12251
12252 /* The TBL instruction does not use a modulo index, so we must take care
12253 of that ourselves. */
f7c4e5b8
AL
12254 mask = aarch64_simd_gen_const_vector_dup (vmode,
12255 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
12256 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
12257
f7c4e5b8
AL
12258 /* For big-endian, we also need to reverse the index within the vector
12259 (but not which vector). */
12260 if (BYTES_BIG_ENDIAN)
12261 {
12262 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
12263 if (!one_vector_p)
12264 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
12265 sel = expand_simple_binop (vmode, XOR, sel, mask,
12266 NULL, 0, OPTAB_LIB_WIDEN);
12267 }
88b08073
JG
12268 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
12269}
12270
cc4d934f
JG
12271/* Recognize patterns suitable for the TRN instructions. */
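/* For example, for V4SImode TRN1 corresponds to the permutation { 0, 4, 2, 6 }
   and TRN2 to { 1, 5, 3, 7 }, in GCC's (little-endian) lane numbering.  */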
12272static bool
12273aarch64_evpc_trn (struct expand_vec_perm_d *d)
12274{
12275 unsigned int i, odd, mask, nelt = d->nelt;
12276 rtx out, in0, in1, x;
12277 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 12278 machine_mode vmode = d->vmode;
cc4d934f
JG
12279
12280 if (GET_MODE_UNIT_SIZE (vmode) > 8)
12281 return false;
12282
12283 /* Note that these are little-endian tests.
12284 We correct for big-endian later. */
12285 if (d->perm[0] == 0)
12286 odd = 0;
12287 else if (d->perm[0] == 1)
12288 odd = 1;
12289 else
12290 return false;
12291 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
12292
12293 for (i = 0; i < nelt; i += 2)
12294 {
12295 if (d->perm[i] != i + odd)
12296 return false;
12297 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
12298 return false;
12299 }
12300
12301 /* Success! */
12302 if (d->testing_p)
12303 return true;
12304
12305 in0 = d->op0;
12306 in1 = d->op1;
12307 if (BYTES_BIG_ENDIAN)
12308 {
12309 x = in0, in0 = in1, in1 = x;
12310 odd = !odd;
12311 }
12312 out = d->target;
12313
12314 if (odd)
12315 {
12316 switch (vmode)
12317 {
12318 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
12319 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
12320 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
12321 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
12322 case V4SImode: gen = gen_aarch64_trn2v4si; break;
12323 case V2SImode: gen = gen_aarch64_trn2v2si; break;
12324 case V2DImode: gen = gen_aarch64_trn2v2di; break;
12325 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
12326 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
12327 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
12328 default:
12329 return false;
12330 }
12331 }
12332 else
12333 {
12334 switch (vmode)
12335 {
12336 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
12337 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
12338 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
12339 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
12340 case V4SImode: gen = gen_aarch64_trn1v4si; break;
12341 case V2SImode: gen = gen_aarch64_trn1v2si; break;
12342 case V2DImode: gen = gen_aarch64_trn1v2di; break;
12343 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
12344 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
12345 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
12346 default:
12347 return false;
12348 }
12349 }
12350
12351 emit_insn (gen (out, in0, in1));
12352 return true;
12353}
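/* Worked example of the index pattern matched above (little-endian element
   numbering, as in the tests): for two V4SImode inputs a = {a0,a1,a2,a3}
   and b = {b0,b1,b2,b3} the TRN instructions interleave corresponding
   even/odd elements:

     TRN1 a, b -> {a0, b0, a2, b2}   i.e. d->perm = {0, 4, 2, 6}
     TRN2 a, b -> {a1, b1, a3, b3}   i.e. d->perm = {1, 5, 3, 7}

   With d->one_vector_p the indices are masked by nelt - 1, so the
   corresponding single-input patterns also match.  */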
12354
12355/* Recognize patterns suitable for the UZP instructions. */
12356static bool
12357aarch64_evpc_uzp (struct expand_vec_perm_d *d)
12358{
12359 unsigned int i, odd, mask, nelt = d->nelt;
12360 rtx out, in0, in1, x;
12361 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 12362 machine_mode vmode = d->vmode;
cc4d934f
JG
12363
12364 if (GET_MODE_UNIT_SIZE (vmode) > 8)
12365 return false;
12366
12367 /* Note that these are little-endian tests.
12368 We correct for big-endian later. */
12369 if (d->perm[0] == 0)
12370 odd = 0;
12371 else if (d->perm[0] == 1)
12372 odd = 1;
12373 else
12374 return false;
12375 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
12376
12377 for (i = 0; i < nelt; i++)
12378 {
12379 unsigned elt = (i * 2 + odd) & mask;
12380 if (d->perm[i] != elt)
12381 return false;
12382 }
12383
12384 /* Success! */
12385 if (d->testing_p)
12386 return true;
12387
12388 in0 = d->op0;
12389 in1 = d->op1;
12390 if (BYTES_BIG_ENDIAN)
12391 {
12392 x = in0, in0 = in1, in1 = x;
12393 odd = !odd;
12394 }
12395 out = d->target;
12396
12397 if (odd)
12398 {
12399 switch (vmode)
12400 {
12401 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
12402 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
12403 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
12404 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
12405 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
12406 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
12407 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
12408 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
12409 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
12410 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
12411 default:
12412 return false;
12413 }
12414 }
12415 else
12416 {
12417 switch (vmode)
12418 {
12419 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
12420 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
12421 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
12422 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
12423 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
12424 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
12425 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
12426 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
12427 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
12428 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
12429 default:
12430 return false;
12431 }
12432 }
12433
12434 emit_insn (gen (out, in0, in1));
12435 return true;
12436}
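/* Worked example of the index pattern matched above (little-endian element
   numbering): for two V4SImode inputs a = {a0,a1,a2,a3} and
   b = {b0,b1,b2,b3} the UZP instructions de-interleave even/odd elements:

     UZP1 a, b -> {a0, a2, b0, b2}   i.e. d->perm = {0, 2, 4, 6}
     UZP2 a, b -> {a1, a3, b1, b3}   i.e. d->perm = {1, 3, 5, 7}  */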
12437
12438/* Recognize patterns suitable for the ZIP instructions. */
12439static bool
12440aarch64_evpc_zip (struct expand_vec_perm_d *d)
12441{
12442 unsigned int i, high, mask, nelt = d->nelt;
12443 rtx out, in0, in1, x;
12444 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 12445 machine_mode vmode = d->vmode;
cc4d934f
JG
12446
12447 if (GET_MODE_UNIT_SIZE (vmode) > 8)
12448 return false;
12449
12450 /* Note that these are little-endian tests.
12451 We correct for big-endian later. */
12452 high = nelt / 2;
12453 if (d->perm[0] == high)
12454 /* Do Nothing. */
12455 ;
12456 else if (d->perm[0] == 0)
12457 high = 0;
12458 else
12459 return false;
12460 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
12461
12462 for (i = 0; i < nelt / 2; i++)
12463 {
12464 unsigned elt = (i + high) & mask;
12465 if (d->perm[i * 2] != elt)
12466 return false;
12467 elt = (elt + nelt) & mask;
12468 if (d->perm[i * 2 + 1] != elt)
12469 return false;
12470 }
12471
12472 /* Success! */
12473 if (d->testing_p)
12474 return true;
12475
12476 in0 = d->op0;
12477 in1 = d->op1;
12478 if (BYTES_BIG_ENDIAN)
12479 {
12480 x = in0, in0 = in1, in1 = x;
12481 high = !high;
12482 }
12483 out = d->target;
12484
12485 if (high)
12486 {
12487 switch (vmode)
12488 {
12489 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
12490 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
12491 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
12492 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
12493 case V4SImode: gen = gen_aarch64_zip2v4si; break;
12494 case V2SImode: gen = gen_aarch64_zip2v2si; break;
12495 case V2DImode: gen = gen_aarch64_zip2v2di; break;
12496 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
12497 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
12498 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
12499 default:
12500 return false;
12501 }
12502 }
12503 else
12504 {
12505 switch (vmode)
12506 {
12507 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
12508 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
12509 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
12510 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
12511 case V4SImode: gen = gen_aarch64_zip1v4si; break;
12512 case V2SImode: gen = gen_aarch64_zip1v2si; break;
12513 case V2DImode: gen = gen_aarch64_zip1v2di; break;
12514 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
12515 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
12516 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
12517 default:
12518 return false;
12519 }
12520 }
12521
12522 emit_insn (gen (out, in0, in1));
12523 return true;
12524}
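/* Worked example of the index pattern matched above (little-endian element
   numbering): for two V4SImode inputs a = {a0,a1,a2,a3} and
   b = {b0,b1,b2,b3} the ZIP instructions interleave the low or high halves:

     ZIP1 a, b -> {a0, b0, a1, b1}   i.e. d->perm = {0, 4, 1, 5}
     ZIP2 a, b -> {a2, b2, a3, b3}   i.e. d->perm = {2, 6, 3, 7}  */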
12525
ae0533da
AL
12526/* Recognize patterns for the EXT insn. */
12527
12528static bool
12529aarch64_evpc_ext (struct expand_vec_perm_d *d)
12530{
12531 unsigned int i, nelt = d->nelt;
12532 rtx (*gen) (rtx, rtx, rtx, rtx);
12533 rtx offset;
12534
12535 unsigned int location = d->perm[0]; /* Always < nelt. */
12536
12537 /* Check if the extracted indices are increasing by one. */
12538 for (i = 1; i < nelt; i++)
12539 {
12540 unsigned int required = location + i;
12541 if (d->one_vector_p)
12542 {
12543 /* We'll pass the same vector in twice, so allow indices to wrap. */
12544 required &= (nelt - 1);
12545 }
12546 if (d->perm[i] != required)
12547 return false;
12548 }
12549
ae0533da
AL
12550 switch (d->vmode)
12551 {
12552 case V16QImode: gen = gen_aarch64_extv16qi; break;
12553 case V8QImode: gen = gen_aarch64_extv8qi; break;
12554 case V4HImode: gen = gen_aarch64_extv4hi; break;
12555 case V8HImode: gen = gen_aarch64_extv8hi; break;
12556 case V2SImode: gen = gen_aarch64_extv2si; break;
12557 case V4SImode: gen = gen_aarch64_extv4si; break;
12558 case V2SFmode: gen = gen_aarch64_extv2sf; break;
12559 case V4SFmode: gen = gen_aarch64_extv4sf; break;
12560 case V2DImode: gen = gen_aarch64_extv2di; break;
12561 case V2DFmode: gen = gen_aarch64_extv2df; break;
12562 default:
12563 return false;
12564 }
12565
12566 /* Success! */
12567 if (d->testing_p)
12568 return true;
12569
b31e65bb
AL
12570 /* The case where (location == 0) is a no-op for both big- and little-endian,
12571 and is removed by the mid-end at optimization levels -O1 and higher. */
12572
12573 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
12574 {
12575 /* After setup, we want the high elements of the first vector (stored
12576 at the LSB end of the register), and the low elements of the second
12577 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 12578 std::swap (d->op0, d->op1);
ae0533da
AL
12579 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
12580 location = nelt - location;
12581 }
12582
12583 offset = GEN_INT (location);
12584 emit_insn (gen (d->target, d->op0, d->op1, offset));
12585 return true;
12586}
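/* Worked example: on V4SImode the permutation {1, 2, 3, 4} selects four
   consecutive elements of the concatenation op0:op1 starting at index 1,
   so it is matched with location == 1 and emitted as a single EXT.  With
   d->one_vector_p, {3, 0, 1, 2} also matches, because the indices are
   allowed to wrap around nelt when the same vector is passed in twice.  */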
12587
923fcec3
AL
12588/* Recognize patterns for the REV insns. */
12589
12590static bool
12591aarch64_evpc_rev (struct expand_vec_perm_d *d)
12592{
12593 unsigned int i, j, diff, nelt = d->nelt;
12594 rtx (*gen) (rtx, rtx);
12595
12596 if (!d->one_vector_p)
12597 return false;
12598
12599 diff = d->perm[0];
12600 switch (diff)
12601 {
12602 case 7:
12603 switch (d->vmode)
12604 {
12605 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
12606 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
12607 default:
12608 return false;
12609 }
12610 break;
12611 case 3:
12612 switch (d->vmode)
12613 {
12614 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
12615 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
12616 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
12617 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
12618 default:
12619 return false;
12620 }
12621 break;
12622 case 1:
12623 switch (d->vmode)
12624 {
12625 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
12626 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
12627 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
12628 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
12629 case V4SImode: gen = gen_aarch64_rev64v4si; break;
12630 case V2SImode: gen = gen_aarch64_rev64v2si; break;
12631 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
12632 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
12633 default:
12634 return false;
12635 }
12636 break;
12637 default:
12638 return false;
12639 }
12640
12641 for (i = 0; i < nelt ; i += diff + 1)
12642 for (j = 0; j <= diff; j += 1)
12643 {
12644 /* This is guaranteed to be true as the value of diff
12645 is 7, 3 or 1, and we should have enough elements in the
12646 queue to generate this. Getting a vector mask with a
12647 value of diff other than these values implies that
12648 something is wrong by the time we get here. */
12649 gcc_assert (i + j < nelt);
12650 if (d->perm[i + j] != i + diff - j)
12651 return false;
12652 }
12653
12654 /* Success! */
12655 if (d->testing_p)
12656 return true;
12657
12658 emit_insn (gen (d->target, d->op0));
12659 return true;
12660}
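/* Worked example: diff == 1 on V4SImode means d->perm = {1, 0, 3, 2}, i.e.
   adjacent 32-bit elements are swapped within each 64-bit chunk, which is
   exactly REV64 on .4S.  Likewise diff == 3 on V8QImode
   ({3, 2, 1, 0, 7, 6, 5, 4}) maps to REV32 on bytes, and diff == 7 on
   V16QImode to REV64 on bytes.  */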
12661
91bd4114
JG
12662static bool
12663aarch64_evpc_dup (struct expand_vec_perm_d *d)
12664{
12665 rtx (*gen) (rtx, rtx, rtx);
12666 rtx out = d->target;
12667 rtx in0;
ef4bddc2 12668 machine_mode vmode = d->vmode;
91bd4114
JG
12669 unsigned int i, elt, nelt = d->nelt;
12670 rtx lane;
12671
91bd4114
JG
12672 elt = d->perm[0];
12673 for (i = 1; i < nelt; i++)
12674 {
12675 if (elt != d->perm[i])
12676 return false;
12677 }
12678
12679 /* The generic preparation in aarch64_expand_vec_perm_const_1
12680 swaps the operand order and the permute indices if it finds
12681 d->perm[0] to be in the second operand. Thus, we can always
12682 use d->op0 and need not do any extra arithmetic to get the
12683 correct lane number. */
12684 in0 = d->op0;
f901401e 12685 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114
JG
12686
12687 switch (vmode)
12688 {
12689 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
12690 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
12691 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
12692 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
12693 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
12694 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
12695 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
862abc04
AL
12696 case V8HFmode: gen = gen_aarch64_dup_lanev8hf; break;
12697 case V4HFmode: gen = gen_aarch64_dup_lanev4hf; break;
91bd4114
JG
12698 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
12699 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
12700 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
12701 default:
12702 return false;
12703 }
12704
12705 emit_insn (gen (out, in0, lane));
12706 return true;
12707}
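/* Worked example: a permutation such as {2, 2, 2, 2} on V4SImode is a
   broadcast of lane 2 and is emitted as a single DUP Vd.4S, Vn.S[2]
   (the lane number used by the pattern is corrected for big-endian).  */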
12708
88b08073
JG
12709static bool
12710aarch64_evpc_tbl (struct expand_vec_perm_d *d)
12711{
12712 rtx rperm[MAX_VECT_LEN], sel;
ef4bddc2 12713 machine_mode vmode = d->vmode;
88b08073
JG
12714 unsigned int i, nelt = d->nelt;
12715
88b08073
JG
12716 if (d->testing_p)
12717 return true;
12718
12719 /* Generic code will try constant permutation twice. Once with the
12720 original mode and again with the elements lowered to QImode.
12721 So wait and don't do the selector expansion ourselves. */
12722 if (vmode != V8QImode && vmode != V16QImode)
12723 return false;
12724
12725 for (i = 0; i < nelt; ++i)
bbcc9c00
TB
12726 {
12727 int nunits = GET_MODE_NUNITS (vmode);
12728
12729 /* If big-endian and two vectors we end up with a weird mixed-endian
12730 mode on NEON. Reverse the index within each word but not the word
12731 itself. */
12732 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
12733 : d->perm[i]);
12734 }
88b08073
JG
12735 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
12736 sel = force_reg (vmode, sel);
12737
12738 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
12739 return true;
12740}
12741
12742static bool
12743aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
12744{
12745 /* The pattern matching functions above are written to look for a small
12746 number to begin the sequence (0, 1, N/2). If we begin with an index
12747 from the second operand, we can swap the operands. */
12748 if (d->perm[0] >= d->nelt)
12749 {
12750 unsigned i, nelt = d->nelt;
88b08073 12751
0696116a 12752 gcc_assert (nelt == (nelt & -nelt));
88b08073 12753 for (i = 0; i < nelt; ++i)
0696116a 12754 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
88b08073 12755
cb5c6c29 12756 std::swap (d->op0, d->op1);
88b08073
JG
12757 }
12758
12759 if (TARGET_SIMD)
cc4d934f 12760 {
923fcec3
AL
12761 if (aarch64_evpc_rev (d))
12762 return true;
12763 else if (aarch64_evpc_ext (d))
ae0533da 12764 return true;
f901401e
AL
12765 else if (aarch64_evpc_dup (d))
12766 return true;
ae0533da 12767 else if (aarch64_evpc_zip (d))
cc4d934f
JG
12768 return true;
12769 else if (aarch64_evpc_uzp (d))
12770 return true;
12771 else if (aarch64_evpc_trn (d))
12772 return true;
12773 return aarch64_evpc_tbl (d);
12774 }
88b08073
JG
12775 return false;
12776}
12777
12778/* Expand a vec_perm_const pattern. */
12779
12780bool
12781aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
12782{
12783 struct expand_vec_perm_d d;
12784 int i, nelt, which;
12785
12786 d.target = target;
12787 d.op0 = op0;
12788 d.op1 = op1;
12789
12790 d.vmode = GET_MODE (target);
12791 gcc_assert (VECTOR_MODE_P (d.vmode));
12792 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
12793 d.testing_p = false;
12794
12795 for (i = which = 0; i < nelt; ++i)
12796 {
12797 rtx e = XVECEXP (sel, 0, i);
12798 int ei = INTVAL (e) & (2 * nelt - 1);
12799 which |= (ei < nelt ? 1 : 2);
12800 d.perm[i] = ei;
12801 }
12802
12803 switch (which)
12804 {
12805 default:
12806 gcc_unreachable ();
12807
12808 case 3:
12809 d.one_vector_p = false;
12810 if (!rtx_equal_p (op0, op1))
12811 break;
12812
12813 /* The elements of PERM do not suggest that only the first operand
12814 is used, but both operands are identical. Allow easier matching
12815 of the permutation by folding the permutation into the single
12816 input vector. */
12817 /* Fall Through. */
12818 case 2:
12819 for (i = 0; i < nelt; ++i)
12820 d.perm[i] &= nelt - 1;
12821 d.op0 = op1;
12822 d.one_vector_p = true;
12823 break;
12824
12825 case 1:
12826 d.op1 = op0;
12827 d.one_vector_p = true;
12828 break;
12829 }
12830
12831 return aarch64_expand_vec_perm_const_1 (&d);
12832}
12833
12834static bool
ef4bddc2 12835aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073
JG
12836 const unsigned char *sel)
12837{
12838 struct expand_vec_perm_d d;
12839 unsigned int i, nelt, which;
12840 bool ret;
12841
12842 d.vmode = vmode;
12843 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
12844 d.testing_p = true;
12845 memcpy (d.perm, sel, nelt);
12846
12847 /* Calculate whether all elements are in one vector. */
12848 for (i = which = 0; i < nelt; ++i)
12849 {
12850 unsigned char e = d.perm[i];
12851 gcc_assert (e < 2 * nelt);
12852 which |= (e < nelt ? 1 : 2);
12853 }
12854
12855 /* If all elements are from the second vector, reindex as if from the
12856 first vector. */
12857 if (which == 2)
12858 for (i = 0; i < nelt; ++i)
12859 d.perm[i] -= nelt;
12860
12861 /* Check whether the mask can be applied to a single vector. */
12862 d.one_vector_p = (which != 3);
12863
12864 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
12865 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
12866 if (!d.one_vector_p)
12867 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
12868
12869 start_sequence ();
12870 ret = aarch64_expand_vec_perm_const_1 (&d);
12871 end_sequence ();
12872
12873 return ret;
12874}
12875
668046d1
DS
12876rtx
12877aarch64_reverse_mask (enum machine_mode mode)
12878{
12879 /* We have to reverse each vector because we don't have
12880 a permuted load that can reverse-load according to ABI rules. */
12881 rtx mask;
12882 rtvec v = rtvec_alloc (16);
12883 int i, j;
12884 int nunits = GET_MODE_NUNITS (mode);
12885 int usize = GET_MODE_UNIT_SIZE (mode);
12886
12887 gcc_assert (BYTES_BIG_ENDIAN);
12888 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
12889
12890 for (i = 0; i < nunits; i++)
12891 for (j = 0; j < usize; j++)
12892 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
12893 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
12894 return force_reg (V16QImode, mask);
12895}
12896
61f17a5c
WD
12897/* Implement MODES_TIEABLE_P. In principle we should always return true.
12898 However due to issues with register allocation it is preferable to avoid
12899 tying integer scalar and FP scalar modes. Executing integer operations
12900 in general registers is better than treating them as scalar vector
12901 operations. This reduces latency and avoids redundant int<->FP moves.
12902 So tie modes if they are either the same class, or vector modes with
12903 other vector modes, vector structs or any scalar mode.
12904*/
97e1ad78
JG
12905
12906bool
ef4bddc2 12907aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
12908{
12909 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
12910 return true;
12911
12912 /* We specifically want to allow elements of "structure" modes to
12913 be tieable to the structure. This more general condition allows
12914 other rarer situations too. */
61f17a5c
WD
12915 if (aarch64_vector_mode_p (mode1) && aarch64_vector_mode_p (mode2))
12916 return true;
12917
12918 /* Also allow any scalar modes with vectors. */
12919 if (aarch64_vector_mode_supported_p (mode1)
12920 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
12921 return true;
12922
12923 return false;
12924}
12925
e2c75eea
JG
12926/* Return a new RTX holding the result of moving POINTER forward by
12927 AMOUNT bytes. */
12928
12929static rtx
12930aarch64_move_pointer (rtx pointer, int amount)
12931{
12932 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
12933
12934 return adjust_automodify_address (pointer, GET_MODE (pointer),
12935 next, amount);
12936}
12937
12938/* Return a new RTX holding the result of moving POINTER forward by the
12939 size of the mode it points to. */
12940
12941static rtx
12942aarch64_progress_pointer (rtx pointer)
12943{
12944 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
12945
12946 return aarch64_move_pointer (pointer, amount);
12947}
12948
12949/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
12950 MODE bytes. */
12951
12952static void
12953aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 12954 machine_mode mode)
e2c75eea
JG
12955{
12956 rtx reg = gen_reg_rtx (mode);
12957
12958 /* "Cast" the pointers to the correct mode. */
12959 *src = adjust_address (*src, mode, 0);
12960 *dst = adjust_address (*dst, mode, 0);
12961 /* Emit the memcpy. */
12962 emit_move_insn (reg, *src);
12963 emit_move_insn (*dst, reg);
12964 /* Move the pointers forward. */
12965 *src = aarch64_progress_pointer (*src);
12966 *dst = aarch64_progress_pointer (*dst);
12967}
12968
12969/* Expand movmem, as if from a __builtin_memcpy. Return true if
12970 we succeed, otherwise return false. */
12971
12972bool
12973aarch64_expand_movmem (rtx *operands)
12974{
12975 unsigned int n;
12976 rtx dst = operands[0];
12977 rtx src = operands[1];
12978 rtx base;
12979 bool speed_p = !optimize_function_for_size_p (cfun);
12980
12981 /* When optimizing for size, give a better estimate of the length of a
12982 memcpy call, but use the default otherwise. */
12983 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
12984
12985 /* We can't do anything smart if the amount to copy is not constant. */
12986 if (!CONST_INT_P (operands[2]))
12987 return false;
12988
12989 n = UINTVAL (operands[2]);
12990
12991 /* Try to keep the number of instructions low. For cases below 16 bytes we
12992 need to make at most two moves. For cases above 16 bytes it will be one
12993 move for each 16 byte chunk, then at most two additional moves. */
12994 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
12995 return false;
12996
12997 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12998 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
12999
13000 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
13001 src = adjust_automodify_address (src, VOIDmode, base, 0);
13002
13003 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
13004 1-byte chunk. */
13005 if (n < 4)
13006 {
13007 if (n >= 2)
13008 {
13009 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
13010 n -= 2;
13011 }
13012
13013 if (n == 1)
13014 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
13015
13016 return true;
13017 }
13018
13019 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
13020 4-byte chunk, partially overlapping with the previously copied chunk. */
13021 if (n < 8)
13022 {
13023 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13024 n -= 4;
13025 if (n > 0)
13026 {
13027 int move = n - 4;
13028
13029 src = aarch64_move_pointer (src, move);
13030 dst = aarch64_move_pointer (dst, move);
13031 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13032 }
13033 return true;
13034 }
13035
13036 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
13037 them, then (if applicable) an 8-byte chunk. */
13038 while (n >= 8)
13039 {
13040 if (n / 16)
13041 {
13042 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
13043 n -= 16;
13044 }
13045 else
13046 {
13047 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
13048 n -= 8;
13049 }
13050 }
13051
13052 /* Finish the final bytes of the copy. We can always do this in one
13053 instruction. We either copy the exact amount we need, or partially
13054 overlap with the previous chunk we copied and copy 8 bytes. */
13055 if (n == 0)
13056 return true;
13057 else if (n == 1)
13058 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
13059 else if (n == 2)
13060 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
13061 else if (n == 4)
13062 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13063 else
13064 {
13065 if (n == 3)
13066 {
13067 src = aarch64_move_pointer (src, -1);
13068 dst = aarch64_move_pointer (dst, -1);
13069 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13070 }
13071 else
13072 {
13073 int move = n - 8;
13074
13075 src = aarch64_move_pointer (src, move);
13076 dst = aarch64_move_pointer (dst, move);
13077 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
13078 }
13079 }
13080
13081 return true;
13082}
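/* Worked example of the strategy above: for a constant copy of 23 bytes the
   loop emits one 16-byte (TImode) block copy, leaving n == 7; the tail code
   then backs both pointers up by one byte and emits a single 8-byte (DImode)
   copy that overlaps the previous chunk by one byte, for a total of two
   block moves instead of a libcall.  */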
13083
a3125fc2
CL
13084/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
13085
13086static unsigned HOST_WIDE_INT
13087aarch64_asan_shadow_offset (void)
13088{
13089 return (HOST_WIDE_INT_1 << 36);
13090}
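/* For reference, and assuming the usual AddressSanitizer mapping with a
   shadow scale of 3, the shadow byte for address ADDR then lives at
   (ADDR >> 3) + (1 << 36) on AArch64; only the offset is supplied here,
   the scale comes from the generic asan machinery.  */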
13091
d3006da6 13092static bool
445d7826 13093aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
d3006da6
JG
13094 unsigned int align,
13095 enum by_pieces_operation op,
13096 bool speed_p)
13097{
13098 /* STORE_BY_PIECES can be used when copying a constant string, but
13099 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
13100 For now we always fail this and let the move_by_pieces code copy
13101 the string from read-only memory. */
13102 if (op == STORE_BY_PIECES)
13103 return false;
13104
13105 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
13106}
13107
5f3bc026
ZC
13108static rtx
13109aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
13110 int code, tree treeop0, tree treeop1)
13111{
c8012fbc
WD
13112 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
13113 rtx op0, op1;
5f3bc026 13114 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 13115 insn_code icode;
5f3bc026
ZC
13116 struct expand_operand ops[4];
13117
5f3bc026
ZC
13118 start_sequence ();
13119 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
13120
13121 op_mode = GET_MODE (op0);
13122 if (op_mode == VOIDmode)
13123 op_mode = GET_MODE (op1);
13124
13125 switch (op_mode)
13126 {
13127 case QImode:
13128 case HImode:
13129 case SImode:
13130 cmp_mode = SImode;
13131 icode = CODE_FOR_cmpsi;
13132 break;
13133
13134 case DImode:
13135 cmp_mode = DImode;
13136 icode = CODE_FOR_cmpdi;
13137 break;
13138
786e3c06
WD
13139 case SFmode:
13140 cmp_mode = SFmode;
13141 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
13142 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
13143 break;
13144
13145 case DFmode:
13146 cmp_mode = DFmode;
13147 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
13148 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
13149 break;
13150
5f3bc026
ZC
13151 default:
13152 end_sequence ();
13153 return NULL_RTX;
13154 }
13155
c8012fbc
WD
13156 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
13157 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
13158 if (!op0 || !op1)
13159 {
13160 end_sequence ();
13161 return NULL_RTX;
13162 }
13163 *prep_seq = get_insns ();
13164 end_sequence ();
13165
c8012fbc
WD
13166 create_fixed_operand (&ops[0], op0);
13167 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
13168
13169 start_sequence ();
c8012fbc 13170 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
13171 {
13172 end_sequence ();
13173 return NULL_RTX;
13174 }
13175 *gen_seq = get_insns ();
13176 end_sequence ();
13177
c8012fbc
WD
13178 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
13179 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
13180}
13181
13182static rtx
13183aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
13184 tree treeop0, tree treeop1, int bit_code)
13185{
c8012fbc
WD
13186 rtx op0, op1, target;
13187 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 13188 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 13189 insn_code icode;
5f3bc026 13190 struct expand_operand ops[6];
c8012fbc 13191 int aarch64_cond;
5f3bc026
ZC
13192
13193 push_to_sequence ((rtx_insn*) *prep_seq);
13194 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
13195
13196 op_mode = GET_MODE (op0);
13197 if (op_mode == VOIDmode)
13198 op_mode = GET_MODE (op1);
13199
13200 switch (op_mode)
13201 {
13202 case QImode:
13203 case HImode:
13204 case SImode:
13205 cmp_mode = SImode;
c8012fbc 13206 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
13207 break;
13208
13209 case DImode:
13210 cmp_mode = DImode;
c8012fbc 13211 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
13212 break;
13213
786e3c06
WD
13214 case SFmode:
13215 cmp_mode = SFmode;
13216 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
13217 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
13218 break;
13219
13220 case DFmode:
13221 cmp_mode = DFmode;
13222 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
13223 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
13224 break;
13225
5f3bc026
ZC
13226 default:
13227 end_sequence ();
13228 return NULL_RTX;
13229 }
13230
13231 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
13232 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
13233 if (!op0 || !op1)
13234 {
13235 end_sequence ();
13236 return NULL_RTX;
13237 }
13238 *prep_seq = get_insns ();
13239 end_sequence ();
13240
13241 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 13242 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 13243
c8012fbc
WD
13244 if (bit_code != AND)
13245 {
13246 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
13247 GET_MODE (XEXP (prev, 0))),
13248 VOIDmode, XEXP (prev, 0), const0_rtx);
13249 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
13250 }
13251
13252 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
13253 create_fixed_operand (&ops[1], target);
13254 create_fixed_operand (&ops[2], op0);
13255 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
13256 create_fixed_operand (&ops[4], prev);
13257 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026
ZC
13258
13259 push_to_sequence ((rtx_insn*) *gen_seq);
13260 if (!maybe_expand_insn (icode, 6, ops))
13261 {
13262 end_sequence ();
13263 return NULL_RTX;
13264 }
13265
13266 *gen_seq = get_insns ();
13267 end_sequence ();
13268
c8012fbc 13269 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
13270}
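/* Rough illustration of the two hooks above: for a condition such as
   "a == 0 && b == 5" the first hook emits the initial compare and the
   second hook chains a conditional compare that only takes effect when the
   first one succeeded, roughly:

     cmp   w0, #0
     ccmp  w1, #5, #0, eq    // if eq, compare b with 5; else force NZCV = 0

   after which the caller tests the condition of the final compare.  The
   exact NZCV immediate and condition are derived from
   aarch64_get_condition_code_1 and the AND/IOR handling above.  */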
13271
13272#undef TARGET_GEN_CCMP_FIRST
13273#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
13274
13275#undef TARGET_GEN_CCMP_NEXT
13276#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
13277
6a569cdd
KT
13278/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
13279 instruction fusion of some sort. */
13280
13281static bool
13282aarch64_macro_fusion_p (void)
13283{
b175b679 13284 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
13285}
13286
13287
13288/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
13289 should be kept together during scheduling. */
13290
13291static bool
13292aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
13293{
13294 rtx set_dest;
13295 rtx prev_set = single_set (prev);
13296 rtx curr_set = single_set (curr);
13297 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
13298 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
13299
13300 if (!aarch64_macro_fusion_p ())
13301 return false;
13302
d7b03373 13303 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
13304 {
13305 /* We are trying to match:
13306 prev (mov) == (set (reg r0) (const_int imm16))
13307 curr (movk) == (set (zero_extract (reg r0)
13308 (const_int 16)
13309 (const_int 16))
13310 (const_int imm16_1)) */
13311
13312 set_dest = SET_DEST (curr_set);
13313
13314 if (GET_CODE (set_dest) == ZERO_EXTRACT
13315 && CONST_INT_P (SET_SRC (curr_set))
13316 && CONST_INT_P (SET_SRC (prev_set))
13317 && CONST_INT_P (XEXP (set_dest, 2))
13318 && INTVAL (XEXP (set_dest, 2)) == 16
13319 && REG_P (XEXP (set_dest, 0))
13320 && REG_P (SET_DEST (prev_set))
13321 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
13322 {
13323 return true;
13324 }
13325 }
13326
d7b03373 13327 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
13328 {
13329
13330 /* We're trying to match:
13331 prev (adrp) == (set (reg r1)
13332 (high (symbol_ref ("SYM"))))
13333 curr (add) == (set (reg r0)
13334 (lo_sum (reg r1)
13335 (symbol_ref ("SYM"))))
13336 Note that r0 need not necessarily be the same as r1, especially
13337 during pre-regalloc scheduling. */
13338
13339 if (satisfies_constraint_Ush (SET_SRC (prev_set))
13340 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
13341 {
13342 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
13343 && REG_P (XEXP (SET_SRC (curr_set), 0))
13344 && REGNO (XEXP (SET_SRC (curr_set), 0))
13345 == REGNO (SET_DEST (prev_set))
13346 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
13347 XEXP (SET_SRC (curr_set), 1)))
13348 return true;
13349 }
13350 }
13351
d7b03373 13352 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
13353 {
13354
13355 /* We're trying to match:
13356 prev (movk) == (set (zero_extract (reg r0)
13357 (const_int 16)
13358 (const_int 32))
13359 (const_int imm16_1))
13360 curr (movk) == (set (zero_extract (reg r0)
13361 (const_int 16)
13362 (const_int 48))
13363 (const_int imm16_2)) */
13364
13365 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
13366 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
13367 && REG_P (XEXP (SET_DEST (prev_set), 0))
13368 && REG_P (XEXP (SET_DEST (curr_set), 0))
13369 && REGNO (XEXP (SET_DEST (prev_set), 0))
13370 == REGNO (XEXP (SET_DEST (curr_set), 0))
13371 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
13372 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
13373 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
13374 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
13375 && CONST_INT_P (SET_SRC (prev_set))
13376 && CONST_INT_P (SET_SRC (curr_set)))
13377 return true;
13378
13379 }
d7b03373 13380 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
13381 {
13382 /* We're trying to match:
13383 prev (adrp) == (set (reg r0)
13384 (high (symbol_ref ("SYM"))))
13385 curr (ldr) == (set (reg r1)
13386 (mem (lo_sum (reg r0)
13387 (symbol_ref ("SYM")))))
13388 or
13389 curr (ldr) == (set (reg r1)
13390 (zero_extend (mem
13391 (lo_sum (reg r0)
13392 (symbol_ref ("SYM")))))) */
13393 if (satisfies_constraint_Ush (SET_SRC (prev_set))
13394 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
13395 {
13396 rtx curr_src = SET_SRC (curr_set);
13397
13398 if (GET_CODE (curr_src) == ZERO_EXTEND)
13399 curr_src = XEXP (curr_src, 0);
13400
13401 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
13402 && REG_P (XEXP (XEXP (curr_src, 0), 0))
13403 && REGNO (XEXP (XEXP (curr_src, 0), 0))
13404 == REGNO (SET_DEST (prev_set))
13405 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
13406 XEXP (SET_SRC (prev_set), 0)))
13407 return true;
13408 }
13409 }
cd0cb232 13410
d7b03373 13411 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
00a8574a
WD
13412 && aarch_crypto_can_dual_issue (prev, curr))
13413 return true;
13414
d7b03373 13415 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
13416 && any_condjump_p (curr))
13417 {
13418 enum attr_type prev_type = get_attr_type (prev);
13419
13420 /* FIXME: this misses some instructions which are considered simple
13421 arithmetic instructions for ThunderX. Simple shifts are missed here. */
13422 if (prev_type == TYPE_ALUS_SREG
13423 || prev_type == TYPE_ALUS_IMM
13424 || prev_type == TYPE_LOGICS_REG
13425 || prev_type == TYPE_LOGICS_IMM)
13426 return true;
13427 }
13428
6a569cdd
KT
13429 return false;
13430}
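/* As an illustration of the MOV/MOVK case above, the fused pair at the
   assembly level is the usual immediate-building sequence, e.g.

     mov   x0, #0x1234
     movk  x0, #0x5678, lsl #16

   which the scheduler then tries to keep adjacent whenever the active
   tuning enables AARCH64_FUSE_MOV_MOVK.  */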
13431
f2879a90
KT
13432/* Return true iff the instruction fusion described by OP is enabled. */
13433
13434bool
13435aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
13436{
13437 return (aarch64_tune_params.fusible_ops & op) != 0;
13438}
13439
350013bc
BC
13440/* If MEM is in the form of [base+offset], extract the two parts
13441 of the address and set them in BASE and OFFSET, otherwise return false
13442 after clearing BASE and OFFSET. */
13443
13444bool
13445extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
13446{
13447 rtx addr;
13448
13449 gcc_assert (MEM_P (mem));
13450
13451 addr = XEXP (mem, 0);
13452
13453 if (REG_P (addr))
13454 {
13455 *base = addr;
13456 *offset = const0_rtx;
13457 return true;
13458 }
13459
13460 if (GET_CODE (addr) == PLUS
13461 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
13462 {
13463 *base = XEXP (addr, 0);
13464 *offset = XEXP (addr, 1);
13465 return true;
13466 }
13467
13468 *base = NULL_RTX;
13469 *offset = NULL_RTX;
13470
13471 return false;
13472}
13473
13474/* Types for scheduling fusion. */
13475enum sched_fusion_type
13476{
13477 SCHED_FUSION_NONE = 0,
13478 SCHED_FUSION_LD_SIGN_EXTEND,
13479 SCHED_FUSION_LD_ZERO_EXTEND,
13480 SCHED_FUSION_LD,
13481 SCHED_FUSION_ST,
13482 SCHED_FUSION_NUM
13483};
13484
13485/* If INSN is a load or store whose address is in the form of [base+offset],
13486 extract the two parts and set them in BASE and OFFSET. Return the
13487 scheduling fusion type of this INSN. */
13488
13489static enum sched_fusion_type
13490fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
13491{
13492 rtx x, dest, src;
13493 enum sched_fusion_type fusion = SCHED_FUSION_LD;
13494
13495 gcc_assert (INSN_P (insn));
13496 x = PATTERN (insn);
13497 if (GET_CODE (x) != SET)
13498 return SCHED_FUSION_NONE;
13499
13500 src = SET_SRC (x);
13501 dest = SET_DEST (x);
13502
abc52318
KT
13503 machine_mode dest_mode = GET_MODE (dest);
13504
13505 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
13506 return SCHED_FUSION_NONE;
13507
13508 if (GET_CODE (src) == SIGN_EXTEND)
13509 {
13510 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
13511 src = XEXP (src, 0);
13512 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
13513 return SCHED_FUSION_NONE;
13514 }
13515 else if (GET_CODE (src) == ZERO_EXTEND)
13516 {
13517 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
13518 src = XEXP (src, 0);
13519 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
13520 return SCHED_FUSION_NONE;
13521 }
13522
13523 if (GET_CODE (src) == MEM && REG_P (dest))
13524 extract_base_offset_in_addr (src, base, offset);
13525 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
13526 {
13527 fusion = SCHED_FUSION_ST;
13528 extract_base_offset_in_addr (dest, base, offset);
13529 }
13530 else
13531 return SCHED_FUSION_NONE;
13532
13533 if (*base == NULL_RTX || *offset == NULL_RTX)
13534 fusion = SCHED_FUSION_NONE;
13535
13536 return fusion;
13537}
13538
13539/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
13540
13541 Currently we only support fusing ldr or str instructions, so FUSION_PRI
13542 and PRI are only calculated for these instructions. For other instructions,
13543 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
13544 type instruction fusion can be added by returning different priorities.
13545
13546 It's important that irrelevant instructions get the largest FUSION_PRI. */
13547
13548static void
13549aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
13550 int *fusion_pri, int *pri)
13551{
13552 int tmp, off_val;
13553 rtx base, offset;
13554 enum sched_fusion_type fusion;
13555
13556 gcc_assert (INSN_P (insn));
13557
13558 tmp = max_pri - 1;
13559 fusion = fusion_load_store (insn, &base, &offset);
13560 if (fusion == SCHED_FUSION_NONE)
13561 {
13562 *pri = tmp;
13563 *fusion_pri = tmp;
13564 return;
13565 }
13566
13567 /* Set FUSION_PRI according to fusion type and base register. */
13568 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
13569
13570 /* Calculate PRI. */
13571 tmp /= 2;
13572
13573 /* INSN with smaller offset goes first. */
13574 off_val = (int)(INTVAL (offset));
13575 if (off_val >= 0)
13576 tmp -= (off_val & 0xfffff);
13577 else
13578 tmp += ((- off_val) & 0xfffff);
13579
13580 *pri = tmp;
13581 return;
13582}
13583
13584/* Given OPERANDS of consecutive load/store, check if we can merge
13585 them into ldp/stp. LOAD is true if they are load instructions.
13586 MODE is the mode of memory operands. */
13587
13588bool
13589aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
13590 enum machine_mode mode)
13591{
13592 HOST_WIDE_INT offval_1, offval_2, msize;
13593 enum reg_class rclass_1, rclass_2;
13594 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
13595
13596 if (load)
13597 {
13598 mem_1 = operands[1];
13599 mem_2 = operands[3];
13600 reg_1 = operands[0];
13601 reg_2 = operands[2];
13602 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
13603 if (REGNO (reg_1) == REGNO (reg_2))
13604 return false;
13605 }
13606 else
13607 {
13608 mem_1 = operands[0];
13609 mem_2 = operands[2];
13610 reg_1 = operands[1];
13611 reg_2 = operands[3];
13612 }
13613
bf84ac44
AP
13614 /* The mems cannot be volatile. */
13615 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
13616 return false;
13617
350013bc
BC
13618 /* Check if the addresses are in the form of [base+offset]. */
13619 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
13620 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
13621 return false;
13622 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
13623 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
13624 return false;
13625
13626 /* Check if the bases are same. */
13627 if (!rtx_equal_p (base_1, base_2))
13628 return false;
13629
13630 offval_1 = INTVAL (offset_1);
13631 offval_2 = INTVAL (offset_2);
13632 msize = GET_MODE_SIZE (mode);
13633 /* Check if the offsets are consecutive. */
13634 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
13635 return false;
13636
13637 /* Check if the addresses are clobbered by load. */
13638 if (load)
13639 {
13640 if (reg_mentioned_p (reg_1, mem_1))
13641 return false;
13642
13643 /* In increasing order, the last load can clobber the address. */
13644 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
13645 return false;
13646 }
13647
13648 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
13649 rclass_1 = FP_REGS;
13650 else
13651 rclass_1 = GENERAL_REGS;
13652
13653 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
13654 rclass_2 = FP_REGS;
13655 else
13656 rclass_2 = GENERAL_REGS;
13657
13658 /* Check if the registers are of same class. */
13659 if (rclass_1 != rclass_2)
13660 return false;
13661
13662 return true;
13663}
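/* Worked example of the checks above: the two loads

     ldr  x0, [x3, #8]
     ldr  x1, [x3, #16]

   share the base x3, have consecutive 8-byte offsets, use distinct
   registers of the same class and do not clobber their own address, so the
   ldp/stp peepholes may rewrite them as "ldp x0, x1, [x3, #8]".  */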
13664
13665/* Given OPERANDS of consecutive load/store, check if we can merge
13666 them into ldp/stp by adjusting the offset. LOAD is true if they
13667 are load instructions. MODE is the mode of memory operands.
13668
13669 Given below consecutive stores:
13670
13671 str w1, [xb, 0x100]
13672 str w1, [xb, 0x104]
13673 str w1, [xb, 0x108]
13674 str w1, [xb, 0x10c]
13675
13676 Though the offsets are out of the range supported by stp, we can
13677 still pair them after adjusting the offset, like:
13678
13679 add scratch, xb, 0x100
13680 stp w1, w1, [scratch]
13681 stp w1, w1, [scratch, 0x8]
13682
13683 The peephole patterns detecting this opportunity should guarantee
13684 the scratch register is available. */
13685
13686bool
13687aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
13688 enum machine_mode mode)
13689{
13690 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
13691 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
13692 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
13693 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
13694
13695 if (load)
13696 {
13697 reg_1 = operands[0];
13698 mem_1 = operands[1];
13699 reg_2 = operands[2];
13700 mem_2 = operands[3];
13701 reg_3 = operands[4];
13702 mem_3 = operands[5];
13703 reg_4 = operands[6];
13704 mem_4 = operands[7];
13705 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
13706 && REG_P (reg_3) && REG_P (reg_4));
13707 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
13708 return false;
13709 }
13710 else
13711 {
13712 mem_1 = operands[0];
13713 reg_1 = operands[1];
13714 mem_2 = operands[2];
13715 reg_2 = operands[3];
13716 mem_3 = operands[4];
13717 reg_3 = operands[5];
13718 mem_4 = operands[6];
13719 reg_4 = operands[7];
13720 }
13721 /* Skip if memory operand is by itself valid for ldp/stp. */
13722 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
13723 return false;
13724
bf84ac44
AP
13725 /* The mems cannot be volatile. */
13726 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
13727 || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
13728 return false;
13729
350013bc
BC
13730 /* Check if the addresses are in the form of [base+offset]. */
13731 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
13732 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
13733 return false;
13734 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
13735 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
13736 return false;
13737 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
13738 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
13739 return false;
13740 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
13741 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
13742 return false;
13743
13744 /* Check if the bases are same. */
13745 if (!rtx_equal_p (base_1, base_2)
13746 || !rtx_equal_p (base_2, base_3)
13747 || !rtx_equal_p (base_3, base_4))
13748 return false;
13749
13750 offval_1 = INTVAL (offset_1);
13751 offval_2 = INTVAL (offset_2);
13752 offval_3 = INTVAL (offset_3);
13753 offval_4 = INTVAL (offset_4);
13754 msize = GET_MODE_SIZE (mode);
13755 /* Check if the offsets are consecutive. */
13756 if ((offval_1 != (offval_2 + msize)
13757 || offval_1 != (offval_3 + msize * 2)
13758 || offval_1 != (offval_4 + msize * 3))
13759 && (offval_4 != (offval_3 + msize)
13760 || offval_4 != (offval_2 + msize * 2)
13761 || offval_4 != (offval_1 + msize * 3)))
13762 return false;
13763
13764 /* Check if the addresses are clobbered by load. */
13765 if (load)
13766 {
13767 if (reg_mentioned_p (reg_1, mem_1)
13768 || reg_mentioned_p (reg_2, mem_2)
13769 || reg_mentioned_p (reg_3, mem_3))
13770 return false;
13771
13772 /* In increasing order, the last load can clobber the address. */
13773 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
13774 return false;
13775 }
13776
13777 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
13778 rclass_1 = FP_REGS;
13779 else
13780 rclass_1 = GENERAL_REGS;
13781
13782 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
13783 rclass_2 = FP_REGS;
13784 else
13785 rclass_2 = GENERAL_REGS;
13786
13787 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
13788 rclass_3 = FP_REGS;
13789 else
13790 rclass_3 = GENERAL_REGS;
13791
13792 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
13793 rclass_4 = FP_REGS;
13794 else
13795 rclass_4 = GENERAL_REGS;
13796
13797 /* Check if the registers are of same class. */
13798 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
13799 return false;
13800
13801 return true;
13802}
13803
13804/* Given OPERANDS of consecutive load/store, this function pairs them
13805 into ldp/stp after adjusting the offset. It depends on the fact
13806 that addresses of load/store instructions are in increasing order.
13807 MODE is the mode of memory operands. CODE is the rtl operator
13808 which should be applied to all memory operands, it's SIGN_EXTEND,
13809 ZERO_EXTEND or UNKNOWN. */
13810
13811bool
13812aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
13813 enum machine_mode mode, RTX_CODE code)
13814{
13815 rtx base, offset, t1, t2;
13816 rtx mem_1, mem_2, mem_3, mem_4;
13817 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
13818
13819 if (load)
13820 {
13821 mem_1 = operands[1];
13822 mem_2 = operands[3];
13823 mem_3 = operands[5];
13824 mem_4 = operands[7];
13825 }
13826 else
13827 {
13828 mem_1 = operands[0];
13829 mem_2 = operands[2];
13830 mem_3 = operands[4];
13831 mem_4 = operands[6];
13832 gcc_assert (code == UNKNOWN);
13833 }
13834
13835 extract_base_offset_in_addr (mem_1, &base, &offset);
13836 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
13837
13838 /* Adjust the offset so it fits in the ldp/stp instruction. */
13839 msize = GET_MODE_SIZE (mode);
13840 stp_off_limit = msize * 0x40;
13841 off_val = INTVAL (offset);
13842 abs_off = (off_val < 0) ? -off_val : off_val;
13843 new_off = abs_off % stp_off_limit;
13844 adj_off = abs_off - new_off;
13845
13846 /* Further adjust to make sure all offsets are OK. */
13847 if ((new_off + msize * 2) >= stp_off_limit)
13848 {
13849 adj_off += stp_off_limit;
13850 new_off -= stp_off_limit;
13851 }
13852
13853 /* Make sure the adjustment can be done with ADD/SUB instructions. */
13854 if (adj_off >= 0x1000)
13855 return false;
13856
13857 if (off_val < 0)
13858 {
13859 adj_off = -adj_off;
13860 new_off = -new_off;
13861 }
13862
13863 /* Create new memory references. */
13864 mem_1 = change_address (mem_1, VOIDmode,
13865 plus_constant (DImode, operands[8], new_off));
13866
13867 /* Check if the adjusted address is OK for ldp/stp. */
13868 if (!aarch64_mem_pair_operand (mem_1, mode))
13869 return false;
13870
13871 msize = GET_MODE_SIZE (mode);
13872 mem_2 = change_address (mem_2, VOIDmode,
13873 plus_constant (DImode,
13874 operands[8],
13875 new_off + msize));
13876 mem_3 = change_address (mem_3, VOIDmode,
13877 plus_constant (DImode,
13878 operands[8],
13879 new_off + msize * 2));
13880 mem_4 = change_address (mem_4, VOIDmode,
13881 plus_constant (DImode,
13882 operands[8],
13883 new_off + msize * 3));
13884
13885 if (code == ZERO_EXTEND)
13886 {
13887 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
13888 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
13889 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
13890 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
13891 }
13892 else if (code == SIGN_EXTEND)
13893 {
13894 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
13895 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
13896 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
13897 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
13898 }
13899
13900 if (load)
13901 {
13902 operands[1] = mem_1;
13903 operands[3] = mem_2;
13904 operands[5] = mem_3;
13905 operands[7] = mem_4;
13906 }
13907 else
13908 {
13909 operands[0] = mem_1;
13910 operands[2] = mem_2;
13911 operands[4] = mem_3;
13912 operands[6] = mem_4;
13913 }
13914
13915 /* Emit adjusting instruction. */
f7df4a84 13916 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
350013bc 13917 /* Emit ldp/stp instructions. */
f7df4a84
RS
13918 t1 = gen_rtx_SET (operands[0], operands[1]);
13919 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 13920 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
13921 t1 = gen_rtx_SET (operands[4], operands[5]);
13922 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
13923 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
13924 return true;
13925}
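/* Worked example of the offset arithmetic above, using the SImode case from
   the comment before aarch64_operands_adjust_ok_for_ldpstp: msize == 4, so
   stp_off_limit == 0x100; with off_val == 0x100 this gives new_off == 0 and
   adj_off == 0x100, which is emitted as

     add  scratch, xb, #0x100
     stp  w1, w1, [scratch]
     stp  w1, w1, [scratch, #8]

   exactly as in that comment.  */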
13926
1b1e81f8
JW
13927/* Return 1 if pseudo register should be created and used to hold
13928 GOT address for PIC code. */
13929
13930bool
13931aarch64_use_pseudo_pic_reg (void)
13932{
13933 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
13934}
13935
7b841a12
JW
13936/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
13937
13938static int
13939aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
13940{
13941 switch (XINT (x, 1))
13942 {
13943 case UNSPEC_GOTSMALLPIC:
13944 case UNSPEC_GOTSMALLPIC28K:
13945 case UNSPEC_GOTTINYPIC:
13946 return 0;
13947 default:
13948 break;
13949 }
13950
13951 return default_unspec_may_trap_p (x, flags);
13952}
13953
39252973
KT
13954
13955/* If X is a positive CONST_DOUBLE with a value that is a power of 2
13956 return the log2 of that value. Otherwise return -1. */
13957
13958int
13959aarch64_fpconst_pow_of_2 (rtx x)
13960{
13961 const REAL_VALUE_TYPE *r;
13962
13963 if (!CONST_DOUBLE_P (x))
13964 return -1;
13965
13966 r = CONST_DOUBLE_REAL_VALUE (x);
13967
13968 if (REAL_VALUE_NEGATIVE (*r)
13969 || REAL_VALUE_ISNAN (*r)
13970 || REAL_VALUE_ISINF (*r)
13971 || !real_isinteger (r, DFmode))
13972 return -1;
13973
13974 return exact_log2 (real_to_integer (r));
13975}
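/* Examples: 8.0 yields 3 and 1.0 yields 0, while 0.75 (not an integer),
   -4.0 (negative) and 3.0 (an integer but not a power of two, so exact_log2
   returns -1) all yield -1.  */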
13976
13977/* If X is a vector of equal CONST_DOUBLE values and that value is
13978 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
13979
13980int
13981aarch64_vec_fpconst_pow_of_2 (rtx x)
13982{
13983 if (GET_CODE (x) != CONST_VECTOR)
13984 return -1;
13985
13986 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
13987 return -1;
13988
13989 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
13990 if (firstval <= 0)
13991 return -1;
13992
13993 for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++)
13994 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
13995 return -1;
13996
13997 return firstval;
13998}
13999
c2ec330c
AL
14000/* Implement TARGET_PROMOTED_TYPE to promote __fp16 to float. */
14001static tree
14002aarch64_promoted_type (const_tree t)
14003{
14004 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
14005 return float_type_node;
14006 return NULL_TREE;
14007}
ee62a5a6
RS
14008
14009/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
14010
14011static bool
9acc9cbe 14012aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
ee62a5a6
RS
14013 optimization_type opt_type)
14014{
14015 switch (op)
14016 {
14017 case rsqrt_optab:
9acc9cbe 14018 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
ee62a5a6
RS
14019
14020 default:
14021 return true;
14022 }
14023}
14024
43e9d192
IB
14025#undef TARGET_ADDRESS_COST
14026#define TARGET_ADDRESS_COST aarch64_address_cost
14027
14028/* This hook determines whether unnamed bitfields affect the alignment
14029 of the containing structure. The hook returns true if the structure
14030 should inherit the alignment requirements of an unnamed bitfield's
14031 type. */
14032#undef TARGET_ALIGN_ANON_BITFIELD
14033#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
14034
14035#undef TARGET_ASM_ALIGNED_DI_OP
14036#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
14037
14038#undef TARGET_ASM_ALIGNED_HI_OP
14039#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
14040
14041#undef TARGET_ASM_ALIGNED_SI_OP
14042#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
14043
14044#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
14045#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
14046 hook_bool_const_tree_hwi_hwi_const_tree_true
14047
e1c1ecb0
KT
14048#undef TARGET_ASM_FILE_START
14049#define TARGET_ASM_FILE_START aarch64_start_file
14050
43e9d192
IB
14051#undef TARGET_ASM_OUTPUT_MI_THUNK
14052#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
14053
14054#undef TARGET_ASM_SELECT_RTX_SECTION
14055#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
14056
14057#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
14058#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
14059
14060#undef TARGET_BUILD_BUILTIN_VA_LIST
14061#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
14062
14063#undef TARGET_CALLEE_COPIES
14064#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
14065
14066#undef TARGET_CAN_ELIMINATE
14067#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
14068
1fd8d40c
KT
14069#undef TARGET_CAN_INLINE_P
14070#define TARGET_CAN_INLINE_P aarch64_can_inline_p
14071
43e9d192
IB
14072#undef TARGET_CANNOT_FORCE_CONST_MEM
14073#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
14074
50487d79
EM
14075#undef TARGET_CASE_VALUES_THRESHOLD
14076#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
14077
43e9d192
IB
14078#undef TARGET_CONDITIONAL_REGISTER_USAGE
14079#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
14080
14081/* Only the least significant bit is used for initialization guard
14082 variables. */
14083#undef TARGET_CXX_GUARD_MASK_BIT
14084#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
14085
14086#undef TARGET_C_MODE_FOR_SUFFIX
14087#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
14088
14089#ifdef TARGET_BIG_ENDIAN_DEFAULT
14090#undef TARGET_DEFAULT_TARGET_FLAGS
14091#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
14092#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  aarch64_ira_change_pseudo_allocno_class

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true
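/* Returning true here selects the local register allocator (LRA) in place
   of the older reload pass.  */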

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
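/* Illustrative example (a hypothetical struct, not from this file): for

     struct s { volatile unsigned int f : 8; };

   returning false means an access to "f" uses the declared container type
   (a 32-bit load or store) rather than the narrowest mode that covers the
   bit-field (a single byte).  */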

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE aarch64_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE aarch64_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT aarch64_option_print

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE aarch64_promoted_type

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  aarch64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
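/* As an inference (not stated in the comments above): the [-256, 4095]
   window appears to match the immediate ranges of the basic addressing
   modes, where signed 9-bit unscaled offsets reach down to -256 and
   unsigned 12-bit scaled byte offsets reach up to 4095.  */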

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  aarch64_use_by_pieces_infrastructure_p

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
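/* The pair hook lets the scheduler keep fusible instruction pairs adjacent
   (for example ADRP+ADD or compare-and-branch sequences, depending on the
   selected tuning); the exact pairs are decided in
   aarch_macro_fusion_pair_p.  */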

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND aarch64_print_operand

#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p

#undef TARGET_OMIT_STRUCT_RETURN_REG
#define TARGET_OMIT_STRUCT_RETURN_REG true

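/* targetm gathers the hooks #defined above; any hook not overridden here
   keeps the default implementation supplied by TARGET_INITIALIZER.  */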
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"