/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2016 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "flags.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "langhooks.h"
#include "opts.h"
#include "params.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "target-globals.h"
#include "common/common-target.h"

/* This file should be included last.  */
#include "target-def.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

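/* For example, under the default LP64 ABI POINTER_SIZE is 64 and
   POINTER_BYTES evaluates to 8; under -mabi=ilp32 it is 32, giving 4.  */
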
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC:
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
                                                     const_tree,
                                                     machine_mode *, int *,
                                                     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                                 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_pcrelative_literal_loads;

/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  {
    0, /* hi */
    0, /* si */
    0, /* di */
    0, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  0, /* register_sextend */
  0, /* register_zextend */
  0 /* imm_offset */
};

static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
  {
    1, /* hi */
    0, /* si */
    0, /* di */
    1, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  0, /* register_sextend */
  0, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
  {
    0, /* hi */
    0, /* si */
    0, /* di */
    2, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  1, /* register_offset */
  1, /* register_sextend */
  2, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
  {
    1, /* hi */
    0, /* si */
    0, /* di */
    1, /* ti */
  },
  1, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  1, /* register_sextend */
  1, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
  {
    1, /* hi */
    0, /* si */
    0, /* di */
    1, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  0, /* register_offset */
  0, /* register_sextend */
  0, /* register_zextend */
  0 /* imm_offset */
};

static const struct cpu_addrcost_table vulcan_addrcost_table =
{
  {
    0, /* hi */
    0, /* si */
    0, /* di */
    2, /* ti */
  },
  0, /* pre_modify */
  0, /* post_modify */
  2, /* register_offset */
  3, /* register_sextend */
  3, /* register_zextend */
  0, /* imm_offset */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP */
  5, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost exynosm1_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual, 4 and 9).  */
  9, /* GP2FP */
  9, /* FP2GP */
  1 /* FP2FP */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP */
  2, /* GP2FP */
  6, /* FP2GP */
  4 /* FP2FP */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP */
  8, /* FP2GP */
  2 /* FP2FP */
};

static const struct cpu_regmove_cost qdf24xx_regmove_cost =
{
  2, /* GP2GP */
  /* Avoid the use of int<->fp moves for spilling.  */
  6, /* GP2FP */
  6, /* FP2GP */
  4 /* FP2FP */
};

static const struct cpu_regmove_cost vulcan_regmove_cost =
{
  1, /* GP2GP */
  /* Avoid the use of int<->fp moves for spilling.  */
  8, /* GP2FP */
  8, /* FP2GP */
  4 /* FP2FP */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_stmt_cost */
  1, /* scalar_load_cost */
  1, /* scalar_store_cost */
  1, /* vec_stmt_cost */
  2, /* vec_permute_cost */
  1, /* vec_to_scalar_cost */
  1, /* scalar_to_vec_cost */
  1, /* vec_align_load_cost */
  1, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  3, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* ThunderX costs for vector insn classes.  */
static const struct cpu_vector_cost thunderx_vector_cost =
{
  1, /* scalar_stmt_cost */
  3, /* scalar_load_cost */
  1, /* scalar_store_cost */
  4, /* vec_stmt_cost */
  4, /* vec_permute_cost */
  2, /* vec_to_scalar_cost */
  2, /* scalar_to_vec_cost */
  3, /* vec_align_load_cost */
  10, /* vec_unalign_load_cost */
  10, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  3, /* cond_taken_branch_cost */
  3 /* cond_not_taken_branch_cost */
};

/* Cortex-A57 costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_stmt_cost */
  4, /* scalar_load_cost */
  1, /* scalar_store_cost */
  2, /* vec_stmt_cost */
  3, /* vec_permute_cost */
  8, /* vec_to_scalar_cost */
  8, /* scalar_to_vec_cost */
  4, /* vec_align_load_cost */
  4, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  1, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

static const struct cpu_vector_cost exynosm1_vector_cost =
{
  1, /* scalar_stmt_cost */
  5, /* scalar_load_cost */
  1, /* scalar_store_cost */
  3, /* vec_stmt_cost */
  3, /* vec_permute_cost */
  3, /* vec_to_scalar_cost */
  3, /* scalar_to_vec_cost */
  5, /* vec_align_load_cost */
  5, /* vec_unalign_load_cost */
  1, /* vec_unalign_store_cost */
  1, /* vec_store_cost */
  1, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* X-Gene 1 costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_stmt_cost */
  5, /* scalar_load_cost */
  1, /* scalar_store_cost */
  2, /* vec_stmt_cost */
  2, /* vec_permute_cost */
  4, /* vec_to_scalar_cost */
  4, /* scalar_to_vec_cost */
  10, /* vec_align_load_cost */
  10, /* vec_unalign_load_cost */
  2, /* vec_unalign_store_cost */
  2, /* vec_store_cost */
  2, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Costs for vector insn classes for Vulcan.  */
static const struct cpu_vector_cost vulcan_vector_cost =
{
  6, /* scalar_stmt_cost */
  4, /* scalar_load_cost */
  1, /* scalar_store_cost */
  6, /* vec_stmt_cost */
  3, /* vec_permute_cost */
  6, /* vec_to_scalar_cost */
  5, /* scalar_to_vec_cost */
  8, /* vec_align_load_cost */
  8, /* vec_unalign_load_cost */
  4, /* vec_unalign_store_cost */
  4, /* vec_store_cost */
  2, /* cond_taken_branch_cost */
  1 /* cond_not_taken_branch_cost */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  2, /* Predictable.  */
  2 /* Unpredictable.  */
};

/* Branch costs for Cortex-A57.  */
static const struct cpu_branch_cost cortexa57_branch_cost =
{
  1, /* Predictable.  */
  3 /* Unpredictable.  */
};

/* Branch costs for Vulcan.  */
static const struct cpu_branch_cost vulcan_branch_cost =
{
  1, /* Predictable.  */
  3 /* Unpredictable.  */
};

/* Generic approximation modes.  */
static const cpu_approx_modes generic_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_NONE, /* sqrt */
  AARCH64_APPROX_NONE /* recip_sqrt */
};

/* Approximation modes for Exynos M1.  */
static const cpu_approx_modes exynosm1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_ALL, /* sqrt */
  AARCH64_APPROX_ALL /* recip_sqrt */
};

/* Approximation modes for X-Gene 1.  */
static const cpu_approx_modes xgene1_approx_modes =
{
  AARCH64_APPROX_NONE, /* division */
  AARCH64_APPROX_NONE, /* sqrt */
  AARCH64_APPROX_ALL /* recip_sqrt */
};

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  2, /* issue_rate */
  AARCH64_FUSE_NOTHING, /* fusible_ops */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF,  /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa35_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  1, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  2, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags.  */
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa73_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &cortexa57_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  2, /* issue_rate.  */
  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params exynosm1_tunings =
{
  &exynosm1_extra_costs,
  &exynosm1_addrcost_table,
  &exynosm1_regmove_cost,
  &exynosm1_vector_cost,
  &generic_branch_cost,
  &exynosm1_approx_modes,
  4, /* memmov_cost */
  3, /* issue_rate */
  (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
  4,  /* function_align.  */
  4,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  48, /* max_case_values.  */
  64, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &thunderx_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  6, /* memmov_cost */
  2, /* issue_rate */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) /* tune_flags.  */
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  &xgene1_approx_modes,
  6, /* memmov_cost */
  4, /* issue_rate */
  AARCH64_FUSE_NOTHING, /* fusible_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  0,  /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params qdf24xx_tunings =
{
  &qdf24xx_extra_costs,
  &qdf24xx_addrcost_table,
  &qdf24xx_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost */
  4, /* issue_rate */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  64, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params vulcan_tunings =
{
  &vulcan_extra_costs,
  &vulcan_addrcost_table,
  &vulcan_regmove_cost,
  &vulcan_vector_cost,
  &vulcan_branch_cost,
  &generic_approx_modes,
  4, /* memmov_cost.  */
  4, /* issue_rate.  */
  AARCH64_FUSE_NOTHING, /* fuseable_ops.  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  3,  /* int_reassoc_width.  */
  2,  /* fp_reassoc_width.  */
  2,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2,  /* min_div_recip_mul_df.  */
  0,  /* max_case_values.  */
  64, /* cache_line_size.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { NULL, NULL }
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
   all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
   FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};


/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

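/* The enum above is laid out so that each condition and its inverse differ
   only in the low bit, which is why AARCH64_INVERSE_CONDITION_CODE is a
   simple XOR with 1.  For example, AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ)
   is AARCH64_NE and AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) is
   AARCH64_LT.  */
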
973d2e01
TP
881/* Generate code to enable conditional branches in functions over 1 MiB. */
882const char *
883aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
884 const char * branch_format)
885{
886 rtx_code_label * tmp_label = gen_label_rtx ();
887 char label_buf[256];
888 char buffer[128];
889 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
890 CODE_LABEL_NUMBER (tmp_label));
891 const char *label_ptr = targetm.strip_name_encoding (label_buf);
892 rtx dest_label = operands[pos_label];
893 operands[pos_label] = tmp_label;
894
895 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
896 output_asm_insn (buffer, operands);
897
898 snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
899 operands[pos_label] = dest_label;
900 output_asm_insn (buffer, operands);
901 return "";
902}
903
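/* Schematically, the output produced above is (the caller typically supplies
   BRANCH_FORMAT with the condition already inverted):

	<branch_format> .Lfar	// short conditional branch around the far jump
	b	<original target>
   .Lfar:

   This is an illustrative sketch of the output shape, not literal
   compiler output.  */
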
void
aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
{
  const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
  if (TARGET_GENERAL_REGS_ONLY)
    error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
  else
    error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
}

/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
   The register allocator chooses ALL_REGS if FP_REGS and GENERAL_REGS have
   the same cost even if ALL_REGS has a much larger cost.  ALL_REGS is also
   used if the cost of both FP_REGS and GENERAL_REGS is lower than the memory
   cost (in this case the best class is the lowest cost one).  Using ALL_REGS
   irrespectively of its cost results in bad allocations with many redundant
   int<->FP moves which are expensive on various cores.
   To avoid this we don't allow ALL_REGS as the allocno class, but force a
   decision between FP_REGS and GENERAL_REGS.  We use the allocno class if it
   isn't ALL_REGS.  Similarly, use the best class if it isn't ALL_REGS.
   Otherwise set the allocno class depending on the mode.
   The result of this is that it is no longer inefficient to have a higher
   memory move cost than the register move cost.  */

static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
					 reg_class_t best_class)
{
  enum machine_mode mode;

  if (allocno_class != ALL_REGS)
    return allocno_class;

  if (best_class != ALL_REGS)
    return best_class;

  mode = PSEUDO_REGNO_MODE (regno);
  return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
}

static unsigned int
aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
			     enum machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

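/* For example, under the AArch64 DWARF register numbering this maps x0-x30
   to 0-30, sp to 31 and v0-v31 to 64-95 (AARCH64_DWARF_R0, AARCH64_DWARF_SP
   and AARCH64_DWARF_V0 plus the register offset, respectively).  */
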
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && (AARCH64_VALID_SIMD_QREG_MODE (mode)
	  || AARCH64_VALID_SIMD_DREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}

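/* Illustrative examples, assuming UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16
   as on AArch64: a 16-byte TImode value needs two general registers but only
   one FP/SIMD register, while a 32-byte OImode structure mode needs two
   FP/SIMD registers.  */
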
/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
				     machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
	return mode;
      else
	return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}

/* Return true if calls to DECL should be treated as
   long-calls (ie called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (ie called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if calls to symbol-ref SYM should not go through
   plt stubs.  */

bool
aarch64_is_noplt_call_p (rtx sym)
{
  const_tree decl = SYMBOL_REF_DECL (sym);

  if (flag_pic
      && decl
      && (!flag_plt
	  || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
      && !targetm.binds_local_p (decl))
    return true;

  return false;
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}

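/* A worked example (illustrative): for DImode, EXTRACT_IMM == 34 and
   MULT_IMM == 4 satisfy the checks above (34 & ~7 == 32, a power of two;
   34 & 7 == 2; 1 << 2 == 4), i.e. the extract corresponds to extending a
   32-bit value that has been scaled by 4.  */
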
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add   dest, x1, :lo_12:foo
                                     nop

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr
                                     nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp
   mrs  tp, tpidr_el0
   add  dest, dest, tp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm, lsl #12
   add  t0, t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (dest, imm));
      return;

    case SYMBOL_SMALL_GOT_28K:
      {
	machine_mode mode = GET_MODE (dest);
	rtx gp_rtx = pic_offset_table_rtx;
	rtx insn;
	rtx mem;

	/* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
	   here before rtl expand.  Tree IVOPT will generate rtl pattern to
	   decide rtx costs, in which case pic_offset_table_rtx is not
	   initialized.  For that case no need to generate the first adrp
	   instruction as the final cost for global variable access is
	   one instruction.  */
	if (gp_rtx != NULL)
	  {
	    /* -fpic for -mcmodel=small allows a 32K GOT table size (but we are
	       using the page base as GOT base, the first page may be wasted,
	       in the worst scenario, there is only 28K space for GOT).

	       The generated instruction sequence for accessing a global
	       variable is:

		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]

	       Only one instruction is needed.  But we must initialize
	       pic_offset_table_rtx properly.  We generate an initialize insn
	       for every global access, and allow CSE to remove all redundant
	       ones.

	       The final instruction sequence will look like the following
	       for multiple global variable accesses.

		adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_

		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
		...  */

	    rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
	    crtl->uses_pic_offset_table = 1;
	    emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));

	    if (mode != GET_MODE (gp_rtx))
	      gp_rtx = simplify_gen_subreg (mode, gp_rtx, GET_MODE (gp_rtx), 0);
	  }

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
	    else
	      insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);

	    mem = XVECEXP (SET_SRC (insn), 0, 0);
	  }
	else
	  {
	    gcc_assert (mode == Pmode);

	    insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
	    mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
	  }

	/* The operand is expected to be MEM.  Whenever the related insn
	   pattern changes, the above code which calculates mem should be
	   updated.  */
	gcc_assert (GET_CODE (mem) == MEM);
	MEM_READONLY_P (mem) = 1;
	MEM_NOTRAP_P (mem) = 1;
	emit_insn (insn);
	return;
      }

    case SYMBOL_SMALL_GOT_4G:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */

	rtx insn;
	rtx mem;
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
	    else
	      insn = gen_ldr_got_small_si (dest, tmp_reg, imm);

	    mem = XVECEXP (SET_SRC (insn), 0, 0);
	  }
	else
	  {
	    gcc_assert (mode == Pmode);

	    insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
	    mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
	  }

	gcc_assert (GET_CODE (mem) == MEM);
	MEM_READONLY_P (mem) = 1;
	MEM_NOTRAP_P (mem) = 1;
	emit_insn (insn);
	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx_insn *insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	machine_mode mode = GET_MODE (dest);
	rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
	rtx tp;

	gcc_assert (mode == Pmode || mode == ptr_mode);

	/* In ILP32, the got entry is always of SImode size.  Unlike
	   small GOT, the dest is fixed at reg 0.  */
	if (TARGET_ILP32)
	  emit_insn (gen_tlsdesc_small_si (imm));
	else
	  emit_insn (gen_tlsdesc_small_di (imm));
	tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TLSIE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different tlsie_small
	   patterns here (two patterns for ILP32).  */
	machine_mode mode = GET_MODE (dest);
	rtx tmp_reg = gen_reg_rtx (mode);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_small_di (tmp_reg, imm));
	    else
	      {
		emit_insn (gen_tlsie_small_si (tmp_reg, imm));
		tp = gen_lowpart (mode, tp);
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
	  }

	emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TLSLE12:
    case SYMBOL_TLSLE24:
    case SYMBOL_TLSLE32:
    case SYMBOL_TLSLE48:
      {
	machine_mode mode = GET_MODE (dest);
	rtx tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	switch (type)
	  {
	  case SYMBOL_TLSLE12:
	    emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
		       (dest, tp, imm));
	    break;
	  case SYMBOL_TLSLE24:
	    emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
		       (dest, tp, imm));
	    break;
	  case SYMBOL_TLSLE32:
	    emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
		       (dest, imm));
	    emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
		       (dest, dest, tp));
	    break;
	  case SYMBOL_TLSLE48:
	    emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
		       (dest, imm));
	    emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
		       (dest, dest, tp));
	    break;
	  default:
	    gcc_unreachable ();
	  }

	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    case SYMBOL_TINY_TLSIE:
      {
	machine_mode mode = GET_MODE (dest);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
	    else
	      {
		tp = gen_lowpart (mode, tp);
		emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
	  }

	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    default:
      gcc_unreachable ();
    }
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}

/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  src_lo = gen_lowpart (word_mode, src);
	  src_hi = gen_highpart (word_mode, src);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
	    }
	  return;
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  dst_lo = gen_lowpart (word_mode, dst);
	  dst_hi = gen_highpart (word_mode, dst);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
	    }
	  return;
	}
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}

bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}

/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  machine_mode src_mode = GET_MODE (src1);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V4HFmode:
	  gen = gen_aarch64_simd_combinev4hf;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
      return;
    }
}

/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  machine_mode src_mode = GET_MODE (src);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V8HFmode:
	  gen = gen_aarch64_split_simd_movv8hf;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}

bool
aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
			      machine_mode ymode, rtx y)
{
  rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
  gcc_assert (r != NULL);
  return rtx_equal_p (x, r);
}


static rtx
aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}


static rtx
aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}

static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
				machine_mode mode)
{
  int i;
  unsigned HOST_WIDE_INT val, val2, mask;
  int one_match, zero_match;
  int num_insns;

  val = INTVAL (imm);

  if (aarch64_move_imm (val, mode))
    {
      if (generate)
	emit_insn (gen_rtx_SET (dest, imm));
      return 1;
    }

  if ((val >> 32) == 0 || mode == SImode)
    {
      if (generate)
	{
	  emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
	  if (mode == SImode)
	    emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				       GEN_INT ((val >> 16) & 0xffff)));
	  else
	    emit_insn (gen_insv_immdi (dest, GEN_INT (16),
				       GEN_INT ((val >> 16) & 0xffff)));
	}
      return 2;
    }

  /* Remaining cases are all for DImode.  */

  mask = 0xffff;
  zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
    ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
  one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
    ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);

  if (zero_match != 2 && one_match != 2)
    {
      /* Try emitting a bitmask immediate with a movk replacing 16 bits.
	 For a 64-bit bitmask try whether changing 16 bits to all ones or
	 zeroes creates a valid bitmask.  To check any repeated bitmask,
	 try using 16 bits from the other 32-bit half of val.  */

      for (i = 0; i < 64; i += 16, mask <<= 16)
	{
	  val2 = val & ~mask;
	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
	    break;
	  val2 = val | mask;
	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
	    break;
	  val2 = val2 & ~mask;
	  val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
	    break;
	}
      if (i != 64)
	{
	  if (generate)
	    {
	      emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					 GEN_INT ((val >> i) & 0xffff)));
	    }
	  return 2;
	}
    }

  /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
     are emitted by the initial mov.  If one_match > zero_match, skip set bits,
     otherwise skip zero bits.  */

  num_insns = 1;
  mask = 0xffff;
  val2 = one_match > zero_match ? ~val : val;
  i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;

  if (generate)
    emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
					   ? (val | ~(mask << i))
					   : (val & (mask << i)))));
  for (i += 16; i < 64; i += 16)
    {
      if ((val2 & (mask << i)) == 0)
	continue;
      if (generate)
	emit_insn (gen_insv_immdi (dest, GEN_INT (i),
				   GEN_INT ((val >> i) & 0xffff)));
      num_insns ++;
    }

  return num_insns;
}
1852
1853
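/* Expand a move of the constant or symbolic operand IMM into register DEST
   (of mode SImode or DImode). Symbolic operands are classified after
   splitting off any constant offset and loaded appropriately, possibly via
   the literal pool; integer constants are handled by
   aarch64_internal_mov_immediate. */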
1854void
1855aarch64_expand_mov_immediate (rtx dest, rtx imm)
1856{
1857 machine_mode mode = GET_MODE (dest);
1858
1859 gcc_assert (mode == SImode || mode == DImode);
1860
1861 /* Check on what type of symbol it is. */
1862 if (GET_CODE (imm) == SYMBOL_REF
1863 || GET_CODE (imm) == LABEL_REF
1864 || GET_CODE (imm) == CONST)
1865 {
1866 rtx mem, base, offset;
1867 enum aarch64_symbol_type sty;
1868
1869 /* If we have (const (plus symbol offset)), separate out the offset
1870 before we start classifying the symbol. */
1871 split_const (imm, &base, &offset);
1872
a6e0bfa7 1873 sty = aarch64_classify_symbol (base, offset);
82614948
RR
1874 switch (sty)
1875 {
1876 case SYMBOL_FORCE_TO_MEM:
1877 if (offset != const0_rtx
1878 && targetm.cannot_force_const_mem (mode, imm))
1879 {
1880 gcc_assert (can_create_pseudo_p ());
1881 base = aarch64_force_temporary (mode, dest, base);
1882 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1883 aarch64_emit_move (dest, base);
1884 return;
1885 }
b4f50fd4 1886
82614948
RR
1887 mem = force_const_mem (ptr_mode, imm);
1888 gcc_assert (mem);
b4f50fd4
RR
1889
1890 /* If we aren't generating PC relative literals, then
1891 we need to expand the literal pool access carefully.
1892 This is something that needs to be done in a number
1893 of places, so could well live as a separate function. */
9ee6540a 1894 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
1895 {
1896 gcc_assert (can_create_pseudo_p ());
1897 base = gen_reg_rtx (ptr_mode);
1898 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
1899 mem = gen_rtx_MEM (ptr_mode, base);
1900 }
1901
82614948
RR
1902 if (mode != ptr_mode)
1903 mem = gen_rtx_ZERO_EXTEND (mode, mem);
b4f50fd4 1904
f7df4a84 1905 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 1906
82614948
RR
1907 return;
1908
1909 case SYMBOL_SMALL_TLSGD:
1910 case SYMBOL_SMALL_TLSDESC:
79496620 1911 case SYMBOL_SMALL_TLSIE:
1b1e81f8 1912 case SYMBOL_SMALL_GOT_28K:
6642bdb4 1913 case SYMBOL_SMALL_GOT_4G:
82614948 1914 case SYMBOL_TINY_GOT:
5ae7caad 1915 case SYMBOL_TINY_TLSIE:
82614948
RR
1916 if (offset != const0_rtx)
1917 {
 1918 gcc_assert (can_create_pseudo_p ());
1919 base = aarch64_force_temporary (mode, dest, base);
1920 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1921 aarch64_emit_move (dest, base);
1922 return;
1923 }
1924 /* FALLTHRU */
1925
82614948
RR
1926 case SYMBOL_SMALL_ABSOLUTE:
1927 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 1928 case SYMBOL_TLSLE12:
d18ba284 1929 case SYMBOL_TLSLE24:
cbf5629e
JW
1930 case SYMBOL_TLSLE32:
1931 case SYMBOL_TLSLE48:
82614948
RR
1932 aarch64_load_symref_appropriately (dest, imm, sty);
1933 return;
1934
1935 default:
1936 gcc_unreachable ();
1937 }
1938 }
1939
1940 if (!CONST_INT_P (imm))
1941 {
1942 if (GET_CODE (imm) == HIGH)
f7df4a84 1943 emit_insn (gen_rtx_SET (dest, imm));
82614948
RR
1944 else
1945 {
1946 rtx mem = force_const_mem (mode, imm);
1947 gcc_assert (mem);
f7df4a84 1948 emit_insn (gen_rtx_SET (dest, mem));
43e9d192 1949 }
82614948
RR
1950
1951 return;
43e9d192 1952 }
82614948
RR
1953
1954 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
43e9d192
IB
1955}
1956
5be6b295
WD
1957/* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a
1958 temporary value if necessary. FRAME_RELATED_P should be true if
1959 the RTX_FRAME_RELATED flag should be set and CFA adjustments added
1960 to the generated instructions. If SCRATCHREG is known to hold
1961 abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
1962 immediate again.
1963
1964 Since this function may be used to adjust the stack pointer, we must
1965 ensure that it cannot cause transient stack deallocation (for example
1966 by first incrementing SP and then decrementing when adjusting by a
1967 large immediate). */
c4ddc43a
JW
1968
1969static void
5be6b295
WD
1970aarch64_add_constant_internal (machine_mode mode, int regnum, int scratchreg,
1971 HOST_WIDE_INT delta, bool frame_related_p,
1972 bool emit_move_imm)
c4ddc43a
JW
1973{
1974 HOST_WIDE_INT mdelta = abs_hwi (delta);
1975 rtx this_rtx = gen_rtx_REG (mode, regnum);
37d6a4b7 1976 rtx_insn *insn;
c4ddc43a 1977
c4ddc43a
JW
1978 if (!mdelta)
1979 return;
1980
5be6b295 1981 /* Single instruction adjustment. */
c4ddc43a
JW
1982 if (aarch64_uimm12_shift (mdelta))
1983 {
37d6a4b7
JW
1984 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta)));
1985 RTX_FRAME_RELATED_P (insn) = frame_related_p;
c4ddc43a
JW
1986 return;
1987 }
1988
5be6b295
WD
1989 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits.
1990 Only do this if mdelta is not a 16-bit move as adjusting using a move
1991 is better. */
1992 if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode))
c4ddc43a
JW
1993 {
1994 HOST_WIDE_INT low_off = mdelta & 0xfff;
1995
1996 low_off = delta < 0 ? -low_off : low_off;
37d6a4b7
JW
1997 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (low_off)));
1998 RTX_FRAME_RELATED_P (insn) = frame_related_p;
1999 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta - low_off)));
2000 RTX_FRAME_RELATED_P (insn) = frame_related_p;
c4ddc43a
JW
2001 return;
2002 }
2003
5be6b295 2004 /* Emit a move immediate if required and an addition/subtraction. */
c4ddc43a 2005 rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
5be6b295
WD
2006 if (emit_move_imm)
2007 aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode);
2008 insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
2009 : gen_add2_insn (this_rtx, scratch_rtx));
37d6a4b7
JW
2010 if (frame_related_p)
2011 {
2012 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2013 rtx adj = plus_constant (mode, this_rtx, delta);
2014 add_reg_note (insn , REG_CFA_ADJUST_CFA, gen_rtx_SET (this_rtx, adj));
2015 }
c4ddc43a
JW
2016}
2017
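/* Add DELTA to the register numbered REGNUM of mode MODE, using SCRATCHREG
   as a temporary if needed. The instructions are not marked as
   frame-related. */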
5be6b295
WD
2018static inline void
2019aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
2020 HOST_WIDE_INT delta)
2021{
2022 aarch64_add_constant_internal (mode, regnum, scratchreg, delta, false, true);
2023}
2024
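/* Add DELTA to the stack pointer, marking the instructions as
   frame-related. EMIT_MOVE_IMM is as for aarch64_add_constant_internal. */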
2025static inline void
2026aarch64_add_sp (int scratchreg, HOST_WIDE_INT delta, bool emit_move_imm)
2027{
2028 aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, delta,
2029 true, emit_move_imm);
2030}
2031
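/* Subtract DELTA from the stack pointer, marking the instructions as
   frame-related if FRAME_RELATED_P. */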
2032static inline void
2033aarch64_sub_sp (int scratchreg, HOST_WIDE_INT delta, bool frame_related_p)
2034{
2035 aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, -delta,
2036 frame_related_p, true);
2037}
2038
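/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */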
43e9d192 2039static bool
fee9ba42
JW
2040aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
2041 tree exp ATTRIBUTE_UNUSED)
43e9d192 2042{
fee9ba42 2043 /* Currently, always true. */
43e9d192
IB
2044 return true;
2045}
2046
2047/* Implement TARGET_PASS_BY_REFERENCE. */
2048
2049static bool
2050aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 2051 machine_mode mode,
43e9d192
IB
2052 const_tree type,
2053 bool named ATTRIBUTE_UNUSED)
2054{
2055 HOST_WIDE_INT size;
ef4bddc2 2056 machine_mode dummymode;
43e9d192
IB
2057 int nregs;
2058
2059 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
2060 size = (mode == BLKmode && type)
2061 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2062
aadc1c43
MHD
2063 /* Aggregates are passed by reference based on their size. */
2064 if (type && AGGREGATE_TYPE_P (type))
43e9d192 2065 {
aadc1c43 2066 size = int_size_in_bytes (type);
43e9d192
IB
2067 }
2068
2069 /* Variable sized arguments are always returned by reference. */
2070 if (size < 0)
2071 return true;
2072
2073 /* Can this be a candidate to be passed in fp/simd register(s)? */
2074 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
2075 &dummymode, &nregs,
2076 NULL))
2077 return false;
2078
2079 /* Arguments which are variable sized or larger than 2 registers are
 2080 passed by reference unless they are a homogeneous floating point
2081 aggregate. */
2082 return size > 2 * UNITS_PER_WORD;
2083}
2084
2085/* Return TRUE if VALTYPE is padded to its least significant bits. */
2086static bool
2087aarch64_return_in_msb (const_tree valtype)
2088{
ef4bddc2 2089 machine_mode dummy_mode;
43e9d192
IB
2090 int dummy_int;
2091
2092 /* Never happens in little-endian mode. */
2093 if (!BYTES_BIG_ENDIAN)
2094 return false;
2095
2096 /* Only composite types smaller than or equal to 16 bytes can
2097 be potentially returned in registers. */
2098 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
2099 || int_size_in_bytes (valtype) <= 0
2100 || int_size_in_bytes (valtype) > 16)
2101 return false;
2102
2103 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
2104 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
2105 is always passed/returned in the least significant bits of fp/simd
2106 register(s). */
2107 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
2108 &dummy_mode, &dummy_int, NULL))
2109 return false;
2110
2111 return true;
2112}
2113
2114/* Implement TARGET_FUNCTION_VALUE.
2115 Define how to find the value returned by a function. */
2116
2117static rtx
2118aarch64_function_value (const_tree type, const_tree func,
2119 bool outgoing ATTRIBUTE_UNUSED)
2120{
ef4bddc2 2121 machine_mode mode;
43e9d192
IB
2122 int unsignedp;
2123 int count;
ef4bddc2 2124 machine_mode ag_mode;
43e9d192
IB
2125
2126 mode = TYPE_MODE (type);
2127 if (INTEGRAL_TYPE_P (type))
2128 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
2129
2130 if (aarch64_return_in_msb (type))
2131 {
2132 HOST_WIDE_INT size = int_size_in_bytes (type);
2133
2134 if (size % UNITS_PER_WORD != 0)
2135 {
2136 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2137 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2138 }
2139 }
2140
2141 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
2142 &ag_mode, &count, NULL))
2143 {
2144 if (!aarch64_composite_type_p (type, mode))
2145 {
2146 gcc_assert (count == 1 && mode == ag_mode);
2147 return gen_rtx_REG (mode, V0_REGNUM);
2148 }
2149 else
2150 {
2151 int i;
2152 rtx par;
2153
2154 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
2155 for (i = 0; i < count; i++)
2156 {
2157 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
2158 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2159 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
2160 XVECEXP (par, 0, i) = tmp;
2161 }
2162 return par;
2163 }
2164 }
2165 else
2166 return gen_rtx_REG (mode, R0_REGNUM);
2167}
2168
2169/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
2170 Return true if REGNO is the number of a hard register in which the values
2171 of called function may come back. */
2172
2173static bool
2174aarch64_function_value_regno_p (const unsigned int regno)
2175{
2176 /* Maximum of 16 bytes can be returned in the general registers. Examples
2177 of 16-byte return values are: 128-bit integers and 16-byte small
2178 structures (excluding homogeneous floating-point aggregates). */
2179 if (regno == R0_REGNUM || regno == R1_REGNUM)
2180 return true;
2181
2182 /* Up to four fp/simd registers can return a function value, e.g. a
2183 homogeneous floating-point aggregate having four members. */
2184 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 2185 return TARGET_FLOAT;
43e9d192
IB
2186
2187 return false;
2188}
2189
2190/* Implement TARGET_RETURN_IN_MEMORY.
2191
2192 If the type T of the result of a function is such that
2193 void func (T arg)
2194 would require that arg be passed as a value in a register (or set of
2195 registers) according to the parameter passing rules, then the result
2196 is returned in the same registers as would be used for such an
2197 argument. */
2198
2199static bool
2200aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
2201{
2202 HOST_WIDE_INT size;
ef4bddc2 2203 machine_mode ag_mode;
43e9d192
IB
2204 int count;
2205
2206 if (!AGGREGATE_TYPE_P (type)
2207 && TREE_CODE (type) != COMPLEX_TYPE
2208 && TREE_CODE (type) != VECTOR_TYPE)
 2209 /* Simple scalar types are always returned in registers. */
2210 return false;
2211
2212 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
2213 type,
2214 &ag_mode,
2215 &count,
2216 NULL))
2217 return false;
2218
 2219 /* Types larger than 2 registers are returned in memory. */
2220 size = int_size_in_bytes (type);
2221 return (size < 0 || size > 2 * UNITS_PER_WORD);
2222}
2223
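/* Return true if an argument of mode MODE and type TYPE can be passed in
   SIMD/FP registers, setting *NREGS to the number of registers needed and
   recording the mode of each register in the cumulative argument state. */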
2224static bool
ef4bddc2 2225aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2226 const_tree type, int *nregs)
2227{
2228 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2229 return aarch64_vfp_is_call_or_return_candidate (mode,
2230 type,
2231 &pcum->aapcs_vfp_rmode,
2232 nregs,
2233 NULL);
2234}
2235
2236/* Given MODE and TYPE of a function argument, return the alignment in
2237 bits. The idea is to suppress any stronger alignment requested by
2238 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
2239 This is a helper function for local use only. */
2240
2241static unsigned int
ef4bddc2 2242aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192 2243{
75d6cc81
AL
2244 if (!type)
2245 return GET_MODE_ALIGNMENT (mode);
2246 if (integer_zerop (TYPE_SIZE (type)))
2247 return 0;
43e9d192 2248
75d6cc81
AL
2249 gcc_assert (TYPE_MODE (type) == mode);
2250
2251 if (!AGGREGATE_TYPE_P (type))
2252 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
2253
2254 if (TREE_CODE (type) == ARRAY_TYPE)
2255 return TYPE_ALIGN (TREE_TYPE (type));
2256
2257 unsigned int alignment = 0;
2258
2259 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2260 alignment = std::max (alignment, DECL_ALIGN (field));
43e9d192
IB
2261
2262 return alignment;
2263}
2264
2265/* Layout a function argument according to the AAPCS64 rules. The rule
2266 numbers refer to the rule numbers in the AAPCS64. */
2267
2268static void
ef4bddc2 2269aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2270 const_tree type,
2271 bool named ATTRIBUTE_UNUSED)
2272{
2273 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2274 int ncrn, nvrn, nregs;
2275 bool allocate_ncrn, allocate_nvrn;
3abf17cf 2276 HOST_WIDE_INT size;
43e9d192
IB
2277
2278 /* We need to do this once per argument. */
2279 if (pcum->aapcs_arg_processed)
2280 return;
2281
2282 pcum->aapcs_arg_processed = true;
2283
3abf17cf
YZ
2284 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
2285 size
4f59f9f2
UB
2286 = ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
2287 UNITS_PER_WORD);
3abf17cf 2288
43e9d192
IB
2289 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
2290 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
2291 mode,
2292 type,
2293 &nregs);
2294
2295 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
2296 The following code thus handles passing by SIMD/FP registers first. */
2297
2298 nvrn = pcum->aapcs_nvrn;
2299
 2300 /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
 2301 and homogeneous short-vector aggregates (HVA). */
2302 if (allocate_nvrn)
2303 {
261fb553
AL
2304 if (!TARGET_FLOAT)
2305 aarch64_err_no_fpadvsimd (mode, "argument");
2306
43e9d192
IB
2307 if (nvrn + nregs <= NUM_FP_ARG_REGS)
2308 {
2309 pcum->aapcs_nextnvrn = nvrn + nregs;
2310 if (!aarch64_composite_type_p (type, mode))
2311 {
2312 gcc_assert (nregs == 1);
2313 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
2314 }
2315 else
2316 {
2317 rtx par;
2318 int i;
2319 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2320 for (i = 0; i < nregs; i++)
2321 {
2322 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
2323 V0_REGNUM + nvrn + i);
2324 tmp = gen_rtx_EXPR_LIST
2325 (VOIDmode, tmp,
2326 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
2327 XVECEXP (par, 0, i) = tmp;
2328 }
2329 pcum->aapcs_reg = par;
2330 }
2331 return;
2332 }
2333 else
2334 {
2335 /* C.3 NSRN is set to 8. */
2336 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
2337 goto on_stack;
2338 }
2339 }
2340
2341 ncrn = pcum->aapcs_ncrn;
3abf17cf 2342 nregs = size / UNITS_PER_WORD;
43e9d192
IB
2343
 2344 /* C6 - C9, though the sign and zero extension semantics are
 2345 handled elsewhere. This is the case where the argument fits
 2346 entirely in general registers. */
2347 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
2348 {
2349 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2350
2351 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
2352
2353 /* C.8 if the argument has an alignment of 16 then the NGRN is
2354 rounded up to the next even number. */
2355 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
2356 {
2357 ++ncrn;
2358 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
2359 }
2360 /* NREGS can be 0 when e.g. an empty structure is to be passed.
2361 A reg is still generated for it, but the caller should be smart
2362 enough not to use it. */
2363 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2364 {
2365 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
2366 }
2367 else
2368 {
2369 rtx par;
2370 int i;
2371
2372 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2373 for (i = 0; i < nregs; i++)
2374 {
2375 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
2376 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2377 GEN_INT (i * UNITS_PER_WORD));
2378 XVECEXP (par, 0, i) = tmp;
2379 }
2380 pcum->aapcs_reg = par;
2381 }
2382
2383 pcum->aapcs_nextncrn = ncrn + nregs;
2384 return;
2385 }
2386
2387 /* C.11 */
2388 pcum->aapcs_nextncrn = NUM_ARG_REGS;
2389
2390 /* The argument is passed on stack; record the needed number of words for
3abf17cf 2391 this argument and align the total size if necessary. */
43e9d192 2392on_stack:
3abf17cf 2393 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
43e9d192 2394 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
4f59f9f2
UB
2395 pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
2396 16 / UNITS_PER_WORD);
43e9d192
IB
2397 return;
2398}
2399
2400/* Implement TARGET_FUNCTION_ARG. */
2401
2402static rtx
ef4bddc2 2403aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
2404 const_tree type, bool named)
2405{
2406 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2407 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
2408
2409 if (mode == VOIDmode)
2410 return NULL_RTX;
2411
2412 aarch64_layout_arg (pcum_v, mode, type, named);
2413 return pcum->aapcs_reg;
2414}
2415
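/* Initialize the argument-passing state PCUM for a call to a function of
   type FNTYPE. Diagnose a SIMD/FP return value if the FP/SIMD registers
   are not available. */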
2416void
2417aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
2418 const_tree fntype ATTRIBUTE_UNUSED,
2419 rtx libname ATTRIBUTE_UNUSED,
2420 const_tree fndecl ATTRIBUTE_UNUSED,
2421 unsigned n_named ATTRIBUTE_UNUSED)
2422{
2423 pcum->aapcs_ncrn = 0;
2424 pcum->aapcs_nvrn = 0;
2425 pcum->aapcs_nextncrn = 0;
2426 pcum->aapcs_nextnvrn = 0;
2427 pcum->pcs_variant = ARM_PCS_AAPCS64;
2428 pcum->aapcs_reg = NULL_RTX;
2429 pcum->aapcs_arg_processed = false;
2430 pcum->aapcs_stack_words = 0;
2431 pcum->aapcs_stack_size = 0;
2432
261fb553
AL
2433 if (!TARGET_FLOAT
2434 && fndecl && TREE_PUBLIC (fndecl)
2435 && fntype && fntype != error_mark_node)
2436 {
2437 const_tree type = TREE_TYPE (fntype);
2438 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
2439 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
2440 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
2441 &mode, &nregs, NULL))
2442 aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
2443 }
43e9d192
IB
2444 return;
2445}
2446
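/* Implement TARGET_FUNCTION_ARG_ADVANCE. */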
2447static void
2448aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 2449 machine_mode mode,
43e9d192
IB
2450 const_tree type,
2451 bool named)
2452{
2453 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2454 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
2455 {
2456 aarch64_layout_arg (pcum_v, mode, type, named);
2457 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
2458 != (pcum->aapcs_stack_words != 0));
2459 pcum->aapcs_arg_processed = false;
2460 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
2461 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
2462 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
2463 pcum->aapcs_stack_words = 0;
2464 pcum->aapcs_reg = NULL_RTX;
2465 }
2466}
2467
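/* Return true if REGNO is a general or SIMD/FP register that may be used
   for passing function arguments. */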
2468bool
2469aarch64_function_arg_regno_p (unsigned regno)
2470{
2471 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
2472 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
2473}
2474
2475/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2476 PARM_BOUNDARY bits of alignment, but will be given anything up
2477 to STACK_BOUNDARY bits if the type requires it. This makes sure
2478 that both before and after the layout of each argument, the Next
2479 Stacked Argument Address (NSAA) will have a minimum alignment of
2480 8 bytes. */
2481
2482static unsigned int
ef4bddc2 2483aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192
IB
2484{
2485 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2486
2487 if (alignment < PARM_BOUNDARY)
2488 alignment = PARM_BOUNDARY;
2489 if (alignment > STACK_BOUNDARY)
2490 alignment = STACK_BOUNDARY;
2491 return alignment;
2492}
2493
2494/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
2495
2496 Return true if an argument passed on the stack should be padded upwards,
2497 i.e. if the least-significant byte of the stack slot has useful data.
2498
2499 Small aggregate types are placed in the lowest memory address.
2500
2501 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2502
2503bool
ef4bddc2 2504aarch64_pad_arg_upward (machine_mode mode, const_tree type)
43e9d192
IB
2505{
2506 /* On little-endian targets, the least significant byte of every stack
2507 argument is passed at the lowest byte address of the stack slot. */
2508 if (!BYTES_BIG_ENDIAN)
2509 return true;
2510
00edcfbe 2511 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
2512 the least significant byte of a stack argument is passed at the highest
2513 byte address of the stack slot. */
2514 if (type
00edcfbe
YZ
2515 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
2516 || POINTER_TYPE_P (type))
43e9d192
IB
2517 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
2518 return false;
2519
2520 /* Everything else padded upward, i.e. data in first byte of stack slot. */
2521 return true;
2522}
2523
2524/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2525
2526 It specifies padding for the last (may also be the only)
2527 element of a block move between registers and memory. If
2528 assuming the block is in the memory, padding upward means that
2529 the last element is padded after its highest significant byte,
 2530 while in downward padding, the last element is padded at
 2531 its least significant byte side.
2532
2533 Small aggregates and small complex types are always padded
2534 upwards.
2535
2536 We don't need to worry about homogeneous floating-point or
2537 short-vector aggregates; their move is not affected by the
2538 padding direction determined here. Regardless of endianness,
2539 each element of such an aggregate is put in the least
2540 significant bits of a fp/simd register.
2541
2542 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2543 register has useful data, and return the opposite if the most
2544 significant byte does. */
2545
2546bool
ef4bddc2 2547aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
2548 bool first ATTRIBUTE_UNUSED)
2549{
2550
2551 /* Small composite types are always padded upward. */
2552 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2553 {
2554 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2555 : GET_MODE_SIZE (mode));
2556 if (size < 2 * UNITS_PER_WORD)
2557 return true;
2558 }
2559
2560 /* Otherwise, use the default padding. */
2561 return !BYTES_BIG_ENDIAN;
2562}
2563
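/* Implement TARGET_LIBGCC_CMP_RETURN_MODE. */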
ef4bddc2 2564static machine_mode
43e9d192
IB
2565aarch64_libgcc_cmp_return_mode (void)
2566{
2567 return SImode;
2568}
2569
a3eb8a52
EB
2570#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
2571
2572/* We use the 12-bit shifted immediate arithmetic instructions so values
2573 must be multiple of (1 << 12), i.e. 4096. */
2574#define ARITH_FACTOR 4096
2575
2576#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
2577#error Cannot use simple address calculation for stack probing
2578#endif
2579
2580/* The pair of scratch registers used for stack probing. */
2581#define PROBE_STACK_FIRST_REG 9
2582#define PROBE_STACK_SECOND_REG 10
2583
2584/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
2585 inclusive. These are offsets from the current stack pointer. */
2586
2587static void
2588aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
2589{
2590 rtx reg1 = gen_rtx_REG (ptr_mode, PROBE_STACK_FIRST_REG);
2591
2592 /* See the same assertion on PROBE_INTERVAL above. */
2593 gcc_assert ((first % ARITH_FACTOR) == 0);
2594
2595 /* See if we have a constant small number of probes to generate. If so,
2596 that's the easy case. */
2597 if (size <= PROBE_INTERVAL)
2598 {
2599 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
2600
2601 emit_set_insn (reg1,
2602 plus_constant (ptr_mode,
2603 stack_pointer_rtx, -(first + base)));
2604 emit_stack_probe (plus_constant (ptr_mode, reg1, base - size));
2605 }
2606
2607 /* The run-time loop is made up of 8 insns in the generic case while the
2608 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
2609 else if (size <= 4 * PROBE_INTERVAL)
2610 {
2611 HOST_WIDE_INT i, rem;
2612
2613 emit_set_insn (reg1,
2614 plus_constant (ptr_mode,
2615 stack_pointer_rtx,
2616 -(first + PROBE_INTERVAL)));
2617 emit_stack_probe (reg1);
2618
2619 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
2620 it exceeds SIZE. If only two probes are needed, this will not
2621 generate any code. Then probe at FIRST + SIZE. */
2622 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
2623 {
2624 emit_set_insn (reg1,
2625 plus_constant (ptr_mode, reg1, -PROBE_INTERVAL));
2626 emit_stack_probe (reg1);
2627 }
2628
2629 rem = size - (i - PROBE_INTERVAL);
2630 if (rem > 256)
2631 {
2632 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2633
2634 emit_set_insn (reg1, plus_constant (ptr_mode, reg1, -base));
2635 emit_stack_probe (plus_constant (ptr_mode, reg1, base - rem));
2636 }
2637 else
2638 emit_stack_probe (plus_constant (ptr_mode, reg1, -rem));
2639 }
2640
2641 /* Otherwise, do the same as above, but in a loop. Note that we must be
2642 extra careful with variables wrapping around because we might be at
2643 the very top (or the very bottom) of the address space and we have
2644 to be able to handle this case properly; in particular, we use an
2645 equality test for the loop condition. */
2646 else
2647 {
2648 rtx reg2 = gen_rtx_REG (ptr_mode, PROBE_STACK_SECOND_REG);
2649
2650 /* Step 1: round SIZE to the previous multiple of the interval. */
2651
2652 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
2653
2654
2655 /* Step 2: compute initial and final value of the loop counter. */
2656
2657 /* TEST_ADDR = SP + FIRST. */
2658 emit_set_insn (reg1,
2659 plus_constant (ptr_mode, stack_pointer_rtx, -first));
2660
2661 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
2662 emit_set_insn (reg2,
2663 plus_constant (ptr_mode, stack_pointer_rtx,
2664 -(first + rounded_size)));
2665
2666
2667 /* Step 3: the loop
2668
2669 do
2670 {
2671 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
2672 probe at TEST_ADDR
2673 }
2674 while (TEST_ADDR != LAST_ADDR)
2675
2676 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
2677 until it is equal to ROUNDED_SIZE. */
2678
2679 if (ptr_mode == DImode)
2680 emit_insn (gen_probe_stack_range_di (reg1, reg1, reg2));
2681 else
2682 emit_insn (gen_probe_stack_range_si (reg1, reg1, reg2));
2683
2684
2685 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
2686 that SIZE is equal to ROUNDED_SIZE. */
2687
2688 if (size != rounded_size)
2689 {
2690 HOST_WIDE_INT rem = size - rounded_size;
2691
2692 if (rem > 256)
2693 {
2694 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2695
2696 emit_set_insn (reg2, plus_constant (ptr_mode, reg2, -base));
2697 emit_stack_probe (plus_constant (ptr_mode, reg2, base - rem));
2698 }
2699 else
2700 emit_stack_probe (plus_constant (ptr_mode, reg2, -rem));
2701 }
2702 }
2703
2704 /* Make sure nothing is scheduled before we are done. */
2705 emit_insn (gen_blockage ());
2706}
2707
2708/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
2709 absolute addresses. */
2710
2711const char *
2712aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
2713{
2714 static int labelno = 0;
2715 char loop_lab[32];
2716 rtx xops[2];
2717
2718 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
2719
2720 /* Loop. */
2721 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
2722
2723 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
2724 xops[0] = reg1;
2725 xops[1] = GEN_INT (PROBE_INTERVAL);
2726 output_asm_insn ("sub\t%0, %0, %1", xops);
2727
2728 /* Probe at TEST_ADDR. */
2729 output_asm_insn ("str\txzr, [%0]", xops);
2730
2731 /* Test if TEST_ADDR == LAST_ADDR. */
2732 xops[1] = reg2;
2733 output_asm_insn ("cmp\t%0, %1", xops);
2734
2735 /* Branch. */
2736 fputs ("\tb.ne\t", asm_out_file);
2737 assemble_name_raw (asm_out_file, loop_lab);
2738 fputc ('\n', asm_out_file);
2739
2740 return "";
2741}
2742
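/* Implement TARGET_FRAME_POINTER_REQUIRED. Return true if the current
   function must establish a frame pointer. */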
43e9d192
IB
2743static bool
2744aarch64_frame_pointer_required (void)
2745{
0b7f8166
MS
2746 /* In aarch64_override_options_after_change
2747 flag_omit_leaf_frame_pointer turns off the frame pointer by
2748 default. Turn it back on now if we've not got a leaf
2749 function. */
2750 if (flag_omit_leaf_frame_pointer
2751 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2752 return true;
43e9d192 2753
0b7f8166 2754 return false;
43e9d192
IB
2755}
2756
2757/* Mark the registers that need to be saved by the callee and calculate
2758 the size of the callee-saved registers area and frame record (both FP
2759 and LR may be omitted). */
2760static void
2761aarch64_layout_frame (void)
2762{
2763 HOST_WIDE_INT offset = 0;
4b0685d9 2764 int regno, last_fp_reg = INVALID_REGNUM;
43e9d192
IB
2765
2766 if (reload_completed && cfun->machine->frame.laid_out)
2767 return;
2768
97826595
MS
2769#define SLOT_NOT_REQUIRED (-2)
2770#define SLOT_REQUIRED (-1)
2771
71bfb77a
WD
2772 cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
2773 cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
363ffa50 2774
43e9d192
IB
2775 /* First mark all the registers that really need to be saved... */
2776 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2777 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2778
2779 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2780 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
2781
2782 /* ... that includes the eh data registers (if needed)... */
2783 if (crtl->calls_eh_return)
2784 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
2785 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2786 = SLOT_REQUIRED;
43e9d192
IB
2787
2788 /* ... and any callee saved register that dataflow says is live. */
2789 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2790 if (df_regs_ever_live_p (regno)
1c923b60
JW
2791 && (regno == R30_REGNUM
2792 || !call_used_regs[regno]))
97826595 2793 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
2794
2795 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2796 if (df_regs_ever_live_p (regno)
2797 && !call_used_regs[regno])
4b0685d9
WD
2798 {
2799 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2800 last_fp_reg = regno;
2801 }
43e9d192
IB
2802
2803 if (frame_pointer_needed)
2804 {
2e1cdae5 2805 /* FP and LR are placed in the linkage record. */
43e9d192 2806 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 2807 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 2808 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 2809 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
2e1cdae5 2810 offset += 2 * UNITS_PER_WORD;
43e9d192
IB
2811 }
2812
2813 /* Now assign stack slots for them. */
2e1cdae5 2814 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 2815 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
2816 {
2817 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 2818 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 2819 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 2820 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
363ffa50 2821 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2822 offset += UNITS_PER_WORD;
2823 }
2824
4b0685d9
WD
2825 HOST_WIDE_INT max_int_offset = offset;
2826 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2827 bool has_align_gap = offset != max_int_offset;
2828
43e9d192 2829 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 2830 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192 2831 {
4b0685d9
WD
2832 /* If there is an alignment gap between integer and fp callee-saves,
2833 allocate the last fp register to it if possible. */
2834 if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
2835 {
2836 cfun->machine->frame.reg_offset[regno] = max_int_offset;
2837 break;
2838 }
2839
43e9d192 2840 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 2841 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 2842 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 2843 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
363ffa50
JW
2844 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2845 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
2846 offset += UNITS_PER_WORD;
2847 }
2848
4f59f9f2 2849 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
2850
2851 cfun->machine->frame.saved_regs_size = offset;
1c960e02 2852
71bfb77a
WD
2853 HOST_WIDE_INT varargs_and_saved_regs_size
2854 = offset + cfun->machine->frame.saved_varargs_size;
2855
1c960e02 2856 cfun->machine->frame.hard_fp_offset
71bfb77a 2857 = ROUND_UP (varargs_and_saved_regs_size + get_frame_size (),
4f59f9f2 2858 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02
MS
2859
2860 cfun->machine->frame.frame_size
4f59f9f2
UB
2861 = ROUND_UP (cfun->machine->frame.hard_fp_offset
2862 + crtl->outgoing_args_size,
2863 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 2864
71bfb77a
WD
2865 cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
2866
2867 cfun->machine->frame.initial_adjust = 0;
2868 cfun->machine->frame.final_adjust = 0;
2869 cfun->machine->frame.callee_adjust = 0;
2870 cfun->machine->frame.callee_offset = 0;
2871
2872 HOST_WIDE_INT max_push_offset = 0;
2873 if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
2874 max_push_offset = 512;
2875 else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
2876 max_push_offset = 256;
2877
2878 if (cfun->machine->frame.frame_size < max_push_offset
2879 && crtl->outgoing_args_size == 0)
2880 {
2881 /* Simple, small frame with no outgoing arguments:
2882 stp reg1, reg2, [sp, -frame_size]!
2883 stp reg3, reg4, [sp, 16] */
2884 cfun->machine->frame.callee_adjust = cfun->machine->frame.frame_size;
2885 }
2886 else if ((crtl->outgoing_args_size
2887 + cfun->machine->frame.saved_regs_size < 512)
2888 && !(cfun->calls_alloca
2889 && cfun->machine->frame.hard_fp_offset < max_push_offset))
2890 {
2891 /* Frame with small outgoing arguments:
2892 sub sp, sp, frame_size
2893 stp reg1, reg2, [sp, outgoing_args_size]
2894 stp reg3, reg4, [sp, outgoing_args_size + 16] */
2895 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
2896 cfun->machine->frame.callee_offset
2897 = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
2898 }
2899 else if (cfun->machine->frame.hard_fp_offset < max_push_offset)
2900 {
2901 /* Frame with large outgoing arguments but a small local area:
2902 stp reg1, reg2, [sp, -hard_fp_offset]!
2903 stp reg3, reg4, [sp, 16]
2904 sub sp, sp, outgoing_args_size */
2905 cfun->machine->frame.callee_adjust = cfun->machine->frame.hard_fp_offset;
2906 cfun->machine->frame.final_adjust
2907 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
2908 }
2909 else if (!frame_pointer_needed
2910 && varargs_and_saved_regs_size < max_push_offset)
2911 {
2912 /* Frame with large local area and outgoing arguments (this pushes the
2913 callee-saves first, followed by the locals and outgoing area):
2914 stp reg1, reg2, [sp, -varargs_and_saved_regs_size]!
2915 stp reg3, reg4, [sp, 16]
2916 sub sp, sp, frame_size - varargs_and_saved_regs_size */
2917 cfun->machine->frame.callee_adjust = varargs_and_saved_regs_size;
2918 cfun->machine->frame.final_adjust
2919 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
2920 cfun->machine->frame.hard_fp_offset = cfun->machine->frame.callee_adjust;
2921 cfun->machine->frame.locals_offset = cfun->machine->frame.hard_fp_offset;
2922 }
2923 else
2924 {
2925 /* Frame with large local area and outgoing arguments using frame pointer:
2926 sub sp, sp, hard_fp_offset
2927 stp x29, x30, [sp, 0]
2928 add x29, sp, 0
2929 stp reg3, reg4, [sp, 16]
2930 sub sp, sp, outgoing_args_size */
2931 cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
2932 cfun->machine->frame.final_adjust
2933 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
2934 }
2935
43e9d192
IB
2936 cfun->machine->frame.laid_out = true;
2937}
2938
04ddfe06
KT
2939/* Return true if the register REGNO is saved on entry to
2940 the current function. */
2941
43e9d192
IB
2942static bool
2943aarch64_register_saved_on_entry (int regno)
2944{
97826595 2945 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
2946}
2947
04ddfe06
KT
2948/* Return the next register up from REGNO up to LIMIT for the callee
2949 to save. */
2950
64dedd72
JW
2951static unsigned
2952aarch64_next_callee_save (unsigned regno, unsigned limit)
2953{
2954 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2955 regno ++;
2956 return regno;
2957}
43e9d192 2958
04ddfe06
KT
2959/* Push the register number REGNO of mode MODE to the stack with write-back
2960 adjusting the stack by ADJUSTMENT. */
2961
c5e1f66e 2962static void
ef4bddc2 2963aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
2964 HOST_WIDE_INT adjustment)
2965 {
2966 rtx base_rtx = stack_pointer_rtx;
2967 rtx insn, reg, mem;
2968
2969 reg = gen_rtx_REG (mode, regno);
2970 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2971 plus_constant (Pmode, base_rtx, -adjustment));
2972 mem = gen_rtx_MEM (mode, mem);
2973
2974 insn = emit_move_insn (mem, reg);
2975 RTX_FRAME_RELATED_P (insn) = 1;
2976}
2977
04ddfe06
KT
2978/* Generate and return an instruction to store the pair of registers
2979 REG and REG2 of mode MODE to location BASE with write-back adjusting
2980 the stack location BASE by ADJUSTMENT. */
2981
80c11907 2982static rtx
ef4bddc2 2983aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
2984 HOST_WIDE_INT adjustment)
2985{
2986 switch (mode)
2987 {
2988 case DImode:
2989 return gen_storewb_pairdi_di (base, base, reg, reg2,
2990 GEN_INT (-adjustment),
2991 GEN_INT (UNITS_PER_WORD - adjustment));
2992 case DFmode:
2993 return gen_storewb_pairdf_di (base, base, reg, reg2,
2994 GEN_INT (-adjustment),
2995 GEN_INT (UNITS_PER_WORD - adjustment));
2996 default:
2997 gcc_unreachable ();
2998 }
2999}
3000
04ddfe06
KT
3001/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
3002 stack pointer by ADJUSTMENT. */
3003
80c11907 3004static void
89ac681e 3005aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
80c11907 3006{
5d8a22a5 3007 rtx_insn *insn;
89ac681e
WD
3008 machine_mode mode = (regno1 <= R30_REGNUM) ? DImode : DFmode;
3009
71bfb77a 3010 if (regno2 == INVALID_REGNUM)
89ac681e
WD
3011 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
3012
80c11907
JW
3013 rtx reg1 = gen_rtx_REG (mode, regno1);
3014 rtx reg2 = gen_rtx_REG (mode, regno2);
3015
3016 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
3017 reg2, adjustment));
3018 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
3019 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3020 RTX_FRAME_RELATED_P (insn) = 1;
3021}
3022
04ddfe06
KT
3023/* Load the pair of register REG, REG2 of mode MODE from stack location BASE,
3024 adjusting it by ADJUSTMENT afterwards. */
3025
159313d9 3026static rtx
ef4bddc2 3027aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
3028 HOST_WIDE_INT adjustment)
3029{
3030 switch (mode)
3031 {
3032 case DImode:
3033 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 3034 GEN_INT (UNITS_PER_WORD));
159313d9
JW
3035 case DFmode:
3036 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 3037 GEN_INT (UNITS_PER_WORD));
159313d9
JW
3038 default:
3039 gcc_unreachable ();
3040 }
3041}
3042
04ddfe06
KT
3043/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
3044 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
3045 into CFI_OPS. */
3046
89ac681e
WD
3047static void
3048aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
3049 rtx *cfi_ops)
3050{
3051 machine_mode mode = (regno1 <= R30_REGNUM) ? DImode : DFmode;
3052 rtx reg1 = gen_rtx_REG (mode, regno1);
3053
3054 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
3055
71bfb77a 3056 if (regno2 == INVALID_REGNUM)
89ac681e
WD
3057 {
3058 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
3059 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
3060 emit_move_insn (reg1, gen_rtx_MEM (mode, mem));
3061 }
3062 else
3063 {
3064 rtx reg2 = gen_rtx_REG (mode, regno2);
3065 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
3066 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
3067 reg2, adjustment));
3068 }
3069}
3070
04ddfe06
KT
3071/* Generate and return a store pair instruction of mode MODE to store
3072 register REG1 to MEM1 and register REG2 to MEM2. */
3073
72df5c1f 3074static rtx
ef4bddc2 3075aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
3076 rtx reg2)
3077{
3078 switch (mode)
3079 {
3080 case DImode:
3081 return gen_store_pairdi (mem1, reg1, mem2, reg2);
3082
3083 case DFmode:
3084 return gen_store_pairdf (mem1, reg1, mem2, reg2);
3085
3086 default:
3087 gcc_unreachable ();
3088 }
3089}
3090
04ddfe06
KT
 3091/* Generate and return a load pair instruction of mode MODE to load register
3092 REG1 from MEM1 and register REG2 from MEM2. */
3093
72df5c1f 3094static rtx
ef4bddc2 3095aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
3096 rtx mem2)
3097{
3098 switch (mode)
3099 {
3100 case DImode:
3101 return gen_load_pairdi (reg1, mem1, reg2, mem2);
3102
3103 case DFmode:
3104 return gen_load_pairdf (reg1, mem1, reg2, mem2);
3105
3106 default:
3107 gcc_unreachable ();
3108 }
3109}
3110
04ddfe06
KT
3111/* Emit code to save the callee-saved registers from register number START
3112 to LIMIT to the stack at the location starting at offset START_OFFSET,
3113 skipping any write-back candidates if SKIP_WB is true. */
43e9d192 3114
43e9d192 3115static void
ef4bddc2 3116aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 3117 unsigned start, unsigned limit, bool skip_wb)
43e9d192 3118{
5d8a22a5 3119 rtx_insn *insn;
ef4bddc2 3120 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
a007a21c 3121 ? gen_frame_mem : gen_rtx_MEM);
43e9d192
IB
3122 unsigned regno;
3123 unsigned regno2;
3124
0ec74a1e 3125 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
3126 regno <= limit;
3127 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 3128 {
ae13fce3
JW
3129 rtx reg, mem;
3130 HOST_WIDE_INT offset;
64dedd72 3131
ae13fce3
JW
3132 if (skip_wb
3133 && (regno == cfun->machine->frame.wb_candidate1
3134 || regno == cfun->machine->frame.wb_candidate2))
3135 continue;
3136
3137 reg = gen_rtx_REG (mode, regno);
3138 offset = start_offset + cfun->machine->frame.reg_offset[regno];
0ec74a1e
JW
3139 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
3140 offset));
64dedd72
JW
3141
3142 regno2 = aarch64_next_callee_save (regno + 1, limit);
3143
3144 if (regno2 <= limit
3145 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
3146 == cfun->machine->frame.reg_offset[regno2]))
3147
43e9d192 3148 {
0ec74a1e 3149 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
3150 rtx mem2;
3151
3152 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
8ed2fc62
JW
3153 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
3154 offset));
3155 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
3156 reg2));
0b4a9743 3157
64dedd72
JW
3158 /* The first part of a frame-related parallel insn is
3159 always assumed to be relevant to the frame
 3160 calculations; subsequent parts are only
3161 frame-related if explicitly marked. */
3162 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3163 regno = regno2;
3164 }
3165 else
8ed2fc62
JW
3166 insn = emit_move_insn (mem, reg);
3167
3168 RTX_FRAME_RELATED_P (insn) = 1;
3169 }
3170}
3171
04ddfe06
KT
 3172/* Emit code to restore the callee-saved registers of mode MODE from register
3173 number START up to and including LIMIT. Restore from the stack offset
3174 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
3175 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
3176
8ed2fc62 3177static void
ef4bddc2 3178aarch64_restore_callee_saves (machine_mode mode,
8ed2fc62 3179 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 3180 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 3181{
8ed2fc62 3182 rtx base_rtx = stack_pointer_rtx;
ef4bddc2 3183 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
8ed2fc62
JW
3184 ? gen_frame_mem : gen_rtx_MEM);
3185 unsigned regno;
3186 unsigned regno2;
3187 HOST_WIDE_INT offset;
3188
3189 for (regno = aarch64_next_callee_save (start, limit);
3190 regno <= limit;
3191 regno = aarch64_next_callee_save (regno + 1, limit))
3192 {
ae13fce3 3193 rtx reg, mem;
8ed2fc62 3194
ae13fce3
JW
3195 if (skip_wb
3196 && (regno == cfun->machine->frame.wb_candidate1
3197 || regno == cfun->machine->frame.wb_candidate2))
3198 continue;
3199
3200 reg = gen_rtx_REG (mode, regno);
8ed2fc62
JW
3201 offset = start_offset + cfun->machine->frame.reg_offset[regno];
3202 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
3203
3204 regno2 = aarch64_next_callee_save (regno + 1, limit);
3205
3206 if (regno2 <= limit
3207 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
3208 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 3209 {
8ed2fc62
JW
3210 rtx reg2 = gen_rtx_REG (mode, regno2);
3211 rtx mem2;
3212
3213 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
3214 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 3215 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 3216
dd991abb 3217 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 3218 regno = regno2;
43e9d192 3219 }
8ed2fc62 3220 else
dd991abb
RH
3221 emit_move_insn (reg, mem);
3222 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 3223 }
43e9d192
IB
3224}
3225
3226/* AArch64 stack frames generated by this compiler look like:
3227
3228 +-------------------------------+
3229 | |
3230 | incoming stack arguments |
3231 | |
34834420
MS
3232 +-------------------------------+
3233 | | <-- incoming stack pointer (aligned)
43e9d192
IB
3234 | callee-allocated save area |
3235 | for register varargs |
3236 | |
34834420
MS
3237 +-------------------------------+
3238 | local variables | <-- frame_pointer_rtx
43e9d192
IB
3239 | |
3240 +-------------------------------+
454fdba9
RL
3241 | padding0 | \
3242 +-------------------------------+ |
454fdba9 3243 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
3244 +-------------------------------+ |
3245 | LR' | |
3246 +-------------------------------+ |
34834420
MS
3247 | FP' | / <- hard_frame_pointer_rtx (aligned)
3248 +-------------------------------+
43e9d192
IB
3249 | dynamic allocation |
3250 +-------------------------------+
34834420
MS
3251 | padding |
3252 +-------------------------------+
3253 | outgoing stack arguments | <-- arg_pointer
3254 | |
3255 +-------------------------------+
3256 | | <-- stack_pointer_rtx (aligned)
43e9d192 3257
34834420
MS
3258 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
3259 but leave frame_pointer_rtx and hard_frame_pointer_rtx
3260 unchanged. */
43e9d192
IB
3261
3262/* Generate the prologue instructions for entry into a function.
3263 Establish the stack frame by decreasing the stack pointer with a
3264 properly calculated size and, if necessary, create a frame record
3265 filled with the values of LR and previous frame pointer. The
6991c977 3266 current FP is also set up if it is in use. */
43e9d192
IB
3267
3268void
3269aarch64_expand_prologue (void)
3270{
43e9d192 3271 aarch64_layout_frame ();
43e9d192 3272
71bfb77a
WD
3273 HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size;
3274 HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
3275 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
3276 HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
3277 HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
3278 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3279 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3280 rtx_insn *insn;
43e9d192 3281
dd991abb
RH
3282 if (flag_stack_usage_info)
3283 current_function_static_stack_size = frame_size;
43e9d192 3284
a3eb8a52
EB
3285 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3286 {
3287 if (crtl->is_leaf && !cfun->calls_alloca)
3288 {
3289 if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
3290 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3291 frame_size - STACK_CHECK_PROTECT);
3292 }
3293 else if (frame_size > 0)
3294 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
3295 }
3296
5be6b295 3297 aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
43e9d192 3298
71bfb77a
WD
3299 if (callee_adjust != 0)
3300 aarch64_push_regs (reg1, reg2, callee_adjust);
43e9d192 3301
71bfb77a 3302 if (frame_pointer_needed)
43e9d192 3303 {
71bfb77a
WD
3304 if (callee_adjust == 0)
3305 aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM,
3306 R30_REGNUM, false);
3307 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
3308 stack_pointer_rtx,
3309 GEN_INT (callee_offset)));
3310 RTX_FRAME_RELATED_P (insn) = 1;
3311 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192 3312 }
71bfb77a
WD
3313
3314 aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
3315 callee_adjust != 0 || frame_pointer_needed);
3316 aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
3317 callee_adjust != 0 || frame_pointer_needed);
5be6b295 3318 aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
43e9d192
IB
3319}
3320
4f942779
RL
3321/* Return TRUE if we can use a simple_return insn.
3322
3323 This function checks whether the callee saved stack is empty, which
 3324 means no restore actions are needed. The pro_and_epilogue will use
3325 this to check whether shrink-wrapping opt is feasible. */
3326
3327bool
3328aarch64_use_return_insn_p (void)
3329{
3330 if (!reload_completed)
3331 return false;
3332
3333 if (crtl->profile)
3334 return false;
3335
3336 aarch64_layout_frame ();
3337
3338 return cfun->machine->frame.frame_size == 0;
3339}
3340
71bfb77a
WD
3341/* Generate the epilogue instructions for returning from a function.
3342 This is almost exactly the reverse of the prolog sequence, except
3343 that we need to insert barriers to avoid scheduling loads that read
3344 from a deallocated stack, and we optimize the unwind records by
3345 emitting them all together if possible. */
43e9d192
IB
3346void
3347aarch64_expand_epilogue (bool for_sibcall)
3348{
43e9d192 3349 aarch64_layout_frame ();
43e9d192 3350
71bfb77a
WD
3351 HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
3352 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
3353 HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
3354 HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
3355 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3356 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3357 rtx cfi_ops = NULL;
3358 rtx_insn *insn;
44c0e7b9 3359
71bfb77a
WD
3360 /* We need to add memory barrier to prevent read from deallocated stack. */
3361 bool need_barrier_p = (get_frame_size ()
3362 + cfun->machine->frame.saved_varargs_size) != 0;
43e9d192 3363
71bfb77a
WD
3364 /* Emit a barrier to prevent loads from a deallocated stack. */
3365 if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
43e9d192 3366 {
71bfb77a
WD
3367 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3368 need_barrier_p = false;
3369 }
7e8c2bd5 3370
71bfb77a
WD
3371 /* Restore the stack pointer from the frame pointer if it may not
3372 be the same as the stack pointer. */
3373 if (frame_pointer_needed && (final_adjust || cfun->calls_alloca))
3374 {
43e9d192
IB
3375 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
3376 hard_frame_pointer_rtx,
71bfb77a
WD
3377 GEN_INT (-callee_offset)));
3378 /* If writeback is used when restoring callee-saves, the CFA
3379 is restored on the instruction doing the writeback. */
3380 RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
43e9d192 3381 }
71bfb77a 3382 else
5be6b295 3383 aarch64_add_sp (IP1_REGNUM, final_adjust, df_regs_ever_live_p (IP1_REGNUM));
43e9d192 3384
71bfb77a
WD
3385 aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
3386 callee_adjust != 0, &cfi_ops);
3387 aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
3388 callee_adjust != 0, &cfi_ops);
43e9d192 3389
71bfb77a
WD
3390 if (need_barrier_p)
3391 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3392
3393 if (callee_adjust != 0)
3394 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
3395
3396 if (callee_adjust != 0 || initial_adjust > 65536)
3397 {
3398 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 3399 insn = get_last_insn ();
71bfb77a
WD
3400 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
3401 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 3402 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 3403 cfi_ops = NULL;
43e9d192
IB
3404 }
3405
5be6b295 3406 aarch64_add_sp (IP0_REGNUM, initial_adjust, df_regs_ever_live_p (IP0_REGNUM));
7e8c2bd5 3407
71bfb77a
WD
3408 if (cfi_ops)
3409 {
3410 /* Emit delayed restores and reset the CFA to be SP. */
3411 insn = get_last_insn ();
3412 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
3413 REG_NOTES (insn) = cfi_ops;
3414 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
3415 }
3416
3417 /* Stack adjustment for exception handler. */
3418 if (crtl->calls_eh_return)
3419 {
3420 /* We need to unwind the stack by the offset computed by
3421 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
3422 to be SP; letting the CFA move during this adjustment
3423 is just as correct as retaining the CFA from the body
3424 of the function. Therefore, do nothing special. */
3425 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
3426 }
3427
3428 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
3429 if (!for_sibcall)
3430 emit_jump_insn (ret_rtx);
3431}
3432
3433/* Return the place to copy the exception unwinding return address to.
3434 This will probably be a stack slot, but could (in theory) be the
3435 return register.  */
3436rtx
3437aarch64_final_eh_return_addr (void)
3438{
1c960e02
MS
3439 HOST_WIDE_INT fp_offset;
3440
43e9d192 3441 aarch64_layout_frame ();
1c960e02
MS
3442
3443 fp_offset = cfun->machine->frame.frame_size
3444 - cfun->machine->frame.hard_fp_offset;
43e9d192
IB
3445
3446 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
3447 return gen_rtx_REG (DImode, LR_REGNUM);
3448
3449 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
3450 result in a store to save LR introduced by builtin_eh_return () being
3451 incorrectly deleted because the alias is not detected.
3452 So in the calculation of the address to copy the exception unwinding
3453 return address to, we note 2 cases.
3454 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
3455 we return a SP-relative location since all the addresses are SP-relative
3456 in this case. This prevents the store from being optimized away.
3457 If the fp_offset is not 0, then the addresses will be FP-relative and
3458 therefore we return a FP-relative location. */
3459
3460 if (frame_pointer_needed)
3461 {
3462 if (fp_offset)
3463 return gen_frame_mem (DImode,
3464 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
3465 else
3466 return gen_frame_mem (DImode,
3467 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
3468 }
3469
3470 /* If FP is not needed, we calculate the location of LR, which would be
3471 at the top of the saved registers block. */
3472
3473 return gen_frame_mem (DImode,
3474 plus_constant (Pmode,
3475 stack_pointer_rtx,
3476 fp_offset
3477 + cfun->machine->frame.saved_regs_size
3478 - 2 * UNITS_PER_WORD));
3479}
3480
43e9d192
IB
3481/* Output code to add DELTA to the first argument, and then jump
3482 to FUNCTION. Used for C++ multiple inheritance. */
3483static void
3484aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
3485 HOST_WIDE_INT delta,
3486 HOST_WIDE_INT vcall_offset,
3487 tree function)
3488{
3489 /* The this pointer is always in x0. Note that this differs from
3490 Arm where the this pointer may be bumped to r1 if r0 is required
3491 to return a pointer to an aggregate. On AArch64 a result value
3492 pointer will be in x8. */
3493 int this_regno = R0_REGNUM;
5d8a22a5
DM
3494 rtx this_rtx, temp0, temp1, addr, funexp;
3495 rtx_insn *insn;
43e9d192 3496
75f1d6fc
SN
3497 reload_completed = 1;
3498 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
3499
3500 if (vcall_offset == 0)
5be6b295 3501 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
43e9d192
IB
3502 else
3503 {
28514dda 3504 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 3505
75f1d6fc
SN
3506 this_rtx = gen_rtx_REG (Pmode, this_regno);
3507 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3508 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 3509
75f1d6fc
SN
3510 addr = this_rtx;
3511 if (delta != 0)
3512 {
3513 if (delta >= -256 && delta < 256)
3514 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
3515 plus_constant (Pmode, this_rtx, delta));
3516 else
5be6b295 3517 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
43e9d192
IB
3518 }
3519
28514dda
YZ
3520 if (Pmode == ptr_mode)
3521 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
3522 else
3523 aarch64_emit_move (temp0,
3524 gen_rtx_ZERO_EXTEND (Pmode,
3525 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 3526
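	  /* If the vcall offset fits the 9-bit signed unscaled or the 12-bit
	     unsigned scaled load range checked below, fold it into the address
	     directly; otherwise materialize it in a temporary register first.  */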
28514dda 3527 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 3528 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
3529 else
3530 {
f43657b4
JW
3531 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
3532 Pmode);
75f1d6fc 3533 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
3534 }
3535
28514dda
YZ
3536 if (Pmode == ptr_mode)
3537 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
3538 else
3539 aarch64_emit_move (temp1,
3540 gen_rtx_SIGN_EXTEND (Pmode,
3541 gen_rtx_MEM (ptr_mode, addr)));
3542
75f1d6fc 3543 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
3544 }
3545
75f1d6fc
SN
3546 /* Generate a tail call to the target function. */
3547 if (!TREE_USED (function))
3548 {
3549 assemble_external (function);
3550 TREE_USED (function) = 1;
3551 }
3552 funexp = XEXP (DECL_RTL (function), 0);
3553 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
3554 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
3555 SIBLING_CALL_P (insn) = 1;
3556
3557 insn = get_insns ();
3558 shorten_branches (insn);
3559 final_start_function (insn, file, 1);
3560 final (insn, file, 1);
43e9d192 3561 final_end_function ();
75f1d6fc
SN
3562
3563 /* Stop pretending to be a post-reload pass. */
3564 reload_completed = 0;
43e9d192
IB
3565}
3566
43e9d192
IB
3567static bool
3568aarch64_tls_referenced_p (rtx x)
3569{
3570 if (!TARGET_HAVE_TLS)
3571 return false;
e7de8563
RS
3572 subrtx_iterator::array_type array;
3573 FOR_EACH_SUBRTX (iter, array, x, ALL)
3574 {
3575 const_rtx x = *iter;
3576 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3577 return true;
3578 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3579 TLS offsets, not real symbol references. */
3580 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3581 iter.skip_subrtxes ();
3582 }
3583 return false;
43e9d192
IB
3584}
3585
3586
43e9d192
IB
3587/* Return true if val can be encoded as a 12-bit unsigned immediate with
3588 a left shift of 0 or 12 bits. */
3589bool
3590aarch64_uimm12_shift (HOST_WIDE_INT val)
3591{
3592 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3593 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3594 );
3595}
3596
3597
3598/* Return true if val is an immediate that can be loaded into a
3599 register by a MOVZ instruction. */
3600static bool
ef4bddc2 3601aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3602{
3603 if (GET_MODE_SIZE (mode) > 4)
3604 {
3605 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3606 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3607 return 1;
3608 }
3609 else
3610 {
3611 /* Ignore sign extension. */
3612 val &= (HOST_WIDE_INT) 0xffffffff;
3613 }
3614 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3615 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3616}
3617
a64c73a2
WD
3618/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
3619
3620static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
3621 {
3622 0x0000000100000001ull,
3623 0x0001000100010001ull,
3624 0x0101010101010101ull,
3625 0x1111111111111111ull,
3626 0x5555555555555555ull,
3627 };
3628
43e9d192
IB
3629
3630/* Return true if val is a valid bitmask immediate. */
a64c73a2 3631
43e9d192 3632bool
a64c73a2 3633aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 3634{
a64c73a2
WD
3635 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
3636 int bits;
3637
3638 /* Check for a single sequence of one bits and return quickly if so.
3639 The special cases of all ones and all zeroes return false.  */
3640 val = (unsigned HOST_WIDE_INT) val_in;
3641 tmp = val + (val & -val);
3642
3643 if (tmp == (tmp & -tmp))
3644 return (val + 1) > 1;
3645
3646 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
3647 if (mode == SImode)
3648 val = (val << 32) | (val & 0xffffffff);
3649
3650 /* Invert if the immediate doesn't start with a zero bit - this means we
3651 only need to search for sequences of one bits. */
3652 if (val & 1)
3653 val = ~val;
3654
3655 /* Find the first set bit and set tmp to val with the first sequence of one
3656 bits removed. Return success if there is a single sequence of ones. */
3657 first_one = val & -val;
3658 tmp = val & (val + first_one);
3659
3660 if (tmp == 0)
3661 return true;
3662
3663 /* Find the next set bit and compute the difference in bit position. */
3664 next_one = tmp & -tmp;
3665 bits = clz_hwi (first_one) - clz_hwi (next_one);
3666 mask = val ^ tmp;
3667
3668 /* Check the bit position difference is a power of 2, and that the first
3669 sequence of one bits fits within 'bits' bits. */
3670 if ((mask >> bits) != 0 || bits != (bits & -bits))
3671 return false;
3672
3673 /* Check the sequence of one bits is repeated 64/bits times. */
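  /* Note: BITS is a power of two in {2, 4, 8, 16, 32} here, so (assuming a
     32-bit int) __builtin_clz (bits) - 26 maps it to indices 0..4 of
     bitmask_imm_mul, i.e. to the repeat widths 32, 16, 8, 4 and 2.  */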
3674 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
3675}
3676
3677
3678/* Return true if val is an immediate that can be loaded into a
3679 register in a single instruction. */
3680bool
ef4bddc2 3681aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
3682{
3683 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3684 return 1;
3685 return aarch64_bitmask_imm (val, mode);
3686}
3687
3688static bool
ef4bddc2 3689aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
3690{
3691 rtx base, offset;
7eda14e1 3692
43e9d192
IB
3693 if (GET_CODE (x) == HIGH)
3694 return true;
3695
3696 split_const (x, &base, &offset);
3697 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 3698 {
a6e0bfa7 3699 if (aarch64_classify_symbol (base, offset)
28514dda
YZ
3700 != SYMBOL_FORCE_TO_MEM)
3701 return true;
3702 else
3703 /* Avoid generating a 64-bit relocation in ILP32; leave
3704 to aarch64_expand_mov_immediate to handle it properly. */
3705 return mode != ptr_mode;
3706 }
43e9d192
IB
3707
3708 return aarch64_tls_referenced_p (x);
3709}
3710
e79136e4
WD
3711/* Implement TARGET_CASE_VALUES_THRESHOLD.
3712 The expansion for a table switch is quite expensive due to the number
3713 of instructions, the table lookup and the hard-to-predict indirect jump.
3714 When optimizing for speed with -O3, use the per-core tuning if it is
3715 set, otherwise use tables for more than 16 cases as a tradeoff between
3716 size and performance.  When optimizing for size, use the default setting. */
50487d79
EM
3717
3718static unsigned int
3719aarch64_case_values_threshold (void)
3720{
3721 /* Use the specified limit for the number of cases before using jump
3722 tables at higher optimization levels. */
3723 if (optimize > 2
3724 && selected_cpu->tune->max_case_values != 0)
3725 return selected_cpu->tune->max_case_values;
3726 else
e79136e4 3727 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
3728}
3729
43e9d192
IB
3730/* Return true if register REGNO is a valid index register.
3731 STRICT_P is true if REG_OK_STRICT is in effect. */
3732
3733bool
3734aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3735{
3736 if (!HARD_REGISTER_NUM_P (regno))
3737 {
3738 if (!strict_p)
3739 return true;
3740
3741 if (!reg_renumber)
3742 return false;
3743
3744 regno = reg_renumber[regno];
3745 }
3746 return GP_REGNUM_P (regno);
3747}
3748
3749/* Return true if register REGNO is a valid base register.
3750 STRICT_P is true if REG_OK_STRICT is in effect. */
3751
3752bool
3753aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3754{
3755 if (!HARD_REGISTER_NUM_P (regno))
3756 {
3757 if (!strict_p)
3758 return true;
3759
3760 if (!reg_renumber)
3761 return false;
3762
3763 regno = reg_renumber[regno];
3764 }
3765
3766 /* The fake registers will be eliminated to either the stack or
3767 hard frame pointer, both of which are usually valid base registers.
3768 Reload deals with the cases where the eliminated form isn't valid. */
3769 return (GP_REGNUM_P (regno)
3770 || regno == SP_REGNUM
3771 || regno == FRAME_POINTER_REGNUM
3772 || regno == ARG_POINTER_REGNUM);
3773}
3774
3775/* Return true if X is a valid base register.
3776 STRICT_P is true if REG_OK_STRICT is in effect. */
3777
3778static bool
3779aarch64_base_register_rtx_p (rtx x, bool strict_p)
3780{
3781 if (!strict_p && GET_CODE (x) == SUBREG)
3782 x = SUBREG_REG (x);
3783
3784 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3785}
3786
3787/* Return true if address offset is a valid index. If it is, fill in INFO
3788 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3789
3790static bool
3791aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 3792 machine_mode mode, bool strict_p)
43e9d192
IB
3793{
3794 enum aarch64_address_type type;
3795 rtx index;
3796 int shift;
3797
3798 /* (reg:P) */
3799 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3800 && GET_MODE (x) == Pmode)
3801 {
3802 type = ADDRESS_REG_REG;
3803 index = x;
3804 shift = 0;
3805 }
3806 /* (sign_extend:DI (reg:SI)) */
3807 else if ((GET_CODE (x) == SIGN_EXTEND
3808 || GET_CODE (x) == ZERO_EXTEND)
3809 && GET_MODE (x) == DImode
3810 && GET_MODE (XEXP (x, 0)) == SImode)
3811 {
3812 type = (GET_CODE (x) == SIGN_EXTEND)
3813 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3814 index = XEXP (x, 0);
3815 shift = 0;
3816 }
3817 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3818 else if (GET_CODE (x) == MULT
3819 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3820 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3821 && GET_MODE (XEXP (x, 0)) == DImode
3822 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3823 && CONST_INT_P (XEXP (x, 1)))
3824 {
3825 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3826 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3827 index = XEXP (XEXP (x, 0), 0);
3828 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3829 }
3830 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3831 else if (GET_CODE (x) == ASHIFT
3832 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3833 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3834 && GET_MODE (XEXP (x, 0)) == DImode
3835 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3836 && CONST_INT_P (XEXP (x, 1)))
3837 {
3838 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3839 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3840 index = XEXP (XEXP (x, 0), 0);
3841 shift = INTVAL (XEXP (x, 1));
3842 }
3843 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3844 else if ((GET_CODE (x) == SIGN_EXTRACT
3845 || GET_CODE (x) == ZERO_EXTRACT)
3846 && GET_MODE (x) == DImode
3847 && GET_CODE (XEXP (x, 0)) == MULT
3848 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3849 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3850 {
3851 type = (GET_CODE (x) == SIGN_EXTRACT)
3852 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3853 index = XEXP (XEXP (x, 0), 0);
3854 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3855 if (INTVAL (XEXP (x, 1)) != 32 + shift
3856 || INTVAL (XEXP (x, 2)) != 0)
3857 shift = -1;
3858 }
3859 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3860 (const_int 0xffffffff<<shift)) */
3861 else if (GET_CODE (x) == AND
3862 && GET_MODE (x) == DImode
3863 && GET_CODE (XEXP (x, 0)) == MULT
3864 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3865 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3866 && CONST_INT_P (XEXP (x, 1)))
3867 {
3868 type = ADDRESS_REG_UXTW;
3869 index = XEXP (XEXP (x, 0), 0);
3870 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3871 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3872 shift = -1;
3873 }
3874 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3875 else if ((GET_CODE (x) == SIGN_EXTRACT
3876 || GET_CODE (x) == ZERO_EXTRACT)
3877 && GET_MODE (x) == DImode
3878 && GET_CODE (XEXP (x, 0)) == ASHIFT
3879 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3880 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3881 {
3882 type = (GET_CODE (x) == SIGN_EXTRACT)
3883 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3884 index = XEXP (XEXP (x, 0), 0);
3885 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3886 if (INTVAL (XEXP (x, 1)) != 32 + shift
3887 || INTVAL (XEXP (x, 2)) != 0)
3888 shift = -1;
3889 }
3890 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3891 (const_int 0xffffffff<<shift)) */
3892 else if (GET_CODE (x) == AND
3893 && GET_MODE (x) == DImode
3894 && GET_CODE (XEXP (x, 0)) == ASHIFT
3895 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3896 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3897 && CONST_INT_P (XEXP (x, 1)))
3898 {
3899 type = ADDRESS_REG_UXTW;
3900 index = XEXP (XEXP (x, 0), 0);
3901 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3902 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3903 shift = -1;
3904 }
3905 /* (mult:P (reg:P) (const_int scale)) */
3906 else if (GET_CODE (x) == MULT
3907 && GET_MODE (x) == Pmode
3908 && GET_MODE (XEXP (x, 0)) == Pmode
3909 && CONST_INT_P (XEXP (x, 1)))
3910 {
3911 type = ADDRESS_REG_REG;
3912 index = XEXP (x, 0);
3913 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3914 }
3915 /* (ashift:P (reg:P) (const_int shift)) */
3916 else if (GET_CODE (x) == ASHIFT
3917 && GET_MODE (x) == Pmode
3918 && GET_MODE (XEXP (x, 0)) == Pmode
3919 && CONST_INT_P (XEXP (x, 1)))
3920 {
3921 type = ADDRESS_REG_REG;
3922 index = XEXP (x, 0);
3923 shift = INTVAL (XEXP (x, 1));
3924 }
3925 else
3926 return false;
3927
3928 if (GET_CODE (index) == SUBREG)
3929 index = SUBREG_REG (index);
3930
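  /* A scaled index is only valid when the shift amount is zero or equal to
     log2 of the access size, i.e. an LSL of at most 3 that matches
     GET_MODE_SIZE (mode).  */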
3931 if ((shift == 0
3932      || (shift > 0 && shift <= 3
3933          && (1 << shift) == GET_MODE_SIZE (mode)))
3934 && REG_P (index)
3935 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3936 {
3937 info->type = type;
3938 info->offset = index;
3939 info->shift = shift;
3940 return true;
3941 }
3942
3943 return false;
3944}
3945
44707478 3946bool
ef4bddc2 3947aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3948{
3949 return (offset >= -64 * GET_MODE_SIZE (mode)
3950 && offset < 64 * GET_MODE_SIZE (mode)
3951 && offset % GET_MODE_SIZE (mode) == 0);
3952}
3953
3954static inline bool
ef4bddc2 3955offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
3956 HOST_WIDE_INT offset)
3957{
3958 return offset >= -256 && offset < 256;
3959}
3960
3961static inline bool
ef4bddc2 3962offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3963{
3964 return (offset >= 0
3965 && offset < 4096 * GET_MODE_SIZE (mode)
3966 && offset % GET_MODE_SIZE (mode) == 0);
3967}
3968
abc52318
KT
3969/* Return true if MODE is one of the modes for which we
3970 support LDP/STP operations. */
3971
3972static bool
3973aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
3974{
3975 return mode == SImode || mode == DImode
3976 || mode == SFmode || mode == DFmode
3977 || (aarch64_vector_mode_supported_p (mode)
3978 && GET_MODE_SIZE (mode) == 8);
3979}
3980
9e0218fc
RH
3981/* Return true if REGNO is a virtual pointer register, or an eliminable
3982 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
3983 include stack_pointer or hard_frame_pointer. */
3984static bool
3985virt_or_elim_regno_p (unsigned regno)
3986{
3987 return ((regno >= FIRST_VIRTUAL_REGISTER
3988 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
3989 || regno == FRAME_POINTER_REGNUM
3990 || regno == ARG_POINTER_REGNUM);
3991}
3992
43e9d192
IB
3993/* Return true if X is a valid address for machine mode MODE. If it is,
3994 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3995 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3996
3997static bool
3998aarch64_classify_address (struct aarch64_address_info *info,
ef4bddc2 3999 rtx x, machine_mode mode,
43e9d192
IB
4000 RTX_CODE outer_code, bool strict_p)
4001{
4002 enum rtx_code code = GET_CODE (x);
4003 rtx op0, op1;
2d8c6dc1
AH
4004
4005 /* On BE, we use load/store pair for all large int mode load/stores. */
4006 bool load_store_pair_p = (outer_code == PARALLEL
4007 || (BYTES_BIG_ENDIAN
4008 && aarch64_vect_struct_mode_p (mode)));
4009
43e9d192 4010 bool allow_reg_index_p =
2d8c6dc1
AH
4011 !load_store_pair_p
4012 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
4013 && !aarch64_vect_struct_mode_p (mode);
4014
4015 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
4016 REG addressing. */
4017 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
43e9d192
IB
4018 && (code != POST_INC && code != REG))
4019 return false;
4020
4021 switch (code)
4022 {
4023 case REG:
4024 case SUBREG:
4025 info->type = ADDRESS_REG_IMM;
4026 info->base = x;
4027 info->offset = const0_rtx;
4028 return aarch64_base_register_rtx_p (x, strict_p);
4029
4030 case PLUS:
4031 op0 = XEXP (x, 0);
4032 op1 = XEXP (x, 1);
15c0c5c9
JW
4033
4034 if (! strict_p
4aa81c2e 4035 && REG_P (op0)
9e0218fc 4036 && virt_or_elim_regno_p (REGNO (op0))
4aa81c2e 4037 && CONST_INT_P (op1))
15c0c5c9
JW
4038 {
4039 info->type = ADDRESS_REG_IMM;
4040 info->base = op0;
4041 info->offset = op1;
4042
4043 return true;
4044 }
4045
43e9d192
IB
4046 if (GET_MODE_SIZE (mode) != 0
4047 && CONST_INT_P (op1)
4048 && aarch64_base_register_rtx_p (op0, strict_p))
4049 {
4050 HOST_WIDE_INT offset = INTVAL (op1);
4051
4052 info->type = ADDRESS_REG_IMM;
4053 info->base = op0;
4054 info->offset = op1;
4055
4056 /* TImode and TFmode values are allowed in both pairs of X
4057 registers and individual Q registers. The available
4058 address modes are:
4059 X,X: 7-bit signed scaled offset
4060 Q: 9-bit signed offset
4061 We conservatively require an offset representable in either mode.
8ed49fab
KT
4062 When performing the check for pairs of X registers i.e. LDP/STP
4063 pass down DImode since that is the natural size of the LDP/STP
4064 instruction memory accesses. */
43e9d192 4065 if (mode == TImode || mode == TFmode)
8ed49fab 4066 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
43e9d192
IB
4067 && offset_9bit_signed_unscaled_p (mode, offset));
4068
2d8c6dc1
AH
4069 /* A 7-bit offset check because OImode will emit an ldp/stp
4070 instruction (only big endian will get here).
4071 For ldp/stp instructions, the offset is scaled for the size of a
4072 single element of the pair. */
4073 if (mode == OImode)
4074 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
4075
4076 /* Three 9/12-bit offset checks because CImode will emit three
4077 ldr/str instructions (only big endian will get here). */
4078 if (mode == CImode)
4079 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4080 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
4081 || offset_12bit_unsigned_scaled_p (V16QImode,
4082 offset + 32)));
4083
4084 /* Two 7-bit offset checks because XImode will emit two ldp/stp
4085 instructions (only big endian will get here). */
4086 if (mode == XImode)
4087 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4088 && aarch64_offset_7bit_signed_scaled_p (TImode,
4089 offset + 32));
4090
4091 if (load_store_pair_p)
43e9d192 4092 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 4093 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
4094 else
4095 return (offset_9bit_signed_unscaled_p (mode, offset)
4096 || offset_12bit_unsigned_scaled_p (mode, offset));
4097 }
4098
4099 if (allow_reg_index_p)
4100 {
4101 /* Look for base + (scaled/extended) index register. */
4102 if (aarch64_base_register_rtx_p (op0, strict_p)
4103 && aarch64_classify_index (info, op1, mode, strict_p))
4104 {
4105 info->base = op0;
4106 return true;
4107 }
4108 if (aarch64_base_register_rtx_p (op1, strict_p)
4109 && aarch64_classify_index (info, op0, mode, strict_p))
4110 {
4111 info->base = op1;
4112 return true;
4113 }
4114 }
4115
4116 return false;
4117
4118 case POST_INC:
4119 case POST_DEC:
4120 case PRE_INC:
4121 case PRE_DEC:
4122 info->type = ADDRESS_REG_WB;
4123 info->base = XEXP (x, 0);
4124 info->offset = NULL_RTX;
4125 return aarch64_base_register_rtx_p (info->base, strict_p);
4126
4127 case POST_MODIFY:
4128 case PRE_MODIFY:
4129 info->type = ADDRESS_REG_WB;
4130 info->base = XEXP (x, 0);
4131 if (GET_CODE (XEXP (x, 1)) == PLUS
4132 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
4133 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
4134 && aarch64_base_register_rtx_p (info->base, strict_p))
4135 {
4136 HOST_WIDE_INT offset;
4137 info->offset = XEXP (XEXP (x, 1), 1);
4138 offset = INTVAL (info->offset);
4139
4140 /* TImode and TFmode values are allowed in both pairs of X
4141 registers and individual Q registers. The available
4142 address modes are:
4143 X,X: 7-bit signed scaled offset
4144 Q: 9-bit signed offset
4145 We conservatively require an offset representable in either mode.
4146 */
4147 if (mode == TImode || mode == TFmode)
44707478 4148 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
4149 && offset_9bit_signed_unscaled_p (mode, offset));
4150
2d8c6dc1 4151 if (load_store_pair_p)
43e9d192 4152 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 4153 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
4154 else
4155 return offset_9bit_signed_unscaled_p (mode, offset);
4156 }
4157 return false;
4158
4159 case CONST:
4160 case SYMBOL_REF:
4161 case LABEL_REF:
79517551
SN
4162 /* load literal: pc-relative constant pool entry. Only supported
4163 for SI mode or larger. */
43e9d192 4164 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1
AH
4165
4166 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
4167 {
4168 rtx sym, addend;
4169
4170 split_const (x, &sym, &addend);
b4f50fd4
RR
4171 return ((GET_CODE (sym) == LABEL_REF
4172 || (GET_CODE (sym) == SYMBOL_REF
4173 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 4174 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
4175 }
4176 return false;
4177
4178 case LO_SUM:
4179 info->type = ADDRESS_LO_SUM;
4180 info->base = XEXP (x, 0);
4181 info->offset = XEXP (x, 1);
4182 if (allow_reg_index_p
4183 && aarch64_base_register_rtx_p (info->base, strict_p))
4184 {
4185 rtx sym, offs;
4186 split_const (info->offset, &sym, &offs);
4187 if (GET_CODE (sym) == SYMBOL_REF
a6e0bfa7 4188 && (aarch64_classify_symbol (sym, offs) == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
4189 {
4190 /* The symbol and offset must be aligned to the access size. */
4191 unsigned int align;
4192 unsigned int ref_size;
4193
4194 if (CONSTANT_POOL_ADDRESS_P (sym))
4195 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
4196 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
4197 {
4198 tree exp = SYMBOL_REF_DECL (sym);
4199 align = TYPE_ALIGN (TREE_TYPE (exp));
4200 align = CONSTANT_ALIGNMENT (exp, align);
4201 }
4202 else if (SYMBOL_REF_DECL (sym))
4203 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
4204 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
4205 && SYMBOL_REF_BLOCK (sym) != NULL)
4206 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
4207 else
4208 align = BITS_PER_UNIT;
4209
4210 ref_size = GET_MODE_SIZE (mode);
4211 if (ref_size == 0)
4212 ref_size = GET_MODE_SIZE (DImode);
4213
4214 return ((INTVAL (offs) & (ref_size - 1)) == 0
4215 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
4216 }
4217 }
4218 return false;
4219
4220 default:
4221 return false;
4222 }
4223}
4224
4225bool
4226aarch64_symbolic_address_p (rtx x)
4227{
4228 rtx offset;
4229
4230 split_const (x, &x, &offset);
4231 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
4232}
4233
a6e0bfa7 4234/* Classify the base of symbolic expression X. */
da4f13a4
MS
4235
4236enum aarch64_symbol_type
a6e0bfa7 4237aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
4238{
4239 rtx offset;
da4f13a4 4240
43e9d192 4241 split_const (x, &x, &offset);
a6e0bfa7 4242 return aarch64_classify_symbol (x, offset);
43e9d192
IB
4243}
4244
4245
4246/* Return TRUE if X is a legitimate address for accessing memory in
4247 mode MODE. */
4248static bool
ef4bddc2 4249aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
4250{
4251 struct aarch64_address_info addr;
4252
4253 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
4254}
4255
4256/* Return TRUE if X is a legitimate address for accessing memory in
4257 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
4258 pair operation. */
4259bool
ef4bddc2 4260aarch64_legitimate_address_p (machine_mode mode, rtx x,
aef66c94 4261 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
4262{
4263 struct aarch64_address_info addr;
4264
4265 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
4266}
4267
491ec060
WD
4268/* Split an out-of-range address displacement into a base and offset.
4269 Use a 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
4270 to increase opportunities for sharing the base address across access sizes.
4271 For TI/TFmode and unaligned accesses use a 256-byte range. */
4272static bool
4273aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
4274{
4275 HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
4276
4277 if (mode == TImode || mode == TFmode
4278     || (INTVAL (*disp) & (GET_MODE_SIZE (mode) - 1)) != 0)
4279 mask = 0xff;
4280
4281 *off = GEN_INT (INTVAL (*disp) & ~mask);
4282 *disp = GEN_INT (INTVAL (*disp) & mask);
4283 return true;
4284}
4285
43e9d192
IB
4286/* Return TRUE if rtx X is immediate constant 0.0 */
4287bool
3520f7cc 4288aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 4289{
43e9d192
IB
4290 if (GET_MODE (x) == VOIDmode)
4291 return false;
4292
34a72c33 4293 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 4294 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 4295 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
4296}
4297
70f09188
AP
4298/* Return the fixed registers used for condition codes. */
4299
4300static bool
4301aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
4302{
4303 *p1 = CC_REGNUM;
4304 *p2 = INVALID_REGNUM;
4305 return true;
4306}
4307
78607708
TV
4308/* Emit call insn with PAT and do aarch64-specific handling. */
4309
d07a3fed 4310void
78607708
TV
4311aarch64_emit_call_insn (rtx pat)
4312{
4313 rtx insn = emit_call_insn (pat);
4314
4315 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
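  /* IP0 and IP1 (x16/x17) are the intra-procedure-call scratch registers;
     linker-generated veneers and PLT stubs may clobber them, so record them
     as clobbered by every call.  */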
4316 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
4317 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
4318}
4319
ef4bddc2 4320machine_mode
43e9d192
IB
4321aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
4322{
4323 /* All floating point compares return CCFP if it is an equality
4324 comparison, and CCFPE otherwise. */
4325 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4326 {
4327 switch (code)
4328 {
4329 case EQ:
4330 case NE:
4331 case UNORDERED:
4332 case ORDERED:
4333 case UNLT:
4334 case UNLE:
4335 case UNGT:
4336 case UNGE:
4337 case UNEQ:
4338 case LTGT:
4339 return CCFPmode;
4340
4341 case LT:
4342 case LE:
4343 case GT:
4344 case GE:
4345 return CCFPEmode;
4346
4347 default:
4348 gcc_unreachable ();
4349 }
4350 }
4351
2b8568fe
KT
4352 /* Equality comparisons of short modes against zero can be performed
4353 using the TST instruction with the appropriate bitmask. */
4354 if (y == const0_rtx && REG_P (x)
4355 && (code == EQ || code == NE)
4356 && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
4357 return CC_NZmode;
4358
b06335f9
KT
4359 /* Similarly, comparisons of zero_extends from shorter modes can
4360 be performed using an ANDS with an immediate mask. */
4361 if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
4362 && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4363 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
4364 && (code == EQ || code == NE))
4365 return CC_NZmode;
4366
43e9d192
IB
4367 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4368 && y == const0_rtx
4369 && (code == EQ || code == NE || code == LT || code == GE)
b056c910 4370 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
7325d85a
KT
4371 || GET_CODE (x) == NEG
4372 || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
4373 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
4374 return CC_NZmode;
4375
1c992d1e 4376 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
4377 the comparison will have to be swapped when we emit the assembly
4378 code. */
4379 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 4380 && (REG_P (y) || GET_CODE (y) == SUBREG)
43e9d192
IB
4381 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
4382 || GET_CODE (x) == LSHIFTRT
1c992d1e 4383 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
4384 return CC_SWPmode;
4385
1c992d1e
RE
4386 /* Similarly for a negated operand, but we can only do this for
4387 equalities. */
4388 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 4389 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
4390 && (code == EQ || code == NE)
4391 && GET_CODE (x) == NEG)
4392 return CC_Zmode;
4393
ef22810a
RH
4394 /* A test for unsigned overflow. */
4395 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
4396 && code == NE
4397 && GET_CODE (x) == PLUS
4398 && GET_CODE (y) == ZERO_EXTEND)
4399 return CC_Cmode;
4400
43e9d192
IB
4401 /* For everything else, return CCmode. */
4402 return CCmode;
4403}
4404
3dfa7055
ZC
4405static int
4406aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
4407
cd5660ab 4408int
43e9d192
IB
4409aarch64_get_condition_code (rtx x)
4410{
ef4bddc2 4411 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
4412 enum rtx_code comp_code = GET_CODE (x);
4413
4414 if (GET_MODE_CLASS (mode) != MODE_CC)
4415 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
4416 return aarch64_get_condition_code_1 (mode, comp_code);
4417}
43e9d192 4418
3dfa7055
ZC
4419static int
4420aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
4421{
43e9d192
IB
4422 switch (mode)
4423 {
4424 case CCFPmode:
4425 case CCFPEmode:
4426 switch (comp_code)
4427 {
4428 case GE: return AARCH64_GE;
4429 case GT: return AARCH64_GT;
4430 case LE: return AARCH64_LS;
4431 case LT: return AARCH64_MI;
4432 case NE: return AARCH64_NE;
4433 case EQ: return AARCH64_EQ;
4434 case ORDERED: return AARCH64_VC;
4435 case UNORDERED: return AARCH64_VS;
4436 case UNLT: return AARCH64_LT;
4437 case UNLE: return AARCH64_LE;
4438 case UNGT: return AARCH64_HI;
4439 case UNGE: return AARCH64_PL;
cd5660ab 4440 default: return -1;
43e9d192
IB
4441 }
4442 break;
4443
4444 case CCmode:
4445 switch (comp_code)
4446 {
4447 case NE: return AARCH64_NE;
4448 case EQ: return AARCH64_EQ;
4449 case GE: return AARCH64_GE;
4450 case GT: return AARCH64_GT;
4451 case LE: return AARCH64_LE;
4452 case LT: return AARCH64_LT;
4453 case GEU: return AARCH64_CS;
4454 case GTU: return AARCH64_HI;
4455 case LEU: return AARCH64_LS;
4456 case LTU: return AARCH64_CC;
cd5660ab 4457 default: return -1;
43e9d192
IB
4458 }
4459 break;
4460
4461 case CC_SWPmode:
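      /* CC_SWPmode means the comparison operands were swapped when the mode
	 was selected, so map each condition to its operand-swapped
	 counterpart (GE <-> LE, GT <-> LT, etc.).  */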
43e9d192
IB
4462 switch (comp_code)
4463 {
4464 case NE: return AARCH64_NE;
4465 case EQ: return AARCH64_EQ;
4466 case GE: return AARCH64_LE;
4467 case GT: return AARCH64_LT;
4468 case LE: return AARCH64_GE;
4469 case LT: return AARCH64_GT;
4470 case GEU: return AARCH64_LS;
4471 case GTU: return AARCH64_CC;
4472 case LEU: return AARCH64_CS;
4473 case LTU: return AARCH64_HI;
cd5660ab 4474 default: return -1;
43e9d192
IB
4475 }
4476 break;
4477
4478 case CC_NZmode:
4479 switch (comp_code)
4480 {
4481 case NE: return AARCH64_NE;
4482 case EQ: return AARCH64_EQ;
4483 case GE: return AARCH64_PL;
4484 case LT: return AARCH64_MI;
cd5660ab 4485 default: return -1;
43e9d192
IB
4486 }
4487 break;
4488
1c992d1e
RE
4489 case CC_Zmode:
4490 switch (comp_code)
4491 {
4492 case NE: return AARCH64_NE;
4493 case EQ: return AARCH64_EQ;
cd5660ab 4494 default: return -1;
1c992d1e
RE
4495 }
4496 break;
4497
ef22810a
RH
4498 case CC_Cmode:
4499 switch (comp_code)
4500 {
4501 case NE: return AARCH64_CS;
4502 case EQ: return AARCH64_CC;
4503 default: return -1;
4504 }
4505 break;
4506
43e9d192 4507 default:
cd5660ab 4508 return -1;
43e9d192 4509 }
3dfa7055 4510
3dfa7055 4511 return -1;
43e9d192
IB
4512}
4513
ddeabd3e
AL
4514bool
4515aarch64_const_vec_all_same_in_range_p (rtx x,
4516 HOST_WIDE_INT minval,
4517 HOST_WIDE_INT maxval)
4518{
4519 HOST_WIDE_INT firstval;
4520 int count, i;
4521
4522 if (GET_CODE (x) != CONST_VECTOR
4523 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
4524 return false;
4525
4526 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
4527 if (firstval < minval || firstval > maxval)
4528 return false;
4529
4530 count = CONST_VECTOR_NUNITS (x);
4531 for (i = 1; i < count; i++)
4532 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
4533 return false;
4534
4535 return true;
4536}
4537
4538bool
4539aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
4540{
4541 return aarch64_const_vec_all_same_in_range_p (x, val, val);
4542}
4543
43e9d192 4544
cf670503
ZC
4545/* N Z C V. */
4546#define AARCH64_CC_V 1
4547#define AARCH64_CC_C (1 << 1)
4548#define AARCH64_CC_Z (1 << 2)
4549#define AARCH64_CC_N (1 << 3)
4550
c8012fbc
WD
4551/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
4552static const int aarch64_nzcv_codes[] =
4553{
4554 0, /* EQ, Z == 1. */
4555 AARCH64_CC_Z, /* NE, Z == 0. */
4556 0, /* CS, C == 1. */
4557 AARCH64_CC_C, /* CC, C == 0. */
4558 0, /* MI, N == 1. */
4559 AARCH64_CC_N, /* PL, N == 0. */
4560 0, /* VS, V == 1. */
4561 AARCH64_CC_V, /* VC, V == 0. */
4562 0,	/* HI, C == 1 && Z == 0.  */
4563 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
4564 AARCH64_CC_V, /* GE, N == V. */
4565 0, /* LT, N != V. */
4566 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
4567 0, /* LE, !(Z == 0 && N == V). */
4568 0, /* AL, Any. */
4569 0 /* NV, Any. */
cf670503
ZC
4570};
4571
cc8ca59e
JB
4572static void
4573aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192
IB
4574{
4575 switch (code)
4576 {
f541a481
KT
4577 /* An integer or symbol address without a preceding # sign. */
4578 case 'c':
4579 switch (GET_CODE (x))
4580 {
4581 case CONST_INT:
4582 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4583 break;
4584
4585 case SYMBOL_REF:
4586 output_addr_const (f, x);
4587 break;
4588
4589 case CONST:
4590 if (GET_CODE (XEXP (x, 0)) == PLUS
4591 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4592 {
4593 output_addr_const (f, x);
4594 break;
4595 }
4596 /* Fall through. */
4597
4598 default:
4599 output_operand_lossage ("Unsupported operand for code '%c'", code);
4600 }
4601 break;
4602
43e9d192
IB
4603 case 'e':
4604 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4605 {
4606 int n;
4607
4aa81c2e 4608 if (!CONST_INT_P (x)
43e9d192
IB
4609 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4610 {
4611 output_operand_lossage ("invalid operand for '%%%c'", code);
4612 return;
4613 }
4614
4615 switch (n)
4616 {
4617 case 3:
4618 fputc ('b', f);
4619 break;
4620 case 4:
4621 fputc ('h', f);
4622 break;
4623 case 5:
4624 fputc ('w', f);
4625 break;
4626 default:
4627 output_operand_lossage ("invalid operand for '%%%c'", code);
4628 return;
4629 }
4630 }
4631 break;
4632
4633 case 'p':
4634 {
4635 int n;
4636
4637 /* Print N such that 2^N == X. */
4aa81c2e 4638 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
4639 {
4640 output_operand_lossage ("invalid operand for '%%%c'", code);
4641 return;
4642 }
4643
4644 asm_fprintf (f, "%d", n);
4645 }
4646 break;
4647
4648 case 'P':
4649 /* Print the number of non-zero bits in X (a const_int). */
4aa81c2e 4650 if (!CONST_INT_P (x))
43e9d192
IB
4651 {
4652 output_operand_lossage ("invalid operand for '%%%c'", code);
4653 return;
4654 }
4655
8d55c61b 4656 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
4657 break;
4658
4659 case 'H':
4660 /* Print the higher numbered register of a pair (TImode) of regs. */
4aa81c2e 4661 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
4662 {
4663 output_operand_lossage ("invalid operand for '%%%c'", code);
4664 return;
4665 }
4666
01a3a324 4667 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
4668 break;
4669
43e9d192 4670 case 'M':
c8012fbc 4671 case 'm':
cd5660ab
KT
4672 {
4673 int cond_code;
c8012fbc 4674 /* Print a condition (eq, ne, etc) or its inverse. */
43e9d192 4675
c8012fbc
WD
4676 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
4677 if (x == const_true_rtx)
cd5660ab 4678 {
c8012fbc
WD
4679 if (code == 'M')
4680 fputs ("nv", f);
cd5660ab
KT
4681 return;
4682 }
43e9d192 4683
cd5660ab
KT
4684 if (!COMPARISON_P (x))
4685 {
4686 output_operand_lossage ("invalid operand for '%%%c'", code);
4687 return;
4688 }
c8012fbc 4689
cd5660ab
KT
4690 cond_code = aarch64_get_condition_code (x);
4691 gcc_assert (cond_code >= 0);
c8012fbc
WD
4692 if (code == 'M')
4693 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
4694 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 4695 }
43e9d192
IB
4696 break;
4697
4698 case 'b':
4699 case 'h':
4700 case 's':
4701 case 'd':
4702 case 'q':
4703 /* Print a scalar FP/SIMD register name. */
4704 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4705 {
4706 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4707 return;
4708 }
50ce6f88 4709 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
4710 break;
4711
4712 case 'S':
4713 case 'T':
4714 case 'U':
4715 case 'V':
4716 /* Print the first FP/SIMD register name in a list. */
4717 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4718 {
4719 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4720 return;
4721 }
50ce6f88 4722 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
4723 break;
4724
2d8c6dc1
AH
4725 case 'R':
4726 /* Print a scalar FP/SIMD register name + 1. */
4727 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4728 {
4729 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4730 return;
4731 }
4732 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
4733 break;
4734
a05c0ddf 4735 case 'X':
50d38551 4736 /* Print bottom 16 bits of integer constant in hex. */
4aa81c2e 4737 if (!CONST_INT_P (x))
a05c0ddf
IB
4738 {
4739 output_operand_lossage ("invalid operand for '%%%c'", code);
4740 return;
4741 }
50d38551 4742 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
4743 break;
4744
43e9d192
IB
4745 case 'w':
4746 case 'x':
4747 /* Print a general register name or the zero register (32-bit or
4748 64-bit). */
3520f7cc
JG
4749 if (x == const0_rtx
4750 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 4751 {
50ce6f88 4752 asm_fprintf (f, "%czr", code);
43e9d192
IB
4753 break;
4754 }
4755
4756 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
4757 {
50ce6f88 4758 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
4759 break;
4760 }
4761
4762 if (REG_P (x) && REGNO (x) == SP_REGNUM)
4763 {
50ce6f88 4764 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
4765 break;
4766 }
4767
4768 /* Fall through */
4769
4770 case 0:
4771 /* Print a normal operand; if it's a general register, then we
4772 assume DImode.  */
4773 if (x == NULL)
4774 {
4775 output_operand_lossage ("missing operand");
4776 return;
4777 }
4778
4779 switch (GET_CODE (x))
4780 {
4781 case REG:
01a3a324 4782 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
4783 break;
4784
4785 case MEM:
cc8ca59e 4786 output_address (GET_MODE (x), XEXP (x, 0));
43e9d192
IB
4787 break;
4788
2af16a7c 4789 case CONST:
43e9d192
IB
4790 case LABEL_REF:
4791 case SYMBOL_REF:
4792 output_addr_const (asm_out_file, x);
4793 break;
4794
4795 case CONST_INT:
4796 asm_fprintf (f, "%wd", INTVAL (x));
4797 break;
4798
4799 case CONST_VECTOR:
3520f7cc
JG
4800 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
4801 {
ddeabd3e
AL
4802 gcc_assert (
4803 aarch64_const_vec_all_same_in_range_p (x,
4804 HOST_WIDE_INT_MIN,
4805 HOST_WIDE_INT_MAX));
3520f7cc
JG
4806 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
4807 }
4808 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
4809 {
4810 fputc ('0', f);
4811 }
4812 else
4813 gcc_unreachable ();
43e9d192
IB
4814 break;
4815
3520f7cc 4816 case CONST_DOUBLE:
2ca5b430
KT
4817 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
4818 be getting CONST_DOUBLEs holding integers. */
4819 gcc_assert (GET_MODE (x) != VOIDmode);
4820 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
4821 {
4822 fputc ('0', f);
4823 break;
4824 }
4825 else if (aarch64_float_const_representable_p (x))
4826 {
4827#define buf_size 20
4828 char float_buf[buf_size] = {'\0'};
34a72c33
RS
4829 real_to_decimal_for_mode (float_buf,
4830 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
4831 buf_size, buf_size,
4832 1, GET_MODE (x));
4833 asm_fprintf (asm_out_file, "%s", float_buf);
4834 break;
4835#undef buf_size
4836 }
4837 output_operand_lossage ("invalid constant");
4838 return;
43e9d192
IB
4839 default:
4840 output_operand_lossage ("invalid operand");
4841 return;
4842 }
4843 break;
4844
4845 case 'A':
4846 if (GET_CODE (x) == HIGH)
4847 x = XEXP (x, 0);
4848
a6e0bfa7 4849 switch (aarch64_classify_symbolic_expression (x))
43e9d192 4850 {
6642bdb4 4851 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
4852 asm_fprintf (asm_out_file, ":got:");
4853 break;
4854
4855 case SYMBOL_SMALL_TLSGD:
4856 asm_fprintf (asm_out_file, ":tlsgd:");
4857 break;
4858
4859 case SYMBOL_SMALL_TLSDESC:
4860 asm_fprintf (asm_out_file, ":tlsdesc:");
4861 break;
4862
79496620 4863 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
4864 asm_fprintf (asm_out_file, ":gottprel:");
4865 break;
4866
d18ba284 4867 case SYMBOL_TLSLE24:
43e9d192
IB
4868 asm_fprintf (asm_out_file, ":tprel:");
4869 break;
4870
87dd8ab0
MS
4871 case SYMBOL_TINY_GOT:
4872 gcc_unreachable ();
4873 break;
4874
43e9d192
IB
4875 default:
4876 break;
4877 }
4878 output_addr_const (asm_out_file, x);
4879 break;
4880
4881 case 'L':
a6e0bfa7 4882 switch (aarch64_classify_symbolic_expression (x))
43e9d192 4883 {
6642bdb4 4884 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
4885 asm_fprintf (asm_out_file, ":lo12:");
4886 break;
4887
4888 case SYMBOL_SMALL_TLSGD:
4889 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4890 break;
4891
4892 case SYMBOL_SMALL_TLSDESC:
4893 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4894 break;
4895
79496620 4896 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
4897 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4898 break;
4899
cbf5629e
JW
4900 case SYMBOL_TLSLE12:
4901 asm_fprintf (asm_out_file, ":tprel_lo12:");
4902 break;
4903
d18ba284 4904 case SYMBOL_TLSLE24:
43e9d192
IB
4905 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4906 break;
4907
87dd8ab0
MS
4908 case SYMBOL_TINY_GOT:
4909 asm_fprintf (asm_out_file, ":got:");
4910 break;
4911
5ae7caad
JW
4912 case SYMBOL_TINY_TLSIE:
4913 asm_fprintf (asm_out_file, ":gottprel:");
4914 break;
4915
43e9d192
IB
4916 default:
4917 break;
4918 }
4919 output_addr_const (asm_out_file, x);
4920 break;
4921
4922 case 'G':
4923
a6e0bfa7 4924 switch (aarch64_classify_symbolic_expression (x))
43e9d192 4925 {
d18ba284 4926 case SYMBOL_TLSLE24:
43e9d192
IB
4927 asm_fprintf (asm_out_file, ":tprel_hi12:");
4928 break;
4929 default:
4930 break;
4931 }
4932 output_addr_const (asm_out_file, x);
4933 break;
4934
cf670503
ZC
4935 case 'k':
4936 {
c8012fbc 4937 HOST_WIDE_INT cond_code;
cf670503
ZC
4938 /* Print nzcv. */
4939
c8012fbc 4940 if (!CONST_INT_P (x))
cf670503
ZC
4941 {
4942 output_operand_lossage ("invalid operand for '%%%c'", code);
4943 return;
4944 }
4945
c8012fbc
WD
4946 cond_code = INTVAL (x);
4947 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
4948 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
4949 }
4950 break;
4951
43e9d192
IB
4952 default:
4953 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4954 return;
4955 }
4956}
4957
cc8ca59e
JB
4958static void
4959aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
43e9d192
IB
4960{
4961 struct aarch64_address_info addr;
4962
cc8ca59e 4963 if (aarch64_classify_address (&addr, x, mode, MEM, true))
43e9d192
IB
4964 switch (addr.type)
4965 {
4966 case ADDRESS_REG_IMM:
4967 if (addr.offset == const0_rtx)
01a3a324 4968 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 4969 else
16a3246f 4970 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
4971 INTVAL (addr.offset));
4972 return;
4973
4974 case ADDRESS_REG_REG:
4975 if (addr.shift == 0)
16a3246f 4976 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 4977 reg_names [REGNO (addr.offset)]);
43e9d192 4978 else
16a3246f 4979 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 4980 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
4981 return;
4982
4983 case ADDRESS_REG_UXTW:
4984 if (addr.shift == 0)
16a3246f 4985 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4986 REGNO (addr.offset) - R0_REGNUM);
4987 else
16a3246f 4988 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4989 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4990 return;
4991
4992 case ADDRESS_REG_SXTW:
4993 if (addr.shift == 0)
16a3246f 4994 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4995 REGNO (addr.offset) - R0_REGNUM);
4996 else
16a3246f 4997 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4998 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4999 return;
5000
5001 case ADDRESS_REG_WB:
5002 switch (GET_CODE (x))
5003 {
5004 case PRE_INC:
16a3246f 5005 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
cc8ca59e 5006 GET_MODE_SIZE (mode));
43e9d192
IB
5007 return;
5008 case POST_INC:
16a3246f 5009 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
cc8ca59e 5010 GET_MODE_SIZE (mode));
43e9d192
IB
5011 return;
5012 case PRE_DEC:
16a3246f 5013 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
cc8ca59e 5014 GET_MODE_SIZE (mode));
43e9d192
IB
5015 return;
5016 case POST_DEC:
16a3246f 5017 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
cc8ca59e 5018 GET_MODE_SIZE (mode));
43e9d192
IB
5019 return;
5020 case PRE_MODIFY:
16a3246f 5021 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
5022 INTVAL (addr.offset));
5023 return;
5024 case POST_MODIFY:
16a3246f 5025 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
5026 INTVAL (addr.offset));
5027 return;
5028 default:
5029 break;
5030 }
5031 break;
5032
5033 case ADDRESS_LO_SUM:
16a3246f 5034 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
5035 output_addr_const (f, addr.offset);
5036 asm_fprintf (f, "]");
5037 return;
5038
5039 case ADDRESS_SYMBOLIC:
5040 break;
5041 }
5042
5043 output_addr_const (f, x);
5044}
5045
43e9d192
IB
5046bool
5047aarch64_label_mentioned_p (rtx x)
5048{
5049 const char *fmt;
5050 int i;
5051
5052 if (GET_CODE (x) == LABEL_REF)
5053 return true;
5054
5055 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
5056 referencing instruction, but they are constant offsets, not
5057 symbols. */
5058 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5059 return false;
5060
5061 fmt = GET_RTX_FORMAT (GET_CODE (x));
5062 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5063 {
5064 if (fmt[i] == 'E')
5065 {
5066 int j;
5067
5068 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5069 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
5070 return 1;
5071 }
5072 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
5073 return 1;
5074 }
5075
5076 return 0;
5077}
5078
5079/* Implement REGNO_REG_CLASS. */
5080
5081enum reg_class
5082aarch64_regno_regclass (unsigned regno)
5083{
5084 if (GP_REGNUM_P (regno))
a4a182c6 5085 return GENERAL_REGS;
43e9d192
IB
5086
5087 if (regno == SP_REGNUM)
5088 return STACK_REG;
5089
5090 if (regno == FRAME_POINTER_REGNUM
5091 || regno == ARG_POINTER_REGNUM)
f24bb080 5092 return POINTER_REGS;
43e9d192
IB
5093
5094 if (FP_REGNUM_P (regno))
5095 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
5096
5097 return NO_REGS;
5098}
5099
0c4ec427 5100static rtx
ef4bddc2 5101aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
5102{
5103 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
5104 where mask is selected by alignment and size of the offset.
5105 We try to pick as large a range for the offset as possible to
5106 maximize the chance of a CSE. However, for aligned addresses
5107 we limit the range to 4k so that structures with different sized
e8426e0a
BC
5108 elements are likely to use the same base. We need to be careful
5109 not to split a CONST for some forms of address expression, otherwise
5110 it will generate sub-optimal code. */
0c4ec427
RE
5111
5112 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
5113 {
9e0218fc 5114 rtx base = XEXP (x, 0);
17d7bdd8 5115 rtx offset_rtx = XEXP (x, 1);
9e0218fc 5116 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 5117
9e0218fc 5118 if (GET_CODE (base) == PLUS)
e8426e0a 5119 {
9e0218fc
RH
5120 rtx op0 = XEXP (base, 0);
5121 rtx op1 = XEXP (base, 1);
5122
5123 /* Force any scaling into a temp for CSE. */
5124 op0 = force_reg (Pmode, op0);
5125 op1 = force_reg (Pmode, op1);
5126
5127 /* Let the pointer register be in op0. */
5128 if (REG_POINTER (op1))
5129 std::swap (op0, op1);
5130
5131 /* If the pointer is virtual or frame related, then we know that
5132 virtual register instantiation or register elimination is going
5133 to apply a second constant. We want the two constants folded
5134 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
5135 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 5136 {
9e0218fc
RH
5137 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
5138 NULL_RTX, true, OPTAB_DIRECT);
5139 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 5140 }
e8426e0a 5141
9e0218fc
RH
5142 /* Otherwise, in order to encourage CSE (and thence loop strength
5143 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
5144 base = expand_binop (Pmode, add_optab, op0, op1,
5145 NULL_RTX, true, OPTAB_DIRECT);
5146 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
5147 }
5148
0c4ec427 5149 /* Does it look like we'll need a load/store-pair operation? */
9e0218fc 5150 HOST_WIDE_INT base_offset;
0c4ec427
RE
5151 if (GET_MODE_SIZE (mode) > 16
5152 || mode == TImode)
5153 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
5154 & ~((128 * GET_MODE_SIZE (mode)) - 1));
5155 /* For offsets that aren't a multiple of the access size, the limit is
5156 -256...255. */
5157 else if (offset & (GET_MODE_SIZE (mode) - 1))
ff0f3f1c
WD
5158 {
5159 base_offset = (offset + 0x100) & ~0x1ff;
5160
5161 /* BLKmode typically uses LDP of X-registers. */
5162 if (mode == BLKmode)
5163 base_offset = (offset + 512) & ~0x3ff;
5164 }
5165 /* Small negative offsets are supported. */
5166 else if (IN_RANGE (offset, -256, 0))
5167 base_offset = 0;
5168 /* Use 12-bit offset by access size. */
0c4ec427 5169 else
ff0f3f1c 5170 base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
0c4ec427 5171
9e0218fc
RH
5172 if (base_offset != 0)
5173 {
5174 base = plus_constant (Pmode, base, base_offset);
5175 base = force_operand (base, NULL_RTX);
5176 return plus_constant (Pmode, base, offset - base_offset);
5177 }
0c4ec427
RE
5178 }
5179
5180 return x;
5181}
5182
b4f50fd4
RR
5183/* Return the reload icode required for a constant pool in mode. */
5184static enum insn_code
5185aarch64_constant_pool_reload_icode (machine_mode mode)
5186{
5187 switch (mode)
5188 {
5189 case SFmode:
5190 return CODE_FOR_aarch64_reload_movcpsfdi;
5191
5192 case DFmode:
5193 return CODE_FOR_aarch64_reload_movcpdfdi;
5194
5195 case TFmode:
5196 return CODE_FOR_aarch64_reload_movcptfdi;
5197
5198 case V8QImode:
5199 return CODE_FOR_aarch64_reload_movcpv8qidi;
5200
5201 case V16QImode:
5202 return CODE_FOR_aarch64_reload_movcpv16qidi;
5203
5204 case V4HImode:
5205 return CODE_FOR_aarch64_reload_movcpv4hidi;
5206
5207 case V8HImode:
5208 return CODE_FOR_aarch64_reload_movcpv8hidi;
5209
5210 case V2SImode:
5211 return CODE_FOR_aarch64_reload_movcpv2sidi;
5212
5213 case V4SImode:
5214 return CODE_FOR_aarch64_reload_movcpv4sidi;
5215
5216 case V2DImode:
5217 return CODE_FOR_aarch64_reload_movcpv2didi;
5218
5219 case V2DFmode:
5220 return CODE_FOR_aarch64_reload_movcpv2dfdi;
5221
5222 default:
5223 gcc_unreachable ();
5224 }
5225
5226 gcc_unreachable ();
5227}
43e9d192
IB
5228static reg_class_t
5229aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
5230 reg_class_t rclass,
ef4bddc2 5231 machine_mode mode,
43e9d192
IB
5232 secondary_reload_info *sri)
5233{
b4f50fd4
RR
5234
5235 /* If we have to disable direct literal pool loads and stores because the
5236 function is too big, then we need a scratch register. */
5237 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
5238 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
5239 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 5240 && !aarch64_pcrelative_literal_loads)
b4f50fd4
RR
5241 {
5242 sri->icode = aarch64_constant_pool_reload_icode (mode);
5243 return NO_REGS;
5244 }
5245
43e9d192
IB
5246 /* Without the TARGET_SIMD instructions we cannot move a Q register
5247 to a Q register directly. We need a scratch. */
5248 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
5249 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
5250 && reg_class_subset_p (rclass, FP_REGS))
5251 {
5252 if (mode == TFmode)
5253 sri->icode = CODE_FOR_aarch64_reload_movtf;
5254 else if (mode == TImode)
5255 sri->icode = CODE_FOR_aarch64_reload_movti;
5256 return NO_REGS;
5257 }
5258
 5262  /* A TFmode or TImode memory access should be handled via FP_REGS
5260 because AArch64 has richer addressing modes for LDR/STR instructions
5261 than LDP/STP instructions. */
d5726973 5262 if (TARGET_FLOAT && rclass == GENERAL_REGS
43e9d192
IB
5263 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
5264 return FP_REGS;
5265
5266 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 5267 return GENERAL_REGS;
43e9d192
IB
5268
5269 return NO_REGS;
5270}
5271
5272static bool
5273aarch64_can_eliminate (const int from, const int to)
5274{
5275 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
5276 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
5277
5278 if (frame_pointer_needed)
5279 {
5280 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5281 return true;
5282 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5283 return false;
5284 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
5285 && !cfun->calls_alloca)
5286 return true;
5287 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5288 return true;
0b7f8166
MS
5289
5290 return false;
43e9d192 5291 }
1c923b60
JW
5292 else
5293 {
5294 /* If we decided that we didn't need a leaf frame pointer but then used
5295 LR in the function, then we'll want a frame pointer after all, so
5296 prevent this elimination to ensure a frame pointer is used. */
5297 if (to == STACK_POINTER_REGNUM
5298 && flag_omit_leaf_frame_pointer
5299 && df_regs_ever_live_p (LR_REGNUM))
5300 return false;
5301 }
777e6976 5302
43e9d192
IB
5303 return true;
5304}
5305
5306HOST_WIDE_INT
5307aarch64_initial_elimination_offset (unsigned from, unsigned to)
5308{
43e9d192 5309 aarch64_layout_frame ();
78c29983
MS
5310
5311 if (to == HARD_FRAME_POINTER_REGNUM)
5312 {
5313 if (from == ARG_POINTER_REGNUM)
71bfb77a 5314 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
5315
5316 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
5317 return cfun->machine->frame.hard_fp_offset
5318 - cfun->machine->frame.locals_offset;
78c29983
MS
5319 }
5320
5321 if (to == STACK_POINTER_REGNUM)
5322 {
5323 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
5324 return cfun->machine->frame.frame_size
5325 - cfun->machine->frame.locals_offset;
78c29983
MS
5326 }
5327
1c960e02 5328 return cfun->machine->frame.frame_size;
43e9d192
IB
5329}
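/* A rough numeric sketch of the eliminations above (values illustrative
   only): with frame_size == 96, hard_fp_offset == 80 and locals_offset == 16,
   eliminating ARG_POINTER into the hard frame pointer yields 80,
   FRAME_POINTER into the hard frame pointer yields 80 - 16 = 64, and
   FRAME_POINTER into the stack pointer yields 96 - 16 = 80; every other
   combination falls back to frame_size.  */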
5330
43e9d192
IB
5331/* Implement RETURN_ADDR_RTX. We do not support moving back to a
5332 previous frame. */
5333
5334rtx
5335aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
5336{
5337 if (count != 0)
5338 return const0_rtx;
5339 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
5340}
5341
5342
5343static void
5344aarch64_asm_trampoline_template (FILE *f)
5345{
28514dda
YZ
5346 if (TARGET_ILP32)
5347 {
5348 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
5349 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
5350 }
5351 else
5352 {
5353 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
5354 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
5355 }
01a3a324 5356 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 5357 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
5358 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
5359 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
5360}
5361
5362static void
5363aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5364{
5365 rtx fnaddr, mem, a_tramp;
28514dda 5366 const int tramp_code_sz = 16;
43e9d192
IB
5367
 5368	  /* Don't need to copy the trailing D-words; we fill those in below.  */
5369 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
5370 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
5371 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 5372 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
5373 if (GET_MODE (fnaddr) != ptr_mode)
5374 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
5375 emit_move_insn (mem, fnaddr);
5376
28514dda 5377 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
5378 emit_move_insn (mem, chain_value);
5379
5380 /* XXX We should really define a "clear_cache" pattern and use
5381 gen_clear_cache(). */
5382 a_tramp = XEXP (m_tramp, 0);
5383 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
5384 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
5385 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
5386 ptr_mode);
43e9d192
IB
5387}
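/* A rough picture of the 32-byte LP64 trampoline built by the two routines
   above (register names are the usual ones, i.e. IP1 is normally x17 and the
   static chain register x18):
     offset  0: ldr x17, .+16     ; load target address from offset 16
     offset  4: ldr x18, .+20     ; load static chain from offset 24
     offset  8: br  x17
     offset 12: 4 bytes of zero padding
     offset 16: target function address (written by aarch64_trampoline_init)
     offset 24: static chain value (written by aarch64_trampoline_init).  */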
5388
5389static unsigned char
ef4bddc2 5390aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192
IB
5391{
5392 switch (regclass)
5393 {
fee9ba42 5394 case CALLER_SAVE_REGS:
43e9d192
IB
5395 case POINTER_REGS:
5396 case GENERAL_REGS:
5397 case ALL_REGS:
5398 case FP_REGS:
5399 case FP_LO_REGS:
5400 return
7bd11911
KT
5401 aarch64_vector_mode_p (mode)
5402 ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
5403 : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
43e9d192
IB
5404 case STACK_REG:
5405 return 1;
5406
5407 case NO_REGS:
5408 return 0;
5409
5410 default:
5411 break;
5412 }
5413 gcc_unreachable ();
5414}
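/* A quick sanity check of the calculation above, assuming the usual
   UNITS_PER_WORD of 8 and UNITS_PER_VREG of 16: TImode (16 bytes, not a
   vector mode) needs (16 + 8 - 1) / 8 = 2 registers, while V4SImode
   (16 bytes, a vector mode) needs (16 + 16 - 1) / 16 = 1 vector register;
   STACK_REG is always 1 and NO_REGS always 0.  */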
5415
5416static reg_class_t
78d8b9f0 5417aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 5418{
51bb310d 5419 if (regclass == POINTER_REGS)
78d8b9f0
IB
5420 return GENERAL_REGS;
5421
51bb310d
MS
5422 if (regclass == STACK_REG)
5423 {
5424 if (REG_P(x)
5425 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
5426 return regclass;
5427
5428 return NO_REGS;
5429 }
5430
78d8b9f0
IB
5431 /* If it's an integer immediate that MOVI can't handle, then
5432 FP_REGS is not an option, so we return NO_REGS instead. */
5433 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
5434 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
5435 return NO_REGS;
5436
27bd251b
IB
 5437	  /* Register elimination can result in a request for
 5438	     SP+constant->FP_REGS.  We cannot support such operations, which
 5439	     use SP as source and an FP_REG as destination, so reject them
 5440	     right now.  */
5441 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
5442 {
5443 rtx lhs = XEXP (x, 0);
5444
5445 /* Look through a possible SUBREG introduced by ILP32. */
5446 if (GET_CODE (lhs) == SUBREG)
5447 lhs = SUBREG_REG (lhs);
5448
5449 gcc_assert (REG_P (lhs));
5450 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
5451 POINTER_REGS));
5452 return NO_REGS;
5453 }
5454
78d8b9f0 5455 return regclass;
43e9d192
IB
5456}
5457
5458void
5459aarch64_asm_output_labelref (FILE* f, const char *name)
5460{
5461 asm_fprintf (f, "%U%s", name);
5462}
5463
5464static void
5465aarch64_elf_asm_constructor (rtx symbol, int priority)
5466{
5467 if (priority == DEFAULT_INIT_PRIORITY)
5468 default_ctor_section_asm_out_constructor (symbol, priority);
5469 else
5470 {
5471 section *s;
5472 char buf[18];
5473 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
5474 s = get_section (buf, SECTION_WRITE, NULL);
5475 switch_to_section (s);
5476 assemble_align (POINTER_SIZE);
28514dda 5477 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5478 }
5479}
5480
5481static void
5482aarch64_elf_asm_destructor (rtx symbol, int priority)
5483{
5484 if (priority == DEFAULT_INIT_PRIORITY)
5485 default_dtor_section_asm_out_destructor (symbol, priority);
5486 else
5487 {
5488 section *s;
5489 char buf[18];
5490 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
5491 s = get_section (buf, SECTION_WRITE, NULL);
5492 switch_to_section (s);
5493 assemble_align (POINTER_SIZE);
28514dda 5494 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
5495 }
5496}
5497
5498const char*
5499aarch64_output_casesi (rtx *operands)
5500{
5501 char buf[100];
5502 char label[100];
b32d5189 5503 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
5504 int index;
5505 static const char *const patterns[4][2] =
5506 {
5507 {
5508 "ldrb\t%w3, [%0,%w1,uxtw]",
5509 "add\t%3, %4, %w3, sxtb #2"
5510 },
5511 {
5512 "ldrh\t%w3, [%0,%w1,uxtw #1]",
5513 "add\t%3, %4, %w3, sxth #2"
5514 },
5515 {
5516 "ldr\t%w3, [%0,%w1,uxtw #2]",
5517 "add\t%3, %4, %w3, sxtw #2"
5518 },
5519 /* We assume that DImode is only generated when not optimizing and
5520 that we don't really need 64-bit address offsets. That would
5521 imply an object file with 8GB of code in a single function! */
5522 {
5523 "ldr\t%w3, [%0,%w1,uxtw #2]",
5524 "add\t%3, %4, %w3, sxtw #2"
5525 }
5526 };
5527
5528 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
5529
5530 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
5531
5532 gcc_assert (index >= 0 && index <= 3);
5533
 5534	  /* Need to implement table size reduction, by changing the code below.  */
5535 output_asm_insn (patterns[index][0], operands);
5536 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
5537 snprintf (buf, sizeof (buf),
5538 "adr\t%%4, %s", targetm.strip_name_encoding (label));
5539 output_asm_insn (buf, operands);
5540 output_asm_insn (patterns[index][1], operands);
5541 output_asm_insn ("br\t%3", operands);
5542 assemble_label (asm_out_file, label);
5543 return "";
5544}
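/* For illustration (register choices and label number are hypothetical), a
   HImode dispatch table (index == 1) emitted by the routine above looks
   roughly like:
     ldrh  w3, [x0, w1, uxtw #1]   ; load the table entry
     adr   x4, .Lrtx4              ; address of the label below
     add   x3, x4, w3, sxth #2     ; entry is a scaled, signed offset
     br    x3
   .Lrtx4:                         ; the ADDR_DIFF_VEC follows here.  */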
5545
5546
5547/* Return size in bits of an arithmetic operand which is shifted/scaled and
5548 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5549 operator. */
5550
5551int
5552aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
5553{
5554 if (shift >= 0 && shift <= 3)
5555 {
5556 int size;
5557 for (size = 8; size <= 32; size *= 2)
5558 {
5559 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
5560 if (mask == bits << shift)
5561 return size;
5562 }
5563 }
5564 return 0;
5565}
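/* Worked examples for the helper above: shift 3 with mask 0x7f8 returns 8
   (0xff << 3 == 0x7f8, i.e. a UXTB-sized field), shift 0 with mask 0xffff
   returns 16 (UXTH), and anything that is not a contiguous 8/16/32-bit
   field at the given shift returns 0.  */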
5566
e78d485e
RR
 5567/* Constant pools are per-function only when PC-relative
 5568   literal loads are enabled or we are in the large memory
 5569   model.  */
5570
5571static inline bool
5572aarch64_can_use_per_function_literal_pools_p (void)
5573{
9ee6540a 5574 return (aarch64_pcrelative_literal_loads
e78d485e
RR
5575 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
5576}
5577
43e9d192 5578static bool
e78d485e 5579aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 5580{
3eece53d
RR
 5581  /* FIXME: In an ideal world this would work similarly
 5582     to the logic in aarch64_select_rtx_section, but this
 5583     breaks bootstrap in GCC Go.  For now we work around
 5584     this by returning false here.  */
5585 return false;
43e9d192
IB
5586}
5587
e78d485e
RR
5588/* Select appropriate section for constants depending
5589 on where we place literal pools. */
5590
43e9d192 5591static section *
e78d485e
RR
5592aarch64_select_rtx_section (machine_mode mode,
5593 rtx x,
5594 unsigned HOST_WIDE_INT align)
43e9d192 5595{
e78d485e
RR
5596 if (aarch64_can_use_per_function_literal_pools_p ())
5597 return function_section (current_function_decl);
43e9d192 5598
e78d485e
RR
5599 return default_elf_select_rtx_section (mode, x, align);
5600}
43e9d192 5601
5fca7b66
RH
5602/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
5603void
5604aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
5605 HOST_WIDE_INT offset)
5606{
5607 /* When using per-function literal pools, we must ensure that any code
5608 section is aligned to the minimal instruction length, lest we get
5609 errors from the assembler re "unaligned instructions". */
5610 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
5611 ASM_OUTPUT_ALIGN (f, 2);
5612}
5613
43e9d192
IB
5614/* Costs. */
5615
5616/* Helper function for rtx cost calculation. Strip a shift expression
5617 from X. Returns the inner operand if successful, or the original
5618 expression on failure. */
5619static rtx
5620aarch64_strip_shift (rtx x)
5621{
5622 rtx op = x;
5623
57b77d46
RE
5624 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5625 we can convert both to ROR during final output. */
43e9d192
IB
5626 if ((GET_CODE (op) == ASHIFT
5627 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
5628 || GET_CODE (op) == LSHIFTRT
5629 || GET_CODE (op) == ROTATERT
5630 || GET_CODE (op) == ROTATE)
43e9d192
IB
5631 && CONST_INT_P (XEXP (op, 1)))
5632 return XEXP (op, 0);
5633
5634 if (GET_CODE (op) == MULT
5635 && CONST_INT_P (XEXP (op, 1))
5636 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5637 return XEXP (op, 0);
5638
5639 return x;
5640}
5641
4745e701 5642/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
5643 expression from X. Returns the inner operand if successful, or the
5644 original expression on failure. We deal with a number of possible
5645 canonicalization variations here. */
5646static rtx
4745e701 5647aarch64_strip_extend (rtx x)
43e9d192
IB
5648{
5649 rtx op = x;
5650
5651 /* Zero and sign extraction of a widened value. */
5652 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5653 && XEXP (op, 2) == const0_rtx
4745e701 5654 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
5655 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5656 XEXP (op, 1)))
5657 return XEXP (XEXP (op, 0), 0);
5658
5659 /* It can also be represented (for zero-extend) as an AND with an
5660 immediate. */
5661 if (GET_CODE (op) == AND
5662 && GET_CODE (XEXP (op, 0)) == MULT
5663 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5664 && CONST_INT_P (XEXP (op, 1))
5665 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5666 INTVAL (XEXP (op, 1))) != 0)
5667 return XEXP (XEXP (op, 0), 0);
5668
5669 /* Now handle extended register, as this may also have an optional
5670 left shift by 1..4. */
5671 if (GET_CODE (op) == ASHIFT
5672 && CONST_INT_P (XEXP (op, 1))
5673 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5674 op = XEXP (op, 0);
5675
5676 if (GET_CODE (op) == ZERO_EXTEND
5677 || GET_CODE (op) == SIGN_EXTEND)
5678 op = XEXP (op, 0);
5679
5680 if (op != x)
5681 return op;
5682
4745e701
JG
5683 return x;
5684}
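/* For example, the helper above reduces (zero_extend:DI (reg:SI x1)) to
   (reg:SI x1), and (ashift:DI (sign_extend:DI (reg:SI x1)) (const_int 2))
   to (reg:SI x1) as well, since an extend plus a left shift of 1..4 maps
   onto the extended-register operand forms; anything it cannot peel off is
   returned unchanged.  */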
5685
0a78ebe4
KT
5686/* Return true iff CODE is a shift supported in combination
5687 with arithmetic instructions. */
4d1919ed 5688
0a78ebe4
KT
5689static bool
5690aarch64_shift_p (enum rtx_code code)
5691{
5692 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
5693}
5694
4745e701 5695/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
5696 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
5697 Return the calculated cost of the expression, recursing manually in to
4745e701
JG
5698 operands where needed. */
5699
5700static int
e548c9df 5701aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
5702{
5703 rtx op0, op1;
5704 const struct cpu_cost_table *extra_cost
b175b679 5705 = aarch64_tune_params.insn_extra_cost;
4745e701 5706 int cost = 0;
0a78ebe4 5707 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 5708 machine_mode mode = GET_MODE (x);
4745e701
JG
5709
5710 gcc_checking_assert (code == MULT);
5711
5712 op0 = XEXP (x, 0);
5713 op1 = XEXP (x, 1);
5714
5715 if (VECTOR_MODE_P (mode))
5716 mode = GET_MODE_INNER (mode);
5717
5718 /* Integer multiply/fma. */
5719 if (GET_MODE_CLASS (mode) == MODE_INT)
5720 {
5721 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
5722 if (aarch64_shift_p (GET_CODE (x))
5723 || (CONST_INT_P (op1)
5724 && exact_log2 (INTVAL (op1)) > 0))
4745e701 5725 {
0a78ebe4
KT
5726 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
5727 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
5728 if (speed)
5729 {
0a78ebe4
KT
5730 if (compound_p)
5731 {
5732 if (REG_P (op1))
5733 /* ARITH + shift-by-register. */
5734 cost += extra_cost->alu.arith_shift_reg;
5735 else if (is_extend)
5736 /* ARITH + extended register. We don't have a cost field
5737 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
5738 cost += extra_cost->alu.extend_arith;
5739 else
5740 /* ARITH + shift-by-immediate. */
5741 cost += extra_cost->alu.arith_shift;
5742 }
4745e701
JG
5743 else
5744 /* LSL (immediate). */
0a78ebe4
KT
5745 cost += extra_cost->alu.shift;
5746
4745e701 5747 }
0a78ebe4
KT
5748 /* Strip extends as we will have costed them in the case above. */
5749 if (is_extend)
5750 op0 = aarch64_strip_extend (op0);
4745e701 5751
e548c9df 5752 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
5753
5754 return cost;
5755 }
5756
d2ac256b
KT
5757 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
5758 compound and let the below cases handle it. After all, MNEG is a
5759 special-case alias of MSUB. */
5760 if (GET_CODE (op0) == NEG)
5761 {
5762 op0 = XEXP (op0, 0);
5763 compound_p = true;
5764 }
5765
4745e701
JG
5766 /* Integer multiplies or FMAs have zero/sign extending variants. */
5767 if ((GET_CODE (op0) == ZERO_EXTEND
5768 && GET_CODE (op1) == ZERO_EXTEND)
5769 || (GET_CODE (op0) == SIGN_EXTEND
5770 && GET_CODE (op1) == SIGN_EXTEND))
5771 {
e548c9df
AM
5772 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
5773 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
5774
5775 if (speed)
5776 {
0a78ebe4 5777 if (compound_p)
d2ac256b 5778 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
5779 cost += extra_cost->mult[0].extend_add;
5780 else
5781 /* MUL/SMULL/UMULL. */
5782 cost += extra_cost->mult[0].extend;
5783 }
5784
5785 return cost;
5786 }
5787
d2ac256b 5788 /* This is either an integer multiply or a MADD. In both cases
4745e701 5789 we want to recurse and cost the operands. */
e548c9df
AM
5790 cost += rtx_cost (op0, mode, MULT, 0, speed);
5791 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
5792
5793 if (speed)
5794 {
0a78ebe4 5795 if (compound_p)
d2ac256b 5796 /* MADD/MSUB. */
4745e701
JG
5797 cost += extra_cost->mult[mode == DImode].add;
5798 else
5799 /* MUL. */
5800 cost += extra_cost->mult[mode == DImode].simple;
5801 }
5802
5803 return cost;
5804 }
5805 else
5806 {
5807 if (speed)
5808 {
3d840f7d 5809 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
5810 operands, unless the rounding mode is upward or downward in
5811 which case FNMUL is different than FMUL with operand negation. */
5812 bool neg0 = GET_CODE (op0) == NEG;
5813 bool neg1 = GET_CODE (op1) == NEG;
5814 if (compound_p || !flag_rounding_math || (neg0 && neg1))
5815 {
5816 if (neg0)
5817 op0 = XEXP (op0, 0);
5818 if (neg1)
5819 op1 = XEXP (op1, 0);
5820 }
4745e701 5821
0a78ebe4 5822 if (compound_p)
4745e701
JG
5823 /* FMADD/FNMADD/FNMSUB/FMSUB. */
5824 cost += extra_cost->fp[mode == DFmode].fma;
5825 else
3d840f7d 5826 /* FMUL/FNMUL. */
4745e701
JG
5827 cost += extra_cost->fp[mode == DFmode].mult;
5828 }
5829
e548c9df
AM
5830 cost += rtx_cost (op0, mode, MULT, 0, speed);
5831 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
5832 return cost;
5833 }
43e9d192
IB
5834}
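/* To summarise the integer paths above with an example: a bare
   (mult:DI (reg) (reg)) is costed as a MUL (mult[1].simple plus the two
   operand costs); the same multiply appearing under a PLUS or MINUS is
   costed as a MADD/MSUB (mult[1].add); and (mult (reg) (const_int 4)) is
   costed as a shift, or as an arithmetic-plus-shift operation when it
   feeds a PLUS/MINUS.  */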
5835
67747367
JG
5836static int
5837aarch64_address_cost (rtx x,
ef4bddc2 5838 machine_mode mode,
67747367
JG
5839 addr_space_t as ATTRIBUTE_UNUSED,
5840 bool speed)
5841{
5842 enum rtx_code c = GET_CODE (x);
b175b679 5843 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
5844 struct aarch64_address_info info;
5845 int cost = 0;
5846 info.shift = 0;
5847
5848 if (!aarch64_classify_address (&info, x, mode, c, false))
5849 {
5850 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5851 {
5852 /* This is a CONST or SYMBOL ref which will be split
5853 in a different way depending on the code model in use.
5854 Cost it through the generic infrastructure. */
e548c9df 5855 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
5856 /* Divide through by the cost of one instruction to
5857 bring it to the same units as the address costs. */
5858 cost_symbol_ref /= COSTS_N_INSNS (1);
5859 /* The cost is then the cost of preparing the address,
5860 followed by an immediate (possibly 0) offset. */
5861 return cost_symbol_ref + addr_cost->imm_offset;
5862 }
5863 else
5864 {
5865 /* This is most likely a jump table from a case
5866 statement. */
5867 return addr_cost->register_offset;
5868 }
5869 }
5870
5871 switch (info.type)
5872 {
5873 case ADDRESS_LO_SUM:
5874 case ADDRESS_SYMBOLIC:
5875 case ADDRESS_REG_IMM:
5876 cost += addr_cost->imm_offset;
5877 break;
5878
5879 case ADDRESS_REG_WB:
5880 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5881 cost += addr_cost->pre_modify;
5882 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5883 cost += addr_cost->post_modify;
5884 else
5885 gcc_unreachable ();
5886
5887 break;
5888
5889 case ADDRESS_REG_REG:
5890 cost += addr_cost->register_offset;
5891 break;
5892
67747367 5893 case ADDRESS_REG_SXTW:
783879e6
EM
5894 cost += addr_cost->register_sextend;
5895 break;
5896
5897 case ADDRESS_REG_UXTW:
5898 cost += addr_cost->register_zextend;
67747367
JG
5899 break;
5900
5901 default:
5902 gcc_unreachable ();
5903 }
5904
5905
5906 if (info.shift > 0)
5907 {
5908 /* For the sake of calculating the cost of the shifted register
5909 component, we can treat same sized modes in the same way. */
5910 switch (GET_MODE_BITSIZE (mode))
5911 {
5912 case 16:
5913 cost += addr_cost->addr_scale_costs.hi;
5914 break;
5915
5916 case 32:
5917 cost += addr_cost->addr_scale_costs.si;
5918 break;
5919
5920 case 64:
5921 cost += addr_cost->addr_scale_costs.di;
5922 break;
5923
5924 /* We can't tell, or this is a 128-bit vector. */
5925 default:
5926 cost += addr_cost->addr_scale_costs.ti;
5927 break;
5928 }
5929 }
5930
5931 return cost;
5932}
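/* As a small, approximate example of the address costing above:
   (plus (reg) (const_int 8)) classifies as ADDRESS_REG_IMM and costs
   imm_offset, while a scaled index such as (plus (reg) (mult (reg)
   (const_int 4))) for an SImode access costs register_offset plus the
   addr_scale_costs.si component for the shift.  */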
5933
b9066f5a
MW
5934/* Return the cost of a branch. If SPEED_P is true then the compiler is
5935 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
5936 to be taken. */
5937
5938int
5939aarch64_branch_cost (bool speed_p, bool predictable_p)
5940{
5941 /* When optimizing for speed, use the cost of unpredictable branches. */
5942 const struct cpu_branch_cost *branch_costs =
b175b679 5943 aarch64_tune_params.branch_costs;
b9066f5a
MW
5944
5945 if (!speed_p || predictable_p)
5946 return branch_costs->predictable;
5947 else
5948 return branch_costs->unpredictable;
5949}
5950
7cc2145f
JG
5951/* Return true if the RTX X in mode MODE is a zero or sign extract
5952 usable in an ADD or SUB (extended register) instruction. */
5953static bool
ef4bddc2 5954aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
7cc2145f
JG
5955{
5956 /* Catch add with a sign extract.
5957 This is add_<optab><mode>_multp2. */
5958 if (GET_CODE (x) == SIGN_EXTRACT
5959 || GET_CODE (x) == ZERO_EXTRACT)
5960 {
5961 rtx op0 = XEXP (x, 0);
5962 rtx op1 = XEXP (x, 1);
5963 rtx op2 = XEXP (x, 2);
5964
5965 if (GET_CODE (op0) == MULT
5966 && CONST_INT_P (op1)
5967 && op2 == const0_rtx
5968 && CONST_INT_P (XEXP (op0, 1))
5969 && aarch64_is_extend_from_extract (mode,
5970 XEXP (op0, 1),
5971 op1))
5972 {
5973 return true;
5974 }
5975 }
e47c4031
KT
5976 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
5977 No shift. */
5978 else if (GET_CODE (x) == SIGN_EXTEND
5979 || GET_CODE (x) == ZERO_EXTEND)
5980 return REG_P (XEXP (x, 0));
7cc2145f
JG
5981
5982 return false;
5983}
5984
61263118
KT
5985static bool
5986aarch64_frint_unspec_p (unsigned int u)
5987{
5988 switch (u)
5989 {
5990 case UNSPEC_FRINTZ:
5991 case UNSPEC_FRINTP:
5992 case UNSPEC_FRINTM:
5993 case UNSPEC_FRINTA:
5994 case UNSPEC_FRINTN:
5995 case UNSPEC_FRINTX:
5996 case UNSPEC_FRINTI:
5997 return true;
5998
5999 default:
6000 return false;
6001 }
6002}
6003
fb0cb7fa
KT
6004/* Return true iff X is an rtx that will match an extr instruction
6005 i.e. as described in the *extr<mode>5_insn family of patterns.
6006 OP0 and OP1 will be set to the operands of the shifts involved
6007 on success and will be NULL_RTX otherwise. */
6008
6009static bool
6010aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
6011{
6012 rtx op0, op1;
6013 machine_mode mode = GET_MODE (x);
6014
6015 *res_op0 = NULL_RTX;
6016 *res_op1 = NULL_RTX;
6017
6018 if (GET_CODE (x) != IOR)
6019 return false;
6020
6021 op0 = XEXP (x, 0);
6022 op1 = XEXP (x, 1);
6023
6024 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
6025 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
6026 {
6027 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
6028 if (GET_CODE (op1) == ASHIFT)
6029 std::swap (op0, op1);
6030
6031 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
6032 return false;
6033
6034 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
6035 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
6036
6037 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
6038 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
6039 {
6040 *res_op0 = XEXP (op0, 0);
6041 *res_op1 = XEXP (op1, 0);
6042 return true;
6043 }
6044 }
6045
6046 return false;
6047}
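/* A worked instance of the pattern matched above, in DImode:
   (ior (ashift (reg x1) (const_int 48)) (lshiftrt (reg x2) (const_int 16)))
   has shift amounts 48 + 16 == 64 == GET_MODE_BITSIZE (DImode), so it
   matches and corresponds to something like "extr x0, x1, x2, #16".  */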
6048
2d5ffe46
AP
6049/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
6050 storing it in *COST. Result is true if the total cost of the operation
6051 has now been calculated. */
6052static bool
6053aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
6054{
b9e3afe9
AP
6055 rtx inner;
6056 rtx comparator;
6057 enum rtx_code cmpcode;
6058
6059 if (COMPARISON_P (op0))
6060 {
6061 inner = XEXP (op0, 0);
6062 comparator = XEXP (op0, 1);
6063 cmpcode = GET_CODE (op0);
6064 }
6065 else
6066 {
6067 inner = op0;
6068 comparator = const0_rtx;
6069 cmpcode = NE;
6070 }
6071
2d5ffe46
AP
6072 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
6073 {
6074 /* Conditional branch. */
b9e3afe9 6075 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
6076 return true;
6077 else
6078 {
b9e3afe9 6079 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 6080 {
2d5ffe46
AP
6081 if (comparator == const0_rtx)
6082 {
6083 /* TBZ/TBNZ/CBZ/CBNZ. */
6084 if (GET_CODE (inner) == ZERO_EXTRACT)
6085 /* TBZ/TBNZ. */
e548c9df
AM
6086 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
6087 ZERO_EXTRACT, 0, speed);
6088 else
6089 /* CBZ/CBNZ. */
6090 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
6091
6092 return true;
6093 }
6094 }
b9e3afe9 6095 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 6096 {
2d5ffe46
AP
6097 /* TBZ/TBNZ. */
6098 if (comparator == const0_rtx)
6099 return true;
6100 }
6101 }
6102 }
b9e3afe9 6103 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 6104 {
786298dc 6105 /* CCMP. */
6dfeb7ce 6106 if (GET_CODE (op1) == COMPARE)
786298dc
WD
6107 {
6108 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
6109 if (XEXP (op1, 1) == const0_rtx)
6110 *cost += 1;
6111 if (speed)
6112 {
6113 machine_mode mode = GET_MODE (XEXP (op1, 0));
6114 const struct cpu_cost_table *extra_cost
6115 = aarch64_tune_params.insn_extra_cost;
6116
6117 if (GET_MODE_CLASS (mode) == MODE_INT)
6118 *cost += extra_cost->alu.arith;
6119 else
6120 *cost += extra_cost->fp[mode == DFmode].compare;
6121 }
6122 return true;
6123 }
6124
2d5ffe46
AP
6125 /* It's a conditional operation based on the status flags,
6126 so it must be some flavor of CSEL. */
6127
6128 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
6129 if (GET_CODE (op1) == NEG
6130 || GET_CODE (op1) == NOT
6131 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
6132 op1 = XEXP (op1, 0);
bad00732
KT
6133 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
6134 {
6135 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
6136 op1 = XEXP (op1, 0);
6137 op2 = XEXP (op2, 0);
6138 }
2d5ffe46 6139
e548c9df
AM
6140 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
6141 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
6142 return true;
6143 }
6144
6145 /* We don't know what this is, cost all operands. */
6146 return false;
6147}
6148
283b6c85
KT
6149/* Check whether X is a bitfield operation of the form shift + extend that
6150 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
6151 operand to which the bitfield operation is applied. Otherwise return
6152 NULL_RTX. */
6153
6154static rtx
6155aarch64_extend_bitfield_pattern_p (rtx x)
6156{
6157 rtx_code outer_code = GET_CODE (x);
6158 machine_mode outer_mode = GET_MODE (x);
6159
6160 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
6161 && outer_mode != SImode && outer_mode != DImode)
6162 return NULL_RTX;
6163
6164 rtx inner = XEXP (x, 0);
6165 rtx_code inner_code = GET_CODE (inner);
6166 machine_mode inner_mode = GET_MODE (inner);
6167 rtx op = NULL_RTX;
6168
6169 switch (inner_code)
6170 {
6171 case ASHIFT:
6172 if (CONST_INT_P (XEXP (inner, 1))
6173 && (inner_mode == QImode || inner_mode == HImode))
6174 op = XEXP (inner, 0);
6175 break;
6176 case LSHIFTRT:
6177 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
6178 && (inner_mode == QImode || inner_mode == HImode))
6179 op = XEXP (inner, 0);
6180 break;
6181 case ASHIFTRT:
6182 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
6183 && (inner_mode == QImode || inner_mode == HImode))
6184 op = XEXP (inner, 0);
6185 break;
6186 default:
6187 break;
6188 }
6189
6190 return op;
6191}
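/* For example, (zero_extend:SI (lshiftrt:HI (reg) (const_int 3))) is
   recognised by the helper above as a UBFX-style bitfield extract and the
   inner (reg) is returned; a SIGN_EXTEND of an ASHIFTRT maps to SBFX, and
   an extend of an ASHIFT maps to UBFIZ/SBFIZ.  */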
6192
8c83f71d
KT
6193/* Return true if the mask and a shift amount from an RTX of the form
6194 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
6195 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
6196
6197bool
6198aarch64_mask_and_shift_for_ubfiz_p (machine_mode mode, rtx mask, rtx shft_amnt)
6199{
6200 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
6201 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
6202 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
6203 && (INTVAL (mask) & ((1 << INTVAL (shft_amnt)) - 1)) == 0;
6204}
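/* Worked example for the check above, in SImode: shft_amnt == 3 and
   mask == 0x7f8 pass, because (0x7f8 >> 3) + 1 == 0x100 is a power of two
   and the low 3 bits of the mask are clear, so (x << 3) & 0x7f8 can become
   a single UBFIZ.  */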
6205
43e9d192
IB
6206/* Calculate the cost of calculating X, storing it in *COST. Result
6207 is true if the total cost of the operation has now been calculated. */
6208static bool
e548c9df 6209aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
6210 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
6211{
a8eecd00 6212 rtx op0, op1, op2;
73250c4c 6213 const struct cpu_cost_table *extra_cost
b175b679 6214 = aarch64_tune_params.insn_extra_cost;
e548c9df 6215 int code = GET_CODE (x);
43e9d192 6216
7fc5ef02
JG
6217 /* By default, assume that everything has equivalent cost to the
6218 cheapest instruction. Any additional costs are applied as a delta
6219 above this default. */
6220 *cost = COSTS_N_INSNS (1);
6221
43e9d192
IB
6222 switch (code)
6223 {
6224 case SET:
ba123b0d
JG
6225 /* The cost depends entirely on the operands to SET. */
6226 *cost = 0;
43e9d192
IB
6227 op0 = SET_DEST (x);
6228 op1 = SET_SRC (x);
6229
6230 switch (GET_CODE (op0))
6231 {
6232 case MEM:
6233 if (speed)
2961177e
JG
6234 {
6235 rtx address = XEXP (op0, 0);
b6875aac
KV
6236 if (VECTOR_MODE_P (mode))
6237 *cost += extra_cost->ldst.storev;
6238 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
6239 *cost += extra_cost->ldst.store;
6240 else if (mode == SFmode)
6241 *cost += extra_cost->ldst.storef;
6242 else if (mode == DFmode)
6243 *cost += extra_cost->ldst.stored;
6244
6245 *cost +=
6246 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6247 0, speed));
6248 }
43e9d192 6249
e548c9df 6250 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6251 return true;
6252
6253 case SUBREG:
6254 if (! REG_P (SUBREG_REG (op0)))
e548c9df 6255 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 6256
43e9d192
IB
6257 /* Fall through. */
6258 case REG:
b6875aac
KV
6259 /* The cost is one per vector-register copied. */
6260 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
6261 {
6262 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
6263 / GET_MODE_SIZE (V4SImode);
6264 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6265 }
ba123b0d
JG
6266 /* const0_rtx is in general free, but we will use an
6267 instruction to set a register to 0. */
b6875aac
KV
6268 else if (REG_P (op1) || op1 == const0_rtx)
6269 {
6270 /* The cost is 1 per register copied. */
6271 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
ba123b0d 6272 / UNITS_PER_WORD;
b6875aac
KV
6273 *cost = COSTS_N_INSNS (n_minus_1 + 1);
6274 }
ba123b0d
JG
6275 else
6276 /* Cost is just the cost of the RHS of the set. */
e548c9df 6277 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
6278 return true;
6279
ba123b0d 6280 case ZERO_EXTRACT:
43e9d192 6281 case SIGN_EXTRACT:
ba123b0d
JG
6282 /* Bit-field insertion. Strip any redundant widening of
6283 the RHS to meet the width of the target. */
43e9d192
IB
6284 if (GET_CODE (op1) == SUBREG)
6285 op1 = SUBREG_REG (op1);
6286 if ((GET_CODE (op1) == ZERO_EXTEND
6287 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 6288 && CONST_INT_P (XEXP (op0, 1))
43e9d192
IB
6289 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
6290 >= INTVAL (XEXP (op0, 1))))
6291 op1 = XEXP (op1, 0);
ba123b0d
JG
6292
6293 if (CONST_INT_P (op1))
6294 {
6295 /* MOV immediate is assumed to always be cheap. */
6296 *cost = COSTS_N_INSNS (1);
6297 }
6298 else
6299 {
6300 /* BFM. */
6301 if (speed)
6302 *cost += extra_cost->alu.bfi;
e548c9df 6303 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
6304 }
6305
43e9d192
IB
6306 return true;
6307
6308 default:
ba123b0d
JG
6309 /* We can't make sense of this, assume default cost. */
6310 *cost = COSTS_N_INSNS (1);
61263118 6311 return false;
43e9d192
IB
6312 }
6313 return false;
6314
9dfc162c
JG
6315 case CONST_INT:
6316 /* If an instruction can incorporate a constant within the
6317 instruction, the instruction's expression avoids calling
6318 rtx_cost() on the constant. If rtx_cost() is called on a
6319 constant, then it is usually because the constant must be
6320 moved into a register by one or more instructions.
6321
6322 The exception is constant 0, which can be expressed
6323 as XZR/WZR and is therefore free. The exception to this is
6324 if we have (set (reg) (const0_rtx)) in which case we must cost
6325 the move. However, we can catch that when we cost the SET, so
6326 we don't need to consider that here. */
6327 if (x == const0_rtx)
6328 *cost = 0;
6329 else
6330 {
6331 /* To an approximation, building any other constant is
6332 proportionally expensive to the number of instructions
6333 required to build that constant. This is true whether we
6334 are compiling for SPEED or otherwise. */
82614948
RR
6335 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
6336 (NULL_RTX, x, false, mode));
9dfc162c
JG
6337 }
6338 return true;
6339
6340 case CONST_DOUBLE:
6341 if (speed)
6342 {
6343 /* mov[df,sf]_aarch64. */
6344 if (aarch64_float_const_representable_p (x))
6345 /* FMOV (scalar immediate). */
6346 *cost += extra_cost->fp[mode == DFmode].fpconst;
6347 else if (!aarch64_float_const_zero_rtx_p (x))
6348 {
6349 /* This will be a load from memory. */
6350 if (mode == DFmode)
6351 *cost += extra_cost->ldst.loadd;
6352 else
6353 *cost += extra_cost->ldst.loadf;
6354 }
6355 else
6356 /* Otherwise this is +0.0. We get this using MOVI d0, #0
6357 or MOV v0.s[0], wzr - neither of which are modeled by the
6358 cost tables. Just use the default cost. */
6359 {
6360 }
6361 }
6362
6363 return true;
6364
43e9d192
IB
6365 case MEM:
6366 if (speed)
2961177e
JG
6367 {
6368 /* For loads we want the base cost of a load, plus an
6369 approximation for the additional cost of the addressing
6370 mode. */
6371 rtx address = XEXP (x, 0);
b6875aac
KV
6372 if (VECTOR_MODE_P (mode))
6373 *cost += extra_cost->ldst.loadv;
6374 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
6375 *cost += extra_cost->ldst.load;
6376 else if (mode == SFmode)
6377 *cost += extra_cost->ldst.loadf;
6378 else if (mode == DFmode)
6379 *cost += extra_cost->ldst.loadd;
6380
6381 *cost +=
6382 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6383 0, speed));
6384 }
43e9d192
IB
6385
6386 return true;
6387
6388 case NEG:
4745e701
JG
6389 op0 = XEXP (x, 0);
6390
b6875aac
KV
6391 if (VECTOR_MODE_P (mode))
6392 {
6393 if (speed)
6394 {
6395 /* FNEG. */
6396 *cost += extra_cost->vect.alu;
6397 }
6398 return false;
6399 }
6400
e548c9df
AM
6401 if (GET_MODE_CLASS (mode) == MODE_INT)
6402 {
4745e701
JG
6403 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6404 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6405 {
6406 /* CSETM. */
e548c9df 6407 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
6408 return true;
6409 }
6410
6411 /* Cost this as SUB wzr, X. */
e548c9df 6412 op0 = CONST0_RTX (mode);
4745e701
JG
6413 op1 = XEXP (x, 0);
6414 goto cost_minus;
6415 }
6416
e548c9df 6417 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
6418 {
6419 /* Support (neg(fma...)) as a single instruction only if
6420 sign of zeros is unimportant. This matches the decision
6421 making in aarch64.md. */
6422 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
6423 {
6424 /* FNMADD. */
e548c9df 6425 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
6426 return true;
6427 }
d318517d
SN
6428 if (GET_CODE (op0) == MULT)
6429 {
6430 /* FNMUL. */
6431 *cost = rtx_cost (op0, mode, NEG, 0, speed);
6432 return true;
6433 }
4745e701
JG
6434 if (speed)
6435 /* FNEG. */
6436 *cost += extra_cost->fp[mode == DFmode].neg;
6437 return false;
6438 }
6439
6440 return false;
43e9d192 6441
781aeb73
KT
6442 case CLRSB:
6443 case CLZ:
6444 if (speed)
b6875aac
KV
6445 {
6446 if (VECTOR_MODE_P (mode))
6447 *cost += extra_cost->vect.alu;
6448 else
6449 *cost += extra_cost->alu.clz;
6450 }
781aeb73
KT
6451
6452 return false;
6453
43e9d192
IB
6454 case COMPARE:
6455 op0 = XEXP (x, 0);
6456 op1 = XEXP (x, 1);
6457
6458 if (op1 == const0_rtx
6459 && GET_CODE (op0) == AND)
6460 {
6461 x = op0;
e548c9df 6462 mode = GET_MODE (op0);
43e9d192
IB
6463 goto cost_logic;
6464 }
6465
a8eecd00
JG
6466 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
6467 {
6468 /* TODO: A write to the CC flags possibly costs extra, this
6469 needs encoding in the cost tables. */
6470
e548c9df 6471 mode = GET_MODE (op0);
a8eecd00
JG
6472 /* ANDS. */
6473 if (GET_CODE (op0) == AND)
6474 {
6475 x = op0;
6476 goto cost_logic;
6477 }
6478
6479 if (GET_CODE (op0) == PLUS)
6480 {
6481 /* ADDS (and CMN alias). */
6482 x = op0;
6483 goto cost_plus;
6484 }
6485
6486 if (GET_CODE (op0) == MINUS)
6487 {
6488 /* SUBS. */
6489 x = op0;
6490 goto cost_minus;
6491 }
6492
345854d8
KT
6493 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
6494 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
6495 && CONST_INT_P (XEXP (op0, 2)))
6496 {
6497 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
6498 Handle it here directly rather than going to cost_logic
6499 since we know the immediate generated for the TST is valid
6500 so we can avoid creating an intermediate rtx for it only
6501 for costing purposes. */
6502 if (speed)
6503 *cost += extra_cost->alu.logical;
6504
6505 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
6506 ZERO_EXTRACT, 0, speed);
6507 return true;
6508 }
6509
a8eecd00
JG
6510 if (GET_CODE (op1) == NEG)
6511 {
6512 /* CMN. */
6513 if (speed)
6514 *cost += extra_cost->alu.arith;
6515
e548c9df
AM
6516 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
6517 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
6518 return true;
6519 }
6520
6521 /* CMP.
6522
6523 Compare can freely swap the order of operands, and
6524 canonicalization puts the more complex operation first.
6525 But the integer MINUS logic expects the shift/extend
6526 operation in op1. */
6527 if (! (REG_P (op0)
6528 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
6529 {
6530 op0 = XEXP (x, 1);
6531 op1 = XEXP (x, 0);
6532 }
6533 goto cost_minus;
6534 }
6535
6536 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6537 {
6538 /* FCMP. */
6539 if (speed)
6540 *cost += extra_cost->fp[mode == DFmode].compare;
6541
6542 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
6543 {
e548c9df 6544 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
6545 /* FCMP supports constant 0.0 for no extra cost. */
6546 return true;
6547 }
6548 return false;
6549 }
6550
b6875aac
KV
6551 if (VECTOR_MODE_P (mode))
6552 {
6553 /* Vector compare. */
6554 if (speed)
6555 *cost += extra_cost->vect.alu;
6556
6557 if (aarch64_float_const_zero_rtx_p (op1))
6558 {
6559 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
6560 cost. */
6561 return true;
6562 }
6563 return false;
6564 }
a8eecd00 6565 return false;
43e9d192
IB
6566
6567 case MINUS:
4745e701
JG
6568 {
6569 op0 = XEXP (x, 0);
6570 op1 = XEXP (x, 1);
6571
6572cost_minus:
e548c9df 6573 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 6574
4745e701
JG
6575 /* Detect valid immediates. */
6576 if ((GET_MODE_CLASS (mode) == MODE_INT
6577 || (GET_MODE_CLASS (mode) == MODE_CC
6578 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
6579 && CONST_INT_P (op1)
6580 && aarch64_uimm12_shift (INTVAL (op1)))
6581 {
4745e701
JG
6582 if (speed)
6583 /* SUB(S) (immediate). */
6584 *cost += extra_cost->alu.arith;
6585 return true;
4745e701
JG
6586 }
6587
7cc2145f
JG
6588 /* Look for SUB (extended register). */
6589 if (aarch64_rtx_arith_op_extract_p (op1, mode))
6590 {
6591 if (speed)
2533c820 6592 *cost += extra_cost->alu.extend_arith;
7cc2145f 6593
e47c4031
KT
6594 op1 = aarch64_strip_extend (op1);
6595 *cost += rtx_cost (op1, VOIDmode,
e548c9df 6596 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
6597 return true;
6598 }
6599
4745e701
JG
6600 rtx new_op1 = aarch64_strip_extend (op1);
6601
6602 /* Cost this as an FMA-alike operation. */
6603 if ((GET_CODE (new_op1) == MULT
0a78ebe4 6604 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
6605 && code != COMPARE)
6606 {
6607 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
6608 (enum rtx_code) code,
6609 speed);
4745e701
JG
6610 return true;
6611 }
43e9d192 6612
e548c9df 6613 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 6614
4745e701
JG
6615 if (speed)
6616 {
b6875aac
KV
6617 if (VECTOR_MODE_P (mode))
6618 {
6619 /* Vector SUB. */
6620 *cost += extra_cost->vect.alu;
6621 }
6622 else if (GET_MODE_CLASS (mode) == MODE_INT)
6623 {
6624 /* SUB(S). */
6625 *cost += extra_cost->alu.arith;
6626 }
4745e701 6627 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
6628 {
6629 /* FSUB. */
6630 *cost += extra_cost->fp[mode == DFmode].addsub;
6631 }
4745e701
JG
6632 }
6633 return true;
6634 }
43e9d192
IB
6635
6636 case PLUS:
4745e701
JG
6637 {
6638 rtx new_op0;
43e9d192 6639
4745e701
JG
6640 op0 = XEXP (x, 0);
6641 op1 = XEXP (x, 1);
43e9d192 6642
a8eecd00 6643cost_plus:
4745e701
JG
6644 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6645 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6646 {
6647 /* CSINC. */
e548c9df
AM
6648 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
6649 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
6650 return true;
6651 }
43e9d192 6652
4745e701
JG
6653 if (GET_MODE_CLASS (mode) == MODE_INT
6654 && CONST_INT_P (op1)
6655 && aarch64_uimm12_shift (INTVAL (op1)))
6656 {
e548c9df 6657 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 6658
4745e701
JG
6659 if (speed)
6660 /* ADD (immediate). */
6661 *cost += extra_cost->alu.arith;
6662 return true;
6663 }
6664
e548c9df 6665 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 6666
7cc2145f
JG
6667 /* Look for ADD (extended register). */
6668 if (aarch64_rtx_arith_op_extract_p (op0, mode))
6669 {
6670 if (speed)
2533c820 6671 *cost += extra_cost->alu.extend_arith;
7cc2145f 6672
e47c4031
KT
6673 op0 = aarch64_strip_extend (op0);
6674 *cost += rtx_cost (op0, VOIDmode,
e548c9df 6675 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
6676 return true;
6677 }
6678
4745e701
JG
6679 /* Strip any extend, leave shifts behind as we will
6680 cost them through mult_cost. */
6681 new_op0 = aarch64_strip_extend (op0);
6682
6683 if (GET_CODE (new_op0) == MULT
0a78ebe4 6684 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
6685 {
6686 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
6687 speed);
4745e701
JG
6688 return true;
6689 }
6690
e548c9df 6691 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
6692
6693 if (speed)
6694 {
b6875aac
KV
6695 if (VECTOR_MODE_P (mode))
6696 {
6697 /* Vector ADD. */
6698 *cost += extra_cost->vect.alu;
6699 }
6700 else if (GET_MODE_CLASS (mode) == MODE_INT)
6701 {
6702 /* ADD. */
6703 *cost += extra_cost->alu.arith;
6704 }
4745e701 6705 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
6706 {
6707 /* FADD. */
6708 *cost += extra_cost->fp[mode == DFmode].addsub;
6709 }
4745e701
JG
6710 }
6711 return true;
6712 }
43e9d192 6713
18b42b2a
KT
6714 case BSWAP:
6715 *cost = COSTS_N_INSNS (1);
6716
6717 if (speed)
b6875aac
KV
6718 {
6719 if (VECTOR_MODE_P (mode))
6720 *cost += extra_cost->vect.alu;
6721 else
6722 *cost += extra_cost->alu.rev;
6723 }
18b42b2a
KT
6724 return false;
6725
43e9d192 6726 case IOR:
f7d5cf8d
KT
6727 if (aarch_rev16_p (x))
6728 {
6729 *cost = COSTS_N_INSNS (1);
6730
b6875aac
KV
6731 if (speed)
6732 {
6733 if (VECTOR_MODE_P (mode))
6734 *cost += extra_cost->vect.alu;
6735 else
6736 *cost += extra_cost->alu.rev;
6737 }
6738 return true;
f7d5cf8d 6739 }
fb0cb7fa
KT
6740
6741 if (aarch64_extr_rtx_p (x, &op0, &op1))
6742 {
e548c9df
AM
6743 *cost += rtx_cost (op0, mode, IOR, 0, speed);
6744 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
6745 if (speed)
6746 *cost += extra_cost->alu.shift;
6747
6748 return true;
6749 }
f7d5cf8d 6750 /* Fall through. */
43e9d192
IB
6751 case XOR:
6752 case AND:
6753 cost_logic:
6754 op0 = XEXP (x, 0);
6755 op1 = XEXP (x, 1);
6756
b6875aac
KV
6757 if (VECTOR_MODE_P (mode))
6758 {
6759 if (speed)
6760 *cost += extra_cost->vect.alu;
6761 return true;
6762 }
6763
268c3b47
JG
6764 if (code == AND
6765 && GET_CODE (op0) == MULT
6766 && CONST_INT_P (XEXP (op0, 1))
6767 && CONST_INT_P (op1)
6768 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
6769 INTVAL (op1)) != 0)
6770 {
6771 /* This is a UBFM/SBFM. */
e548c9df 6772 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
6773 if (speed)
6774 *cost += extra_cost->alu.bfx;
6775 return true;
6776 }
6777
e548c9df 6778 if (GET_MODE_CLASS (mode) == MODE_INT)
43e9d192 6779 {
8c83f71d 6780 if (CONST_INT_P (op1))
43e9d192 6781 {
8c83f71d
KT
6782 /* We have a mask + shift version of a UBFIZ
6783 i.e. the *andim_ashift<mode>_bfiz pattern. */
6784 if (GET_CODE (op0) == ASHIFT
6785 && aarch64_mask_and_shift_for_ubfiz_p (mode, op1,
6786 XEXP (op0, 1)))
6787 {
6788 *cost += rtx_cost (XEXP (op0, 0), mode,
6789 (enum rtx_code) code, 0, speed);
6790 if (speed)
6791 *cost += extra_cost->alu.bfx;
268c3b47 6792
8c83f71d
KT
6793 return true;
6794 }
6795 else if (aarch64_bitmask_imm (INTVAL (op1), mode))
6796 {
6797 /* We possibly get the immediate for free, this is not
6798 modelled. */
6799 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6800 if (speed)
6801 *cost += extra_cost->alu.logical;
268c3b47 6802
8c83f71d
KT
6803 return true;
6804 }
43e9d192
IB
6805 }
6806 else
6807 {
268c3b47
JG
6808 rtx new_op0 = op0;
6809
6810 /* Handle ORN, EON, or BIC. */
43e9d192
IB
6811 if (GET_CODE (op0) == NOT)
6812 op0 = XEXP (op0, 0);
268c3b47
JG
6813
6814 new_op0 = aarch64_strip_shift (op0);
6815
6816 /* If we had a shift on op0 then this is a logical-shift-
6817 by-register/immediate operation. Otherwise, this is just
6818 a logical operation. */
6819 if (speed)
6820 {
6821 if (new_op0 != op0)
6822 {
6823 /* Shift by immediate. */
6824 if (CONST_INT_P (XEXP (op0, 1)))
6825 *cost += extra_cost->alu.log_shift;
6826 else
6827 *cost += extra_cost->alu.log_shift_reg;
6828 }
6829 else
6830 *cost += extra_cost->alu.logical;
6831 }
6832
6833 /* In both cases we want to cost both operands. */
e548c9df
AM
6834 *cost += rtx_cost (new_op0, mode, (enum rtx_code) code, 0, speed);
6835 *cost += rtx_cost (op1, mode, (enum rtx_code) code, 1, speed);
268c3b47
JG
6836
6837 return true;
43e9d192 6838 }
43e9d192
IB
6839 }
6840 return false;
6841
268c3b47 6842 case NOT:
6365da9e
KT
6843 x = XEXP (x, 0);
6844 op0 = aarch64_strip_shift (x);
6845
b6875aac
KV
6846 if (VECTOR_MODE_P (mode))
6847 {
6848 /* Vector NOT. */
6849 *cost += extra_cost->vect.alu;
6850 return false;
6851 }
6852
6365da9e
KT
6853 /* MVN-shifted-reg. */
6854 if (op0 != x)
6855 {
e548c9df 6856 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
6857
6858 if (speed)
6859 *cost += extra_cost->alu.log_shift;
6860
6861 return true;
6862 }
6863 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
6864 Handle the second form here taking care that 'a' in the above can
6865 be a shift. */
6866 else if (GET_CODE (op0) == XOR)
6867 {
6868 rtx newop0 = XEXP (op0, 0);
6869 rtx newop1 = XEXP (op0, 1);
6870 rtx op0_stripped = aarch64_strip_shift (newop0);
6871
e548c9df
AM
6872 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
6873 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
6874
6875 if (speed)
6876 {
6877 if (op0_stripped != newop0)
6878 *cost += extra_cost->alu.log_shift;
6879 else
6880 *cost += extra_cost->alu.logical;
6881 }
6882
6883 return true;
6884 }
268c3b47
JG
6885 /* MVN. */
6886 if (speed)
6887 *cost += extra_cost->alu.logical;
6888
268c3b47
JG
6889 return false;
6890
43e9d192 6891 case ZERO_EXTEND:
b1685e62
JG
6892
6893 op0 = XEXP (x, 0);
6894 /* If a value is written in SI mode, then zero extended to DI
6895 mode, the operation will in general be free as a write to
6896 a 'w' register implicitly zeroes the upper bits of an 'x'
6897 register. However, if this is
6898
6899 (set (reg) (zero_extend (reg)))
6900
6901 we must cost the explicit register move. */
6902 if (mode == DImode
6903 && GET_MODE (op0) == SImode
6904 && outer == SET)
6905 {
e548c9df 6906 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62 6907
dde23f43
KM
6908 /* If OP_COST is non-zero, then the cost of the zero extend
6909 is effectively the cost of the inner operation. Otherwise
6910 we have a MOV instruction and we take the cost from the MOV
6911 itself. This is true independently of whether we are
6912 optimizing for space or time. */
6913 if (op_cost)
b1685e62
JG
6914 *cost = op_cost;
6915
6916 return true;
6917 }
e548c9df 6918 else if (MEM_P (op0))
43e9d192 6919 {
b1685e62 6920 /* All loads can zero extend to any size for free. */
e548c9df 6921 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
6922 return true;
6923 }
b1685e62 6924
283b6c85
KT
6925 op0 = aarch64_extend_bitfield_pattern_p (x);
6926 if (op0)
6927 {
6928 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
6929 if (speed)
6930 *cost += extra_cost->alu.bfx;
6931 return true;
6932 }
6933
b1685e62 6934 if (speed)
b6875aac
KV
6935 {
6936 if (VECTOR_MODE_P (mode))
6937 {
6938 /* UMOV. */
6939 *cost += extra_cost->vect.alu;
6940 }
6941 else
6942 {
63715e5e
WD
6943 /* We generate an AND instead of UXTB/UXTH. */
6944 *cost += extra_cost->alu.logical;
b6875aac
KV
6945 }
6946 }
43e9d192
IB
6947 return false;
6948
6949 case SIGN_EXTEND:
b1685e62 6950 if (MEM_P (XEXP (x, 0)))
43e9d192 6951 {
b1685e62
JG
6952 /* LDRSH. */
6953 if (speed)
6954 {
6955 rtx address = XEXP (XEXP (x, 0), 0);
6956 *cost += extra_cost->ldst.load_sign_extend;
6957
6958 *cost +=
6959 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6960 0, speed));
6961 }
43e9d192
IB
6962 return true;
6963 }
b1685e62 6964
283b6c85
KT
6965 op0 = aarch64_extend_bitfield_pattern_p (x);
6966 if (op0)
6967 {
6968 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
6969 if (speed)
6970 *cost += extra_cost->alu.bfx;
6971 return true;
6972 }
6973
b1685e62 6974 if (speed)
b6875aac
KV
6975 {
6976 if (VECTOR_MODE_P (mode))
6977 *cost += extra_cost->vect.alu;
6978 else
6979 *cost += extra_cost->alu.extend;
6980 }
43e9d192
IB
6981 return false;
6982
ba0cfa17
JG
6983 case ASHIFT:
6984 op0 = XEXP (x, 0);
6985 op1 = XEXP (x, 1);
6986
6987 if (CONST_INT_P (op1))
6988 {
ba0cfa17 6989 if (speed)
b6875aac
KV
6990 {
6991 if (VECTOR_MODE_P (mode))
6992 {
6993 /* Vector shift (immediate). */
6994 *cost += extra_cost->vect.alu;
6995 }
6996 else
6997 {
 6998		      /* LSL (immediate), UBFM, UBFIZ and friends.  These are all
6999 aliases. */
7000 *cost += extra_cost->alu.shift;
7001 }
7002 }
ba0cfa17
JG
7003
7004 /* We can incorporate zero/sign extend for free. */
7005 if (GET_CODE (op0) == ZERO_EXTEND
7006 || GET_CODE (op0) == SIGN_EXTEND)
7007 op0 = XEXP (op0, 0);
7008
e548c9df 7009 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
7010 return true;
7011 }
7012 else
7013 {
ba0cfa17 7014 if (speed)
b6875aac
KV
7015 {
7016 if (VECTOR_MODE_P (mode))
7017 {
7018 /* Vector shift (register). */
7019 *cost += extra_cost->vect.alu;
7020 }
7021 else
7022 {
7023 /* LSLV. */
7024 *cost += extra_cost->alu.shift_reg;
7025 }
7026 }
ba0cfa17
JG
7027 return false; /* All arguments need to be in registers. */
7028 }
7029
43e9d192 7030 case ROTATE:
43e9d192
IB
7031 case ROTATERT:
7032 case LSHIFTRT:
43e9d192 7033 case ASHIFTRT:
ba0cfa17
JG
7034 op0 = XEXP (x, 0);
7035 op1 = XEXP (x, 1);
43e9d192 7036
ba0cfa17
JG
7037 if (CONST_INT_P (op1))
7038 {
7039 /* ASR (immediate) and friends. */
7040 if (speed)
b6875aac
KV
7041 {
7042 if (VECTOR_MODE_P (mode))
7043 *cost += extra_cost->vect.alu;
7044 else
7045 *cost += extra_cost->alu.shift;
7046 }
43e9d192 7047
e548c9df 7048 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
7049 return true;
7050 }
7051 else
7052 {
7053
7054 /* ASR (register) and friends. */
7055 if (speed)
b6875aac
KV
7056 {
7057 if (VECTOR_MODE_P (mode))
7058 *cost += extra_cost->vect.alu;
7059 else
7060 *cost += extra_cost->alu.shift_reg;
7061 }
ba0cfa17
JG
7062 return false; /* All arguments need to be in registers. */
7063 }
43e9d192 7064
909734be
JG
7065 case SYMBOL_REF:
7066
1b1e81f8
JW
7067 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
7068 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
7069 {
7070 /* LDR. */
7071 if (speed)
7072 *cost += extra_cost->ldst.load;
7073 }
7074 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
7075 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
7076 {
7077 /* ADRP, followed by ADD. */
7078 *cost += COSTS_N_INSNS (1);
7079 if (speed)
7080 *cost += 2 * extra_cost->alu.arith;
7081 }
7082 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
7083 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
7084 {
7085 /* ADR. */
7086 if (speed)
7087 *cost += extra_cost->alu.arith;
7088 }
7089
7090 if (flag_pic)
7091 {
7092 /* One extra load instruction, after accessing the GOT. */
7093 *cost += COSTS_N_INSNS (1);
7094 if (speed)
7095 *cost += extra_cost->ldst.load;
7096 }
43e9d192
IB
7097 return true;
7098
909734be 7099 case HIGH:
43e9d192 7100 case LO_SUM:
909734be
JG
7101 /* ADRP/ADD (immediate). */
7102 if (speed)
7103 *cost += extra_cost->alu.arith;
43e9d192
IB
7104 return true;
7105
7106 case ZERO_EXTRACT:
7107 case SIGN_EXTRACT:
7cc2145f
JG
7108 /* UBFX/SBFX. */
7109 if (speed)
b6875aac
KV
7110 {
7111 if (VECTOR_MODE_P (mode))
7112 *cost += extra_cost->vect.alu;
7113 else
7114 *cost += extra_cost->alu.bfx;
7115 }
7cc2145f
JG
7116
7117 /* We can trust that the immediates used will be correct (there
7118 are no by-register forms), so we need only cost op0. */
e548c9df 7119 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
7120 return true;
7121
7122 case MULT:
4745e701
JG
7123 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
7124 /* aarch64_rtx_mult_cost always handles recursion to its
7125 operands. */
7126 return true;
43e9d192
IB
7127
7128 case MOD:
4f58fe36
KT
 7129 /* We can expand signed mod by a power of 2 using a NEGS, two parallel
 7130 ANDs and a CSNEG. Assume here that the cost of a CSNEG is the same as
 7131 that of an unconditional negate. This case should only ever be reached
 7132 through the set_smod_pow2_cheap check in expmed.c. */
7133 if (CONST_INT_P (XEXP (x, 1))
7134 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
7135 && (mode == SImode || mode == DImode))
7136 {
7137 /* We expand to 4 instructions. Reset the baseline. */
7138 *cost = COSTS_N_INSNS (4);
7139
7140 if (speed)
7141 *cost += 2 * extra_cost->alu.logical
7142 + 2 * extra_cost->alu.arith;
7143
7144 return true;
7145 }
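      /* Illustrative sketch (hypothetical; exact operands and condition may
	 differ): for SImode "x % 4" the expansion costed above has the shape
	     negs  w1, w0
	     and   w0, w0, 3
	     and   w1, w1, 3
	     csneg w0, w0, w1, mi
	 i.e. one NEGS, two ANDs and one CSNEG, matching COSTS_N_INSNS (4).  */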
7146
7147 /* Fall-through. */
43e9d192 7148 case UMOD:
43e9d192
IB
7149 if (speed)
7150 {
b6875aac
KV
7151 if (VECTOR_MODE_P (mode))
7152 *cost += extra_cost->vect.alu;
e548c9df
AM
7153 else if (GET_MODE_CLASS (mode) == MODE_INT)
7154 *cost += (extra_cost->mult[mode == DImode].add
7155 + extra_cost->mult[mode == DImode].idiv);
7156 else if (mode == DFmode)
73250c4c
KT
7157 *cost += (extra_cost->fp[1].mult
7158 + extra_cost->fp[1].div);
e548c9df 7159 else if (mode == SFmode)
73250c4c
KT
7160 *cost += (extra_cost->fp[0].mult
7161 + extra_cost->fp[0].div);
43e9d192
IB
7162 }
7163 return false; /* All arguments need to be in registers. */
7164
7165 case DIV:
7166 case UDIV:
4105fe38 7167 case SQRT:
43e9d192
IB
7168 if (speed)
7169 {
b6875aac
KV
7170 if (VECTOR_MODE_P (mode))
7171 *cost += extra_cost->vect.alu;
7172 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
7173 /* There is no integer SQRT, so only DIV and UDIV can get
7174 here. */
7175 *cost += extra_cost->mult[mode == DImode].idiv;
7176 else
7177 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
7178 }
7179 return false; /* All arguments need to be in registers. */
7180
a8eecd00 7181 case IF_THEN_ELSE:
2d5ffe46
AP
7182 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
7183 XEXP (x, 2), cost, speed);
a8eecd00
JG
7184
7185 case EQ:
7186 case NE:
7187 case GT:
7188 case GTU:
7189 case LT:
7190 case LTU:
7191 case GE:
7192 case GEU:
7193 case LE:
7194 case LEU:
7195
7196 return false; /* All arguments must be in registers. */
7197
b292109f
JG
7198 case FMA:
7199 op0 = XEXP (x, 0);
7200 op1 = XEXP (x, 1);
7201 op2 = XEXP (x, 2);
7202
7203 if (speed)
b6875aac
KV
7204 {
7205 if (VECTOR_MODE_P (mode))
7206 *cost += extra_cost->vect.alu;
7207 else
7208 *cost += extra_cost->fp[mode == DFmode].fma;
7209 }
b292109f
JG
7210
7211 /* FMSUB, FNMADD, and FNMSUB are free. */
7212 if (GET_CODE (op0) == NEG)
7213 op0 = XEXP (op0, 0);
7214
7215 if (GET_CODE (op2) == NEG)
7216 op2 = XEXP (op2, 0);
7217
7218 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
7219 and the by-element operand as operand 0. */
7220 if (GET_CODE (op1) == NEG)
7221 op1 = XEXP (op1, 0);
7222
7223 /* Catch vector-by-element operations. The by-element operand can
7224 either be (vec_duplicate (vec_select (x))) or just
7225 (vec_select (x)), depending on whether we are multiplying by
7226 a vector or a scalar.
7227
 7228 Canonicalization is not very good in these cases: FMA4 will put the
 7229 by-element operand as operand 0, FNMA4 will have it as operand 1. */
7230 if (GET_CODE (op0) == VEC_DUPLICATE)
7231 op0 = XEXP (op0, 0);
7232 else if (GET_CODE (op1) == VEC_DUPLICATE)
7233 op1 = XEXP (op1, 0);
7234
7235 if (GET_CODE (op0) == VEC_SELECT)
7236 op0 = XEXP (op0, 0);
7237 else if (GET_CODE (op1) == VEC_SELECT)
7238 op1 = XEXP (op1, 0);
7239
7240 /* If the remaining parameters are not registers,
7241 get the cost to put them into registers. */
e548c9df
AM
7242 *cost += rtx_cost (op0, mode, FMA, 0, speed);
7243 *cost += rtx_cost (op1, mode, FMA, 1, speed);
7244 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
7245 return true;
7246
5e2a765b
KT
7247 case FLOAT:
7248 case UNSIGNED_FLOAT:
7249 if (speed)
7250 *cost += extra_cost->fp[mode == DFmode].fromint;
7251 return false;
7252
b292109f
JG
7253 case FLOAT_EXTEND:
7254 if (speed)
b6875aac
KV
7255 {
7256 if (VECTOR_MODE_P (mode))
7257 {
 7258 /* Vector widening conversion. */
7259 *cost += extra_cost->vect.alu;
7260 }
7261 else
7262 *cost += extra_cost->fp[mode == DFmode].widen;
7263 }
b292109f
JG
7264 return false;
7265
7266 case FLOAT_TRUNCATE:
7267 if (speed)
b6875aac
KV
7268 {
7269 if (VECTOR_MODE_P (mode))
7270 {
 7271 /* Vector narrowing conversion. */
7272 *cost += extra_cost->vect.alu;
7273 }
7274 else
7275 *cost += extra_cost->fp[mode == DFmode].narrow;
7276 }
b292109f
JG
7277 return false;
7278
61263118
KT
7279 case FIX:
7280 case UNSIGNED_FIX:
7281 x = XEXP (x, 0);
7282 /* Strip the rounding part. They will all be implemented
7283 by the fcvt* family of instructions anyway. */
7284 if (GET_CODE (x) == UNSPEC)
7285 {
7286 unsigned int uns_code = XINT (x, 1);
7287
7288 if (uns_code == UNSPEC_FRINTA
7289 || uns_code == UNSPEC_FRINTM
7290 || uns_code == UNSPEC_FRINTN
7291 || uns_code == UNSPEC_FRINTP
7292 || uns_code == UNSPEC_FRINTZ)
7293 x = XVECEXP (x, 0, 0);
7294 }
7295
7296 if (speed)
b6875aac
KV
7297 {
7298 if (VECTOR_MODE_P (mode))
7299 *cost += extra_cost->vect.alu;
7300 else
7301 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
7302 }
39252973
KT
7303
7304 /* We can combine fmul by a power of 2 followed by a fcvt into a single
7305 fixed-point fcvt. */
7306 if (GET_CODE (x) == MULT
7307 && ((VECTOR_MODE_P (mode)
7308 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
7309 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
7310 {
7311 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
7312 0, speed);
7313 return true;
7314 }
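	  /* Illustrative example (not from the original source): e.g.
	     (int) (f * 16.0f) is assumed to match the pattern above and be
	     emitted as a single fixed-point convert such as
		 fcvtzs w0, s0, #4
	     so only the first operand of the multiplication is costed.  */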
7315
e548c9df 7316 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
7317 return true;
7318
b292109f 7319 case ABS:
b6875aac
KV
7320 if (VECTOR_MODE_P (mode))
7321 {
7322 /* ABS (vector). */
7323 if (speed)
7324 *cost += extra_cost->vect.alu;
7325 }
7326 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 7327 {
19261b99
KT
7328 op0 = XEXP (x, 0);
7329
7330 /* FABD, which is analogous to FADD. */
7331 if (GET_CODE (op0) == MINUS)
7332 {
e548c9df
AM
7333 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
7334 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
7335 if (speed)
7336 *cost += extra_cost->fp[mode == DFmode].addsub;
7337
7338 return true;
7339 }
7340 /* Simple FABS is analogous to FNEG. */
b292109f
JG
7341 if (speed)
7342 *cost += extra_cost->fp[mode == DFmode].neg;
7343 }
7344 else
7345 {
7346 /* Integer ABS will either be split to
7347 two arithmetic instructions, or will be an ABS
7348 (scalar), which we don't model. */
7349 *cost = COSTS_N_INSNS (2);
7350 if (speed)
7351 *cost += 2 * extra_cost->alu.arith;
7352 }
7353 return false;
7354
7355 case SMAX:
7356 case SMIN:
7357 if (speed)
7358 {
b6875aac
KV
7359 if (VECTOR_MODE_P (mode))
7360 *cost += extra_cost->vect.alu;
7361 else
7362 {
7363 /* FMAXNM/FMINNM/FMAX/FMIN.
7364 TODO: This may not be accurate for all implementations, but
7365 we do not model this in the cost tables. */
7366 *cost += extra_cost->fp[mode == DFmode].addsub;
7367 }
b292109f
JG
7368 }
7369 return false;
7370
61263118
KT
7371 case UNSPEC:
7372 /* The floating point round to integer frint* instructions. */
7373 if (aarch64_frint_unspec_p (XINT (x, 1)))
7374 {
7375 if (speed)
7376 *cost += extra_cost->fp[mode == DFmode].roundint;
7377
7378 return false;
7379 }
781aeb73
KT
7380
7381 if (XINT (x, 1) == UNSPEC_RBIT)
7382 {
7383 if (speed)
7384 *cost += extra_cost->alu.rev;
7385
7386 return false;
7387 }
61263118
KT
7388 break;
7389
fb620c4a
JG
7390 case TRUNCATE:
7391
7392 /* Decompose <su>muldi3_highpart. */
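      /* Illustrative example (not from the original source): this is the RTL
	 produced for the high half of a widening 64x64->128 multiply, e.g.
	     uint64_t hi = ((unsigned __int128) a * b) >> 64;
	 which is emitted as a single UMULH (SMULH for the signed case).  */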
7393 if (/* (truncate:DI */
7394 mode == DImode
7395 /* (lshiftrt:TI */
7396 && GET_MODE (XEXP (x, 0)) == TImode
7397 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7398 /* (mult:TI */
7399 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7400 /* (ANY_EXTEND:TI (reg:DI))
7401 (ANY_EXTEND:TI (reg:DI))) */
7402 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7403 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
7404 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
7405 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
7406 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
7407 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
7408 /* (const_int 64) */
7409 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7410 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
7411 {
7412 /* UMULH/SMULH. */
7413 if (speed)
7414 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
7415 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
7416 mode, MULT, 0, speed);
7417 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
7418 mode, MULT, 1, speed);
fb620c4a
JG
7419 return true;
7420 }
7421
7422 /* Fall through. */
43e9d192 7423 default:
61263118 7424 break;
43e9d192 7425 }
61263118 7426
c10e3d7f
AP
7427 if (dump_file
7428 && flag_aarch64_verbose_cost)
61263118
KT
7429 fprintf (dump_file,
7430 "\nFailed to cost RTX. Assuming default cost.\n");
7431
7432 return true;
43e9d192
IB
7433}
7434
0ee859b5
JG
 7435/* Wrapper around aarch64_rtx_costs that dumps the partial or total cost
 7436 calculated for X. This cost is stored in *COST. Returns true
7437 if the total cost of X was calculated. */
7438static bool
e548c9df 7439aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
7440 int param, int *cost, bool speed)
7441{
e548c9df 7442 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5 7443
c10e3d7f
AP
7444 if (dump_file
7445 && flag_aarch64_verbose_cost)
0ee859b5
JG
7446 {
7447 print_rtl_single (dump_file, x);
7448 fprintf (dump_file, "\n%s cost: %d (%s)\n",
7449 speed ? "Hot" : "Cold",
7450 *cost, result ? "final" : "partial");
7451 }
7452
7453 return result;
7454}
7455
43e9d192 7456static int
ef4bddc2 7457aarch64_register_move_cost (machine_mode mode,
8a3a7e67 7458 reg_class_t from_i, reg_class_t to_i)
43e9d192 7459{
8a3a7e67
RH
7460 enum reg_class from = (enum reg_class) from_i;
7461 enum reg_class to = (enum reg_class) to_i;
43e9d192 7462 const struct cpu_regmove_cost *regmove_cost
b175b679 7463 = aarch64_tune_params.regmove_cost;
43e9d192 7464
3be07662 7465 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
2876a13f 7466 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
3be07662
WD
7467 to = GENERAL_REGS;
7468
2876a13f 7469 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
3be07662
WD
7470 from = GENERAL_REGS;
7471
6ee70f81
AP
7472 /* Moving between GPR and stack cost is the same as GP2GP. */
7473 if ((from == GENERAL_REGS && to == STACK_REG)
7474 || (to == GENERAL_REGS && from == STACK_REG))
7475 return regmove_cost->GP2GP;
7476
7477 /* To/From the stack register, we move via the gprs. */
7478 if (to == STACK_REG || from == STACK_REG)
7479 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
7480 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
7481
8919453c
WD
7482 if (GET_MODE_SIZE (mode) == 16)
7483 {
7484 /* 128-bit operations on general registers require 2 instructions. */
7485 if (from == GENERAL_REGS && to == GENERAL_REGS)
7486 return regmove_cost->GP2GP * 2;
7487 else if (from == GENERAL_REGS)
7488 return regmove_cost->GP2FP * 2;
7489 else if (to == GENERAL_REGS)
7490 return regmove_cost->FP2GP * 2;
7491
7492 /* When AdvSIMD instructions are disabled it is not possible to move
7493 a 128-bit value directly between Q registers. This is handled in
7494 secondary reload. A general register is used as a scratch to move
7495 the upper DI value and the lower DI value is moved directly,
7496 hence the cost is the sum of three moves. */
7497 if (! TARGET_SIMD)
7498 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
7499
7500 return regmove_cost->FP2FP;
7501 }
7502
43e9d192
IB
7503 if (from == GENERAL_REGS && to == GENERAL_REGS)
7504 return regmove_cost->GP2GP;
7505 else if (from == GENERAL_REGS)
7506 return regmove_cost->GP2FP;
7507 else if (to == GENERAL_REGS)
7508 return regmove_cost->FP2GP;
7509
43e9d192
IB
7510 return regmove_cost->FP2FP;
7511}
7512
7513static int
ef4bddc2 7514aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
7515 reg_class_t rclass ATTRIBUTE_UNUSED,
7516 bool in ATTRIBUTE_UNUSED)
7517{
b175b679 7518 return aarch64_tune_params.memmov_cost;
43e9d192
IB
7519}
7520
0c30e0f3
EM
7521/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
7522 to optimize 1.0/sqrt. */
ee62a5a6
RS
7523
7524static bool
9acc9cbe 7525use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
7526{
7527 return (!flag_trapping_math
7528 && flag_unsafe_math_optimizations
9acc9cbe
EM
7529 && ((aarch64_tune_params.approx_modes->recip_sqrt
7530 & AARCH64_APPROX_MODE (mode))
1a33079e 7531 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
7532}
7533
0c30e0f3
EM
7534/* Function to decide when to use the approximate reciprocal square root
7535 builtin. */
a6fc00da
BH
7536
7537static tree
ee62a5a6 7538aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 7539{
9acc9cbe
EM
7540 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
7541
7542 if (!use_rsqrt_p (mode))
a6fc00da 7543 return NULL_TREE;
ee62a5a6 7544 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
a6fc00da
BH
7545}
7546
7547typedef rtx (*rsqrte_type) (rtx, rtx);
7548
98daafa0
EM
7549/* Select reciprocal square root initial estimate insn depending on machine
7550 mode. */
a6fc00da 7551
98daafa0 7552static rsqrte_type
a6fc00da
BH
7553get_rsqrte_type (machine_mode mode)
7554{
7555 switch (mode)
7556 {
2a823433
JW
7557 case DFmode: return gen_aarch64_rsqrtedf;
7558 case SFmode: return gen_aarch64_rsqrtesf;
7559 case V2DFmode: return gen_aarch64_rsqrtev2df;
7560 case V2SFmode: return gen_aarch64_rsqrtev2sf;
7561 case V4SFmode: return gen_aarch64_rsqrtev4sf;
a6fc00da
BH
7562 default: gcc_unreachable ();
7563 }
7564}
7565
7566typedef rtx (*rsqrts_type) (rtx, rtx, rtx);
7567
98daafa0 7568/* Select reciprocal square root series step insn depending on machine mode. */
a6fc00da 7569
98daafa0 7570static rsqrts_type
a6fc00da
BH
7571get_rsqrts_type (machine_mode mode)
7572{
7573 switch (mode)
7574 {
00ea75d4
JW
7575 case DFmode: return gen_aarch64_rsqrtsdf;
7576 case SFmode: return gen_aarch64_rsqrtssf;
7577 case V2DFmode: return gen_aarch64_rsqrtsv2df;
7578 case V2SFmode: return gen_aarch64_rsqrtsv2sf;
7579 case V4SFmode: return gen_aarch64_rsqrtsv4sf;
a6fc00da
BH
7580 default: gcc_unreachable ();
7581 }
7582}
7583
98daafa0
EM
7584/* Emit instruction sequence to compute either the approximate square root
7585 or its approximate reciprocal, depending on the flag RECP, and return
7586 whether the sequence was emitted or not. */
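/* Illustrative note (not part of the original source): the code below uses
   the usual Newton-Raphson scheme for 1/sqrt(d).  FRSQRTE provides an initial
   estimate x0 and each FRSQRTS step refines it as
       x_{n+1} = x_n * (3 - d * x_n * x_n) / 2
   with two steps for SFmode and three for DFmode; for a plain square root the
   result is finally multiplied by d.  */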
a6fc00da 7587
98daafa0
EM
7588bool
7589aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 7590{
98daafa0 7591 machine_mode mode = GET_MODE (dst);
daef0a8c
JW
7592
7593 if (GET_MODE_INNER (mode) == HFmode)
7594 return false;
7595
98daafa0
EM
7596 machine_mode mmsk = mode_for_vector
7597 (int_mode_for_mode (GET_MODE_INNER (mode)),
7598 GET_MODE_NUNITS (mode));
7599 bool use_approx_sqrt_p = (!recp
7600 && (flag_mlow_precision_sqrt
7601 || (aarch64_tune_params.approx_modes->sqrt
7602 & AARCH64_APPROX_MODE (mode))));
7603 bool use_approx_rsqrt_p = (recp
7604 && (flag_mrecip_low_precision_sqrt
7605 || (aarch64_tune_params.approx_modes->recip_sqrt
7606 & AARCH64_APPROX_MODE (mode))));
7607
7608 if (!flag_finite_math_only
7609 || flag_trapping_math
7610 || !flag_unsafe_math_optimizations
7611 || !(use_approx_sqrt_p || use_approx_rsqrt_p)
7612 || optimize_function_for_size_p (cfun))
7613 return false;
a6fc00da 7614
98daafa0
EM
7615 rtx xmsk = gen_reg_rtx (mmsk);
7616 if (!recp)
7617 /* When calculating the approximate square root, compare the argument with
7618 0.0 and create a mask. */
7619 emit_insn (gen_rtx_SET (xmsk, gen_rtx_NEG (mmsk, gen_rtx_EQ (mmsk, src,
7620 CONST0_RTX (mode)))));
a6fc00da 7621
98daafa0
EM
7622 /* Estimate the approximate reciprocal square root. */
7623 rtx xdst = gen_reg_rtx (mode);
7624 emit_insn ((*get_rsqrte_type (mode)) (xdst, src));
a6fc00da 7625
98daafa0
EM
7626 /* Iterate over the series twice for SF and thrice for DF. */
7627 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
a6fc00da 7628
98daafa0
EM
 7629 /* Optionally iterate over the series once less for faster performance,
 7630 at the expense of accuracy. */
7631 if ((recp && flag_mrecip_low_precision_sqrt)
7632 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
7633 iterations--;
7634
98daafa0
EM
7635 /* Iterate over the series to calculate the approximate reciprocal square
7636 root. */
7637 rtx x1 = gen_reg_rtx (mode);
7638 while (iterations--)
a6fc00da 7639 {
a6fc00da 7640 rtx x2 = gen_reg_rtx (mode);
98daafa0
EM
7641 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
7642
7643 emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2));
a6fc00da 7644
98daafa0
EM
7645 if (iterations > 0)
7646 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
7647 }
7648
7649 if (!recp)
7650 {
7651 /* Qualify the approximate reciprocal square root when the argument is
7652 0.0 by squashing the intermediary result to 0.0. */
7653 rtx xtmp = gen_reg_rtx (mmsk);
7654 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
7655 gen_rtx_SUBREG (mmsk, xdst, 0)));
7656 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 7657
98daafa0
EM
7658 /* Calculate the approximate square root. */
7659 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
a6fc00da
BH
7660 }
7661
98daafa0
EM
7662 /* Finalize the approximation. */
7663 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
7664
7665 return true;
a6fc00da
BH
7666}
7667
79a2bc2d
EM
7668typedef rtx (*recpe_type) (rtx, rtx);
7669
7670/* Select reciprocal initial estimate insn depending on machine mode. */
7671
7672static recpe_type
7673get_recpe_type (machine_mode mode)
7674{
7675 switch (mode)
7676 {
7677 case SFmode: return (gen_aarch64_frecpesf);
7678 case V2SFmode: return (gen_aarch64_frecpev2sf);
7679 case V4SFmode: return (gen_aarch64_frecpev4sf);
7680 case DFmode: return (gen_aarch64_frecpedf);
7681 case V2DFmode: return (gen_aarch64_frecpev2df);
7682 default: gcc_unreachable ();
7683 }
7684}
7685
7686typedef rtx (*recps_type) (rtx, rtx, rtx);
7687
7688/* Select reciprocal series step insn depending on machine mode. */
7689
7690static recps_type
7691get_recps_type (machine_mode mode)
7692{
7693 switch (mode)
7694 {
7695 case SFmode: return (gen_aarch64_frecpssf);
7696 case V2SFmode: return (gen_aarch64_frecpsv2sf);
7697 case V4SFmode: return (gen_aarch64_frecpsv4sf);
7698 case DFmode: return (gen_aarch64_frecpsdf);
7699 case V2DFmode: return (gen_aarch64_frecpsv2df);
7700 default: gcc_unreachable ();
7701 }
7702}
7703
7704/* Emit the instruction sequence to compute the approximation for the division
7705 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
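/* Illustrative note (not part of the original source): the code below
   approximates NUM / DEN with a Newton-Raphson reciprocal.  FRECPE gives an
   initial estimate x0 of 1/DEN and each FRECPS step refines it as
       x_{n+1} = x_n * (2 - DEN * x_n)
   with two steps for SFmode and three for DFmode; the result is then
   multiplied by NUM unless NUM is 1.0.  */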
7706
7707bool
7708aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
7709{
7710 machine_mode mode = GET_MODE (quo);
33d72b63
JW
7711
7712 if (GET_MODE_INNER (mode) == HFmode)
7713 return false;
7714
79a2bc2d
EM
7715 bool use_approx_division_p = (flag_mlow_precision_div
7716 || (aarch64_tune_params.approx_modes->division
7717 & AARCH64_APPROX_MODE (mode)));
7718
7719 if (!flag_finite_math_only
7720 || flag_trapping_math
7721 || !flag_unsafe_math_optimizations
7722 || optimize_function_for_size_p (cfun)
7723 || !use_approx_division_p)
7724 return false;
7725
7726 /* Estimate the approximate reciprocal. */
7727 rtx xrcp = gen_reg_rtx (mode);
7728 emit_insn ((*get_recpe_type (mode)) (xrcp, den));
7729
7730 /* Iterate over the series twice for SF and thrice for DF. */
7731 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
7732
 7733 /* Optionally iterate over the series once less for faster performance,
 7734 at the expense of accuracy. */
7735 if (flag_mlow_precision_div)
7736 iterations--;
7737
7738 /* Iterate over the series to calculate the approximate reciprocal. */
7739 rtx xtmp = gen_reg_rtx (mode);
7740 while (iterations--)
7741 {
7742 emit_insn ((*get_recps_type (mode)) (xtmp, xrcp, den));
7743
7744 if (iterations > 0)
7745 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
7746 }
7747
7748 if (num != CONST1_RTX (mode))
7749 {
7750 /* As the approximate reciprocal of DEN is already calculated, only
7751 calculate the approximate division when NUM is not 1.0. */
7752 rtx xnum = force_reg (mode, num);
7753 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
7754 }
7755
7756 /* Finalize the approximation. */
7757 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
7758 return true;
7759}
7760
d126a4ae
AP
7761/* Return the number of instructions that can be issued per cycle. */
7762static int
7763aarch64_sched_issue_rate (void)
7764{
b175b679 7765 return aarch64_tune_params.issue_rate;
d126a4ae
AP
7766}
7767
d03f7e44
MK
7768static int
7769aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
7770{
7771 int issue_rate = aarch64_sched_issue_rate ();
7772
7773 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
7774}
7775
2d6bc7fa
KT
7776
7777/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
7778 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
7779 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
7780
7781static int
7782aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
7783 int ready_index)
7784{
7785 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
7786}
7787
7788
8990e73a
TB
7789/* Vectorizer cost model target hooks. */
7790
7791/* Implement targetm.vectorize.builtin_vectorization_cost. */
7792static int
7793aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
7794 tree vectype,
7795 int misalign ATTRIBUTE_UNUSED)
7796{
7797 unsigned elements;
7798
7799 switch (type_of_cost)
7800 {
7801 case scalar_stmt:
b175b679 7802 return aarch64_tune_params.vec_costs->scalar_stmt_cost;
8990e73a
TB
7803
7804 case scalar_load:
b175b679 7805 return aarch64_tune_params.vec_costs->scalar_load_cost;
8990e73a
TB
7806
7807 case scalar_store:
b175b679 7808 return aarch64_tune_params.vec_costs->scalar_store_cost;
8990e73a
TB
7809
7810 case vector_stmt:
b175b679 7811 return aarch64_tune_params.vec_costs->vec_stmt_cost;
8990e73a
TB
7812
7813 case vector_load:
b175b679 7814 return aarch64_tune_params.vec_costs->vec_align_load_cost;
8990e73a
TB
7815
7816 case vector_store:
b175b679 7817 return aarch64_tune_params.vec_costs->vec_store_cost;
8990e73a
TB
7818
7819 case vec_to_scalar:
b175b679 7820 return aarch64_tune_params.vec_costs->vec_to_scalar_cost;
8990e73a
TB
7821
7822 case scalar_to_vec:
b175b679 7823 return aarch64_tune_params.vec_costs->scalar_to_vec_cost;
8990e73a
TB
7824
7825 case unaligned_load:
b175b679 7826 return aarch64_tune_params.vec_costs->vec_unalign_load_cost;
8990e73a
TB
7827
7828 case unaligned_store:
b175b679 7829 return aarch64_tune_params.vec_costs->vec_unalign_store_cost;
8990e73a
TB
7830
7831 case cond_branch_taken:
b175b679 7832 return aarch64_tune_params.vec_costs->cond_taken_branch_cost;
8990e73a
TB
7833
7834 case cond_branch_not_taken:
b175b679 7835 return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost;
8990e73a
TB
7836
7837 case vec_perm:
c428f91c
WD
7838 return aarch64_tune_params.vec_costs->vec_permute_cost;
7839
8990e73a 7840 case vec_promote_demote:
b175b679 7841 return aarch64_tune_params.vec_costs->vec_stmt_cost;
8990e73a
TB
7842
7843 case vec_construct:
7844 elements = TYPE_VECTOR_SUBPARTS (vectype);
7845 return elements / 2 + 1;
7846
7847 default:
7848 gcc_unreachable ();
7849 }
7850}
7851
7852/* Implement targetm.vectorize.add_stmt_cost. */
7853static unsigned
7854aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
7855 struct _stmt_vec_info *stmt_info, int misalign,
7856 enum vect_cost_model_location where)
7857{
7858 unsigned *cost = (unsigned *) data;
7859 unsigned retval = 0;
7860
7861 if (flag_vect_cost_model)
7862 {
7863 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
7864 int stmt_cost =
7865 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
7866
7867 /* Statements in an inner loop relative to the loop being
7868 vectorized are weighted more heavily. The value here is
058e4c71 7869 arbitrary and could potentially be improved with analysis. */
8990e73a 7870 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 7871 count *= 50; /* FIXME */
8990e73a
TB
7872
7873 retval = (unsigned) (count * stmt_cost);
7874 cost[where] += retval;
7875 }
7876
7877 return retval;
7878}
7879
0cfff2a1 7880static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 7881
0cfff2a1
KT
7882/* Parse the TO_PARSE string and put the architecture struct that it
7883 selects into RES and the architectural features into ISA_FLAGS.
7884 Return an aarch64_parse_opt_result describing the parse result.
7885 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
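/* Illustrative example (not part of the original source): for a string such
   as "armv8-a+crc" the text before the first '+' ("armv8-a") is matched
   against all_architectures and the remainder ("+crc") is handed to
   aarch64_parse_extension.  */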
43e9d192 7886
0cfff2a1
KT
7887static enum aarch64_parse_opt_result
7888aarch64_parse_arch (const char *to_parse, const struct processor **res,
7889 unsigned long *isa_flags)
43e9d192
IB
7890{
7891 char *ext;
7892 const struct processor *arch;
0cfff2a1 7893 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
7894 size_t len;
7895
0cfff2a1 7896 strcpy (str, to_parse);
43e9d192
IB
7897
7898 ext = strchr (str, '+');
7899
7900 if (ext != NULL)
7901 len = ext - str;
7902 else
7903 len = strlen (str);
7904
7905 if (len == 0)
0cfff2a1
KT
7906 return AARCH64_PARSE_MISSING_ARG;
7907
43e9d192 7908
0cfff2a1 7909 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
7910 for (arch = all_architectures; arch->name != NULL; arch++)
7911 {
7912 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
7913 {
0cfff2a1 7914 unsigned long isa_temp = arch->flags;
43e9d192
IB
7915
7916 if (ext != NULL)
7917 {
0cfff2a1
KT
7918 /* TO_PARSE string contains at least one extension. */
7919 enum aarch64_parse_opt_result ext_res
7920 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 7921
0cfff2a1
KT
7922 if (ext_res != AARCH64_PARSE_OK)
7923 return ext_res;
ffee7aa9 7924 }
0cfff2a1
KT
7925 /* Extension parsing was successful. Confirm the result
7926 arch and ISA flags. */
7927 *res = arch;
7928 *isa_flags = isa_temp;
7929 return AARCH64_PARSE_OK;
43e9d192
IB
7930 }
7931 }
7932
7933 /* ARCH name not found in list. */
0cfff2a1 7934 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
7935}
7936
0cfff2a1
KT
 7937/* Parse the TO_PARSE string and put the cpu it selects into RES and the
 7938 architectural features into ISA_FLAGS. Return an aarch64_parse_opt_result
7939 describing the parse result. If there is an error parsing, RES and
7940 ISA_FLAGS are left unchanged. */
43e9d192 7941
0cfff2a1
KT
7942static enum aarch64_parse_opt_result
7943aarch64_parse_cpu (const char *to_parse, const struct processor **res,
7944 unsigned long *isa_flags)
43e9d192
IB
7945{
7946 char *ext;
7947 const struct processor *cpu;
0cfff2a1 7948 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
7949 size_t len;
7950
0cfff2a1 7951 strcpy (str, to_parse);
43e9d192
IB
7952
7953 ext = strchr (str, '+');
7954
7955 if (ext != NULL)
7956 len = ext - str;
7957 else
7958 len = strlen (str);
7959
7960 if (len == 0)
0cfff2a1
KT
7961 return AARCH64_PARSE_MISSING_ARG;
7962
43e9d192
IB
7963
7964 /* Loop through the list of supported CPUs to find a match. */
7965 for (cpu = all_cores; cpu->name != NULL; cpu++)
7966 {
7967 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
7968 {
0cfff2a1
KT
7969 unsigned long isa_temp = cpu->flags;
7970
43e9d192
IB
7971
7972 if (ext != NULL)
7973 {
0cfff2a1
KT
7974 /* TO_PARSE string contains at least one extension. */
7975 enum aarch64_parse_opt_result ext_res
7976 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 7977
0cfff2a1
KT
7978 if (ext_res != AARCH64_PARSE_OK)
7979 return ext_res;
7980 }
 7981 /* Extension parsing was successful. Confirm the result
7982 cpu and ISA flags. */
7983 *res = cpu;
7984 *isa_flags = isa_temp;
7985 return AARCH64_PARSE_OK;
43e9d192
IB
7986 }
7987 }
7988
7989 /* CPU name not found in list. */
0cfff2a1 7990 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
7991}
7992
0cfff2a1
KT
7993/* Parse the TO_PARSE string and put the cpu it selects into RES.
7994 Return an aarch64_parse_opt_result describing the parse result.
 7995 If the parsing fails, RES does not change. */
43e9d192 7996
0cfff2a1
KT
7997static enum aarch64_parse_opt_result
7998aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
7999{
8000 const struct processor *cpu;
0cfff2a1
KT
8001 char *str = (char *) alloca (strlen (to_parse) + 1);
8002
8003 strcpy (str, to_parse);
43e9d192
IB
8004
8005 /* Loop through the list of supported CPUs to find a match. */
8006 for (cpu = all_cores; cpu->name != NULL; cpu++)
8007 {
8008 if (strcmp (cpu->name, str) == 0)
8009 {
0cfff2a1
KT
8010 *res = cpu;
8011 return AARCH64_PARSE_OK;
43e9d192
IB
8012 }
8013 }
8014
8015 /* CPU name not found in list. */
0cfff2a1 8016 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
8017}
8018
8dec06f2
JG
 8019/* Parse TOKEN, which has length LENGTH, to see if it is an option
8020 described in FLAG. If it is, return the index bit for that fusion type.
8021 If not, error (printing OPTION_NAME) and return zero. */
8022
8023static unsigned int
8024aarch64_parse_one_option_token (const char *token,
8025 size_t length,
8026 const struct aarch64_flag_desc *flag,
8027 const char *option_name)
8028{
8029 for (; flag->name != NULL; flag++)
8030 {
8031 if (length == strlen (flag->name)
8032 && !strncmp (flag->name, token, length))
8033 return flag->flag;
8034 }
8035
8036 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
8037 return 0;
8038}
8039
8040/* Parse OPTION which is a comma-separated list of flags to enable.
8041 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
8042 default state we inherit from the CPU tuning structures. OPTION_NAME
8043 gives the top-level option we are parsing in the -moverride string,
8044 for use in error messages. */
8045
8046static unsigned int
8047aarch64_parse_boolean_options (const char *option,
8048 const struct aarch64_flag_desc *flags,
8049 unsigned int initial_state,
8050 const char *option_name)
8051{
8052 const char separator = '.';
8053 const char* specs = option;
8054 const char* ntoken = option;
8055 unsigned int found_flags = initial_state;
8056
8057 while ((ntoken = strchr (specs, separator)))
8058 {
8059 size_t token_length = ntoken - specs;
8060 unsigned token_ops = aarch64_parse_one_option_token (specs,
8061 token_length,
8062 flags,
8063 option_name);
8064 /* If we find "none" (or, for simplicity's sake, an error) anywhere
8065 in the token stream, reset the supported operations. So:
8066
8067 adrp+add.cmp+branch.none.adrp+add
8068
8069 would have the result of turning on only adrp+add fusion. */
8070 if (!token_ops)
8071 found_flags = 0;
8072
8073 found_flags |= token_ops;
8074 specs = ++ntoken;
8075 }
8076
 8077 /* We ended with a trailing separator; the string is ill-formed. */
8078 if (!(*specs))
8079 {
8080 error ("%s string ill-formed\n", option_name);
8081 return 0;
8082 }
8083
8084 /* We still have one more token to parse. */
8085 size_t token_length = strlen (specs);
8086 unsigned token_ops = aarch64_parse_one_option_token (specs,
8087 token_length,
8088 flags,
8089 option_name);
8090 if (!token_ops)
8091 found_flags = 0;
8092
8093 found_flags |= token_ops;
8094 return found_flags;
8095}
8096
8097/* Support for overriding instruction fusion. */
8098
8099static void
8100aarch64_parse_fuse_string (const char *fuse_string,
8101 struct tune_params *tune)
8102{
8103 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
8104 aarch64_fusible_pairs,
8105 tune->fusible_ops,
8106 "fuse=");
8107}
8108
8109/* Support for overriding other tuning flags. */
8110
8111static void
8112aarch64_parse_tune_string (const char *tune_string,
8113 struct tune_params *tune)
8114{
8115 tune->extra_tuning_flags
8116 = aarch64_parse_boolean_options (tune_string,
8117 aarch64_tuning_flags,
8118 tune->extra_tuning_flags,
8119 "tune=");
8120}
8121
 8122/* Parse TOKEN, which has length LENGTH, to see if it is a tuning option
 8123 we understand. If it is, extract the option string and hand it off to
8124 the appropriate function. */
8125
8126void
8127aarch64_parse_one_override_token (const char* token,
8128 size_t length,
8129 struct tune_params *tune)
8130{
8131 const struct aarch64_tuning_override_function *fn
8132 = aarch64_tuning_override_functions;
8133
8134 const char *option_part = strchr (token, '=');
8135 if (!option_part)
8136 {
8137 error ("tuning string missing in option (%s)", token);
8138 return;
8139 }
8140
8141 /* Get the length of the option name. */
8142 length = option_part - token;
8143 /* Skip the '=' to get to the option string. */
8144 option_part++;
8145
8146 for (; fn->name != NULL; fn++)
8147 {
8148 if (!strncmp (fn->name, token, length))
8149 {
8150 fn->parse_override (option_part, tune);
8151 return;
8152 }
8153 }
8154
 8155 error ("unknown tuning option (%s)", token);
8156 return;
8157}
8158
5eee3c34
JW
 8159/* A checking mechanism for the implementation of the TLS size. */
8160
8161static void
8162initialize_aarch64_tls_size (struct gcc_options *opts)
8163{
8164 if (aarch64_tls_size == 0)
8165 aarch64_tls_size = 24;
8166
8167 switch (opts->x_aarch64_cmodel_var)
8168 {
8169 case AARCH64_CMODEL_TINY:
 8170 /* Both the default and the maximum TLS size allowed under tiny are 1M,
 8171 which needs two instructions to address, so we clamp the size to 24. */
8172 if (aarch64_tls_size > 24)
8173 aarch64_tls_size = 24;
8174 break;
8175 case AARCH64_CMODEL_SMALL:
8176 /* The maximum TLS size allowed under small is 4G. */
8177 if (aarch64_tls_size > 32)
8178 aarch64_tls_size = 32;
8179 break;
8180 case AARCH64_CMODEL_LARGE:
8181 /* The maximum TLS size allowed under large is 16E.
8182 FIXME: 16E should be 64bit, we only support 48bit offset now. */
8183 if (aarch64_tls_size > 48)
8184 aarch64_tls_size = 48;
8185 break;
8186 default:
8187 gcc_unreachable ();
8188 }
8189
8190 return;
8191}
8192
8dec06f2
JG
8193/* Parse STRING looking for options in the format:
8194 string :: option:string
8195 option :: name=substring
8196 name :: {a-z}
8197 substring :: defined by option. */
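/* Illustrative example (hypothetical, not from the original source): a string
   such as "fuse=adrp+add.cmp+branch" passed via -moverride is split on ':'
   into individual name=value options; each option name is then matched
   against aarch64_tuning_override_functions and its value handed to the
   corresponding parser above.  */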
8198
8199static void
8200aarch64_parse_override_string (const char* input_string,
8201 struct tune_params* tune)
8202{
8203 const char separator = ':';
8204 size_t string_length = strlen (input_string) + 1;
8205 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
8206 char *string = string_root;
8207 strncpy (string, input_string, string_length);
8208 string[string_length - 1] = '\0';
8209
8210 char* ntoken = string;
8211
8212 while ((ntoken = strchr (string, separator)))
8213 {
8214 size_t token_length = ntoken - string;
8215 /* Make this substring look like a string. */
8216 *ntoken = '\0';
8217 aarch64_parse_one_override_token (string, token_length, tune);
8218 string = ++ntoken;
8219 }
8220
8221 /* One last option to parse. */
8222 aarch64_parse_one_override_token (string, strlen (string), tune);
8223 free (string_root);
8224}
43e9d192 8225
43e9d192
IB
8226
8227static void
0cfff2a1 8228aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 8229{
a3dc8760
NC
8230 /* The logic here is that if we are disabling all frame pointer generation
8231 then we do not need to disable leaf frame pointer generation as a
8232 separate operation. But if we are *only* disabling leaf frame pointer
8233 generation then we set flag_omit_frame_pointer to true, but in
8234 aarch64_frame_pointer_required we return false only for leaf functions.
8235
8236 PR 70044: We have to be careful about being called multiple times for the
8237 same function. Once we have decided to set flag_omit_frame_pointer just
8238 so that we can omit leaf frame pointers, we must then not interpret a
8239 second call as meaning that all frame pointer generation should be
8240 omitted. We do this by setting flag_omit_frame_pointer to a special,
8241 non-zero value. */
8242 if (opts->x_flag_omit_frame_pointer == 2)
8243 opts->x_flag_omit_frame_pointer = 0;
8244
0cfff2a1
KT
8245 if (opts->x_flag_omit_frame_pointer)
8246 opts->x_flag_omit_leaf_frame_pointer = false;
8247 else if (opts->x_flag_omit_leaf_frame_pointer)
a3dc8760 8248 opts->x_flag_omit_frame_pointer = 2;
43e9d192 8249
1be34295 8250 /* If not optimizing for size, set the default
0cfff2a1
KT
8251 alignment to what the target wants. */
8252 if (!opts->x_optimize_size)
43e9d192 8253 {
0cfff2a1
KT
8254 if (opts->x_align_loops <= 0)
8255 opts->x_align_loops = aarch64_tune_params.loop_align;
8256 if (opts->x_align_jumps <= 0)
8257 opts->x_align_jumps = aarch64_tune_params.jump_align;
8258 if (opts->x_align_functions <= 0)
8259 opts->x_align_functions = aarch64_tune_params.function_align;
43e9d192 8260 }
b4f50fd4 8261
9ee6540a
WD
8262 /* We default to no pc-relative literal loads. */
8263
8264 aarch64_pcrelative_literal_loads = false;
8265
8266 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 8267 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
8268 if (opts->x_pcrelative_literal_loads == 1)
8269 aarch64_pcrelative_literal_loads = true;
b4f50fd4 8270
48bb1a55
CL
8271 /* This is PR70113. When building the Linux kernel with
8272 CONFIG_ARM64_ERRATUM_843419, support for relocations
8273 R_AARCH64_ADR_PREL_PG_HI21 and R_AARCH64_ADR_PREL_PG_HI21_NC is
8274 removed from the kernel to avoid loading objects with possibly
9ee6540a 8275 offending sequences. Without -mpc-relative-literal-loads we would
48bb1a55
CL
8276 generate such relocations, preventing the kernel build from
8277 succeeding. */
9ee6540a
WD
8278 if (opts->x_pcrelative_literal_loads == 2
8279 && TARGET_FIX_ERR_A53_843419)
8280 aarch64_pcrelative_literal_loads = true;
8281
8282 /* In the tiny memory model it makes no sense to disallow PC relative
8283 literal pool loads. */
8284 if (aarch64_cmodel == AARCH64_CMODEL_TINY
8285 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
8286 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
8287
8288 /* When enabling the lower precision Newton series for the square root, also
8289 enable it for the reciprocal square root, since the latter is an
8290 intermediary step for the former. */
8291 if (flag_mlow_precision_sqrt)
8292 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 8293}
43e9d192 8294
0cfff2a1
KT
8295/* 'Unpack' up the internal tuning structs and update the options
8296 in OPTS. The caller must have set up selected_tune and selected_arch
8297 as all the other target-specific codegen decisions are
8298 derived from them. */
8299
e4ea20c8 8300void
0cfff2a1
KT
8301aarch64_override_options_internal (struct gcc_options *opts)
8302{
8303 aarch64_tune_flags = selected_tune->flags;
8304 aarch64_tune = selected_tune->sched_core;
8305 /* Make a copy of the tuning parameters attached to the core, which
8306 we may later overwrite. */
8307 aarch64_tune_params = *(selected_tune->tune);
8308 aarch64_architecture_version = selected_arch->architecture_version;
8309
8310 if (opts->x_aarch64_override_tune_string)
8311 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
8312 &aarch64_tune_params);
8313
8314 /* This target defaults to strict volatile bitfields. */
8315 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
8316 opts->x_flag_strict_volatile_bitfields = 1;
8317
0cfff2a1 8318 initialize_aarch64_code_model (opts);
5eee3c34 8319 initialize_aarch64_tls_size (opts);
63892fa2 8320
2d6bc7fa
KT
8321 int queue_depth = 0;
8322 switch (aarch64_tune_params.autoprefetcher_model)
8323 {
8324 case tune_params::AUTOPREFETCHER_OFF:
8325 queue_depth = -1;
8326 break;
8327 case tune_params::AUTOPREFETCHER_WEAK:
8328 queue_depth = 0;
8329 break;
8330 case tune_params::AUTOPREFETCHER_STRONG:
8331 queue_depth = max_insn_queue_index + 1;
8332 break;
8333 default:
8334 gcc_unreachable ();
8335 }
8336
8337 /* We don't mind passing in global_options_set here as we don't use
8338 the *options_set structs anyway. */
8339 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
8340 queue_depth,
8341 opts->x_param_values,
8342 global_options_set.x_param_values);
8343
50487d79
EM
8344 /* Set the L1 cache line size. */
8345 if (selected_cpu->tune->cache_line_size != 0)
8346 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
8347 selected_cpu->tune->cache_line_size,
8348 opts->x_param_values,
8349 global_options_set.x_param_values);
8350
0cfff2a1
KT
8351 aarch64_override_options_after_change_1 (opts);
8352}
43e9d192 8353
01f44038
KT
8354/* Print a hint with a suggestion for a core or architecture name that
8355 most closely resembles what the user passed in STR. ARCH is true if
8356 the user is asking for an architecture name. ARCH is false if the user
8357 is asking for a core name. */
8358
8359static void
8360aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
8361{
8362 auto_vec<const char *> candidates;
8363 const struct processor *entry = arch ? all_architectures : all_cores;
8364 for (; entry->name != NULL; entry++)
8365 candidates.safe_push (entry->name);
8366 char *s;
8367 const char *hint = candidates_list_and_hint (str, s, candidates);
8368 if (hint)
8369 inform (input_location, "valid arguments are: %s;"
8370 " did you mean %qs?", s, hint);
8371 XDELETEVEC (s);
8372}
8373
8374/* Print a hint with a suggestion for a core name that most closely resembles
8375 what the user passed in STR. */
8376
8377inline static void
8378aarch64_print_hint_for_core (const char *str)
8379{
8380 aarch64_print_hint_for_core_or_arch (str, false);
8381}
8382
8383/* Print a hint with a suggestion for an architecture name that most closely
8384 resembles what the user passed in STR. */
8385
8386inline static void
8387aarch64_print_hint_for_arch (const char *str)
8388{
8389 aarch64_print_hint_for_core_or_arch (str, true);
8390}
8391
0cfff2a1
KT
8392/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
 8393 specified in STR and throw errors if appropriate. Put the results, if
361fb3ee
KT
 8394 they are valid, in RES and ISA_FLAGS. Return whether the option is
8395 valid. */
43e9d192 8396
361fb3ee 8397static bool
0cfff2a1
KT
8398aarch64_validate_mcpu (const char *str, const struct processor **res,
8399 unsigned long *isa_flags)
8400{
8401 enum aarch64_parse_opt_result parse_res
8402 = aarch64_parse_cpu (str, res, isa_flags);
8403
8404 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 8405 return true;
0cfff2a1
KT
8406
8407 switch (parse_res)
8408 {
8409 case AARCH64_PARSE_MISSING_ARG:
8410 error ("missing cpu name in -mcpu=%qs", str);
8411 break;
8412 case AARCH64_PARSE_INVALID_ARG:
8413 error ("unknown value %qs for -mcpu", str);
01f44038 8414 aarch64_print_hint_for_core (str);
0cfff2a1
KT
8415 break;
8416 case AARCH64_PARSE_INVALID_FEATURE:
8417 error ("invalid feature modifier in -mcpu=%qs", str);
8418 break;
8419 default:
8420 gcc_unreachable ();
8421 }
361fb3ee
KT
8422
8423 return false;
0cfff2a1
KT
8424}
8425
8426/* Validate a command-line -march option. Parse the arch and extensions
8427 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
8428 results, if they are valid, in RES and ISA_FLAGS. Return whether the
8429 option is valid. */
0cfff2a1 8430
361fb3ee 8431static bool
0cfff2a1 8432aarch64_validate_march (const char *str, const struct processor **res,
01f44038 8433 unsigned long *isa_flags)
0cfff2a1
KT
8434{
8435 enum aarch64_parse_opt_result parse_res
8436 = aarch64_parse_arch (str, res, isa_flags);
8437
8438 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 8439 return true;
0cfff2a1
KT
8440
8441 switch (parse_res)
8442 {
8443 case AARCH64_PARSE_MISSING_ARG:
8444 error ("missing arch name in -march=%qs", str);
8445 break;
8446 case AARCH64_PARSE_INVALID_ARG:
8447 error ("unknown value %qs for -march", str);
01f44038 8448 aarch64_print_hint_for_arch (str);
0cfff2a1
KT
8449 break;
8450 case AARCH64_PARSE_INVALID_FEATURE:
8451 error ("invalid feature modifier in -march=%qs", str);
8452 break;
8453 default:
8454 gcc_unreachable ();
8455 }
361fb3ee
KT
8456
8457 return false;
0cfff2a1
KT
8458}
8459
8460/* Validate a command-line -mtune option. Parse the cpu
8461 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
8462 result, if it is valid, in RES. Return whether the option is
8463 valid. */
0cfff2a1 8464
361fb3ee 8465static bool
0cfff2a1
KT
8466aarch64_validate_mtune (const char *str, const struct processor **res)
8467{
8468 enum aarch64_parse_opt_result parse_res
8469 = aarch64_parse_tune (str, res);
8470
8471 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 8472 return true;
0cfff2a1
KT
8473
8474 switch (parse_res)
8475 {
8476 case AARCH64_PARSE_MISSING_ARG:
8477 error ("missing cpu name in -mtune=%qs", str);
8478 break;
8479 case AARCH64_PARSE_INVALID_ARG:
8480 error ("unknown value %qs for -mtune", str);
01f44038 8481 aarch64_print_hint_for_core (str);
0cfff2a1
KT
8482 break;
8483 default:
8484 gcc_unreachable ();
8485 }
361fb3ee
KT
8486 return false;
8487}
8488
8489/* Return the CPU corresponding to the enum CPU.
8490 If it doesn't specify a cpu, return the default. */
8491
8492static const struct processor *
8493aarch64_get_tune_cpu (enum aarch64_processor cpu)
8494{
8495 if (cpu != aarch64_none)
8496 return &all_cores[cpu];
8497
8498 /* The & 0x3f is to extract the bottom 6 bits that encode the
8499 default cpu as selected by the --with-cpu GCC configure option
8500 in config.gcc.
8501 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
8502 flags mechanism should be reworked to make it more sane. */
8503 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
8504}
8505
8506/* Return the architecture corresponding to the enum ARCH.
8507 If it doesn't specify a valid architecture, return the default. */
8508
8509static const struct processor *
8510aarch64_get_arch (enum aarch64_arch arch)
8511{
8512 if (arch != aarch64_no_arch)
8513 return &all_architectures[arch];
8514
8515 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
8516
8517 return &all_architectures[cpu->arch];
0cfff2a1
KT
8518}
8519
8520/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
8521 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
8522 tuning structs. In particular it must set selected_tune and
8523 aarch64_isa_flags that define the available ISA features and tuning
8524 decisions. It must also set selected_arch as this will be used to
8525 output the .arch asm tags for each function. */
8526
8527static void
8528aarch64_override_options (void)
8529{
8530 unsigned long cpu_isa = 0;
8531 unsigned long arch_isa = 0;
8532 aarch64_isa_flags = 0;
8533
361fb3ee
KT
8534 bool valid_cpu = true;
8535 bool valid_tune = true;
8536 bool valid_arch = true;
8537
0cfff2a1
KT
8538 selected_cpu = NULL;
8539 selected_arch = NULL;
8540 selected_tune = NULL;
8541
8542 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
8543 If either of -march or -mtune is given, they override their
8544 respective component of -mcpu. */
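  /* Illustrative example (hypothetical, not from the original source): with
     "-mcpu=cortex-a57 -march=armv8-a+crc" the tuning is taken from cortex-a57
     but the ISA flags come from the explicit -march string, following the
     rules described above.  */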
8545 if (aarch64_cpu_string)
361fb3ee
KT
8546 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
8547 &cpu_isa);
0cfff2a1
KT
8548
8549 if (aarch64_arch_string)
361fb3ee
KT
8550 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
8551 &arch_isa);
0cfff2a1
KT
8552
8553 if (aarch64_tune_string)
361fb3ee 8554 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192
IB
8555
8556 /* If the user did not specify a processor, choose the default
8557 one for them. This will be the CPU set during configuration using
a3cd0246 8558 --with-cpu, otherwise it is "generic". */
43e9d192
IB
8559 if (!selected_cpu)
8560 {
0cfff2a1
KT
8561 if (selected_arch)
8562 {
8563 selected_cpu = &all_cores[selected_arch->ident];
8564 aarch64_isa_flags = arch_isa;
361fb3ee 8565 explicit_arch = selected_arch->arch;
0cfff2a1
KT
8566 }
8567 else
8568 {
361fb3ee
KT
8569 /* Get default configure-time CPU. */
8570 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
8571 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
8572 }
361fb3ee
KT
8573
8574 if (selected_tune)
8575 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
8576 }
8577 /* If both -mcpu and -march are specified check that they are architecturally
8578 compatible, warn if they're not and prefer the -march ISA flags. */
8579 else if (selected_arch)
8580 {
8581 if (selected_arch->arch != selected_cpu->arch)
8582 {
8583 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
8584 all_architectures[selected_cpu->arch].name,
8585 selected_arch->name);
8586 }
8587 aarch64_isa_flags = arch_isa;
361fb3ee
KT
8588 explicit_arch = selected_arch->arch;
8589 explicit_tune_core = selected_tune ? selected_tune->ident
8590 : selected_cpu->ident;
0cfff2a1
KT
8591 }
8592 else
8593 {
8594 /* -mcpu but no -march. */
8595 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
8596 explicit_tune_core = selected_tune ? selected_tune->ident
8597 : selected_cpu->ident;
8598 gcc_assert (selected_cpu);
8599 selected_arch = &all_architectures[selected_cpu->arch];
8600 explicit_arch = selected_arch->arch;
43e9d192
IB
8601 }
8602
0cfff2a1
KT
 8603 /* Set the arch as well, as we will need it when outputting
8604 the .arch directive in assembly. */
8605 if (!selected_arch)
8606 {
8607 gcc_assert (selected_cpu);
8608 selected_arch = &all_architectures[selected_cpu->arch];
8609 }
43e9d192 8610
43e9d192 8611 if (!selected_tune)
3edaf26d 8612 selected_tune = selected_cpu;
43e9d192 8613
0cfff2a1
KT
8614#ifndef HAVE_AS_MABI_OPTION
8615 /* The compiler may have been configured with 2.23.* binutils, which does
8616 not have support for ILP32. */
8617 if (TARGET_ILP32)
8618 error ("Assembler does not support -mabi=ilp32");
8619#endif
43e9d192 8620
361fb3ee
KT
8621 /* Make sure we properly set up the explicit options. */
8622 if ((aarch64_cpu_string && valid_cpu)
8623 || (aarch64_tune_string && valid_tune))
8624 gcc_assert (explicit_tune_core != aarch64_none);
8625
8626 if ((aarch64_cpu_string && valid_cpu)
8627 || (aarch64_arch_string && valid_arch))
8628 gcc_assert (explicit_arch != aarch64_no_arch);
8629
0cfff2a1
KT
8630 aarch64_override_options_internal (&global_options);
8631
8632 /* Save these options as the default ones in case we push and pop them later
8633 while processing functions with potential target attributes. */
8634 target_option_default_node = target_option_current_node
8635 = build_target_option_node (&global_options);
43e9d192
IB
8636}
8637
8638/* Implement targetm.override_options_after_change. */
8639
8640static void
8641aarch64_override_options_after_change (void)
8642{
0cfff2a1 8643 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
8644}
8645
8646static struct machine_function *
8647aarch64_init_machine_status (void)
8648{
8649 struct machine_function *machine;
766090c2 8650 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
8651 return machine;
8652}
8653
8654void
8655aarch64_init_expanders (void)
8656{
8657 init_machine_status = aarch64_init_machine_status;
8658}
8659
8660/* A checking mechanism for the implementation of the various code models. */
8661static void
0cfff2a1 8662initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 8663{
0cfff2a1 8664 if (opts->x_flag_pic)
43e9d192 8665 {
0cfff2a1 8666 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
8667 {
8668 case AARCH64_CMODEL_TINY:
8669 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
8670 break;
8671 case AARCH64_CMODEL_SMALL:
34ecdb0f 8672#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
8673 aarch64_cmodel = (flag_pic == 2
8674 ? AARCH64_CMODEL_SMALL_PIC
8675 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
8676#else
8677 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
8678#endif
43e9d192
IB
8679 break;
8680 case AARCH64_CMODEL_LARGE:
8681 sorry ("code model %qs with -f%s", "large",
0cfff2a1 8682 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 8683 break;
43e9d192
IB
8684 default:
8685 gcc_unreachable ();
8686 }
8687 }
8688 else
0cfff2a1 8689 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
8690}
8691
361fb3ee
KT
8692/* Implement TARGET_OPTION_SAVE. */
8693
8694static void
8695aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
8696{
8697 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
8698}
8699
8700/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
8701 using the information saved in PTR. */
8702
8703static void
8704aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
8705{
8706 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
8707 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
8708 opts->x_explicit_arch = ptr->x_explicit_arch;
8709 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
8710 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
8711
8712 aarch64_override_options_internal (opts);
8713}
8714
8715/* Implement TARGET_OPTION_PRINT. */
8716
8717static void
8718aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
8719{
8720 const struct processor *cpu
8721 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
8722 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
8723 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 8724 std::string extension
04a99ebe 8725 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
8726
8727 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
8728 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
8729 arch->name, extension.c_str ());
361fb3ee
KT
8730}
8731
d78006d9
KT
8732static GTY(()) tree aarch64_previous_fndecl;
8733
e4ea20c8
KT
8734void
8735aarch64_reset_previous_fndecl (void)
8736{
8737 aarch64_previous_fndecl = NULL;
8738}
8739
acfc1ac1
KT
8740/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
8741 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
8742 make sure optab availability predicates are recomputed when necessary. */
8743
8744void
8745aarch64_save_restore_target_globals (tree new_tree)
8746{
8747 if (TREE_TARGET_GLOBALS (new_tree))
8748 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
8749 else if (new_tree == target_option_default_node)
8750 restore_target_globals (&default_target_globals);
8751 else
8752 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
8753}
8754
d78006d9
KT
8755/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
8756 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
8757 of the function, if such exists. This function may be called multiple
8758 times on a single function so use aarch64_previous_fndecl to avoid
8759 setting up identical state. */
8760
8761static void
8762aarch64_set_current_function (tree fndecl)
8763{
acfc1ac1
KT
8764 if (!fndecl || fndecl == aarch64_previous_fndecl)
8765 return;
8766
d78006d9
KT
8767 tree old_tree = (aarch64_previous_fndecl
8768 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
8769 : NULL_TREE);
8770
acfc1ac1 8771 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 8772
acfc1ac1
KT
8773 /* If current function has no attributes but the previous one did,
8774 use the default node. */
8775 if (!new_tree && old_tree)
8776 new_tree = target_option_default_node;
d78006d9 8777
acfc1ac1
KT
8778 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
8779 the default have been handled by aarch64_save_restore_target_globals from
8780 aarch64_pragma_target_parse. */
8781 if (old_tree == new_tree)
8782 return;
d78006d9 8783
acfc1ac1 8784 aarch64_previous_fndecl = fndecl;
6e17a23b 8785
acfc1ac1
KT
8786 /* First set the target options. */
8787 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6e17a23b 8788
acfc1ac1 8789 aarch64_save_restore_target_globals (new_tree);
d78006d9 8790}
361fb3ee 8791
5a2c8331
KT
8792/* Enum describing the various ways we can handle attributes.
8793 In many cases we can reuse the generic option handling machinery. */
8794
8795enum aarch64_attr_opt_type
8796{
8797 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
8798 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
8799 aarch64_attr_enum, /* Attribute sets an enum variable. */
8800 aarch64_attr_custom /* Attribute requires a custom handling function. */
8801};
8802
8803/* All the information needed to handle a target attribute.
8804 NAME is the name of the attribute.
9c582551 8805 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
8806 in the definition of enum aarch64_attr_opt_type.
8807 ALLOW_NEG is true if the attribute supports a "no-" form.
8808 HANDLER is the function that takes the attribute string and whether
8809 it is a pragma or attribute and handles the option. It is needed only
8810 when the ATTR_TYPE is aarch64_attr_custom.
8811 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 8812 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
 8813 option, that is, it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
8814 aarch64_attr_enum. */
8815
8816struct aarch64_attribute_info
8817{
8818 const char *name;
8819 enum aarch64_attr_opt_type attr_type;
8820 bool allow_neg;
8821 bool (*handler) (const char *, const char *);
8822 enum opt_code opt_num;
8823};
8824
8825/* Handle the argument STR to the arch= target attribute.
8826 PRAGMA_OR_ATTR is used in potential error messages. */
8827
8828static bool
8829aarch64_handle_attr_arch (const char *str, const char *pragma_or_attr)
8830{
8831 const struct processor *tmp_arch = NULL;
8832 enum aarch64_parse_opt_result parse_res
8833 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags);
8834
8835 if (parse_res == AARCH64_PARSE_OK)
8836 {
8837 gcc_assert (tmp_arch);
8838 selected_arch = tmp_arch;
8839 explicit_arch = selected_arch->arch;
8840 return true;
8841 }
8842
8843 switch (parse_res)
8844 {
8845 case AARCH64_PARSE_MISSING_ARG:
8846 error ("missing architecture name in 'arch' target %s", pragma_or_attr);
8847 break;
8848 case AARCH64_PARSE_INVALID_ARG:
8849 error ("unknown value %qs for 'arch' target %s", str, pragma_or_attr);
01f44038 8850 aarch64_print_hint_for_arch (str);
5a2c8331
KT
8851 break;
8852 case AARCH64_PARSE_INVALID_FEATURE:
8853 error ("invalid feature modifier %qs for 'arch' target %s",
8854 str, pragma_or_attr);
8855 break;
8856 default:
8857 gcc_unreachable ();
8858 }
8859
8860 return false;
8861}
8862
8863/* Handle the argument STR to the cpu= target attribute.
8864 PRAGMA_OR_ATTR is used in potential error messages. */
8865
8866static bool
8867aarch64_handle_attr_cpu (const char *str, const char *pragma_or_attr)
8868{
8869 const struct processor *tmp_cpu = NULL;
8870 enum aarch64_parse_opt_result parse_res
8871 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags);
8872
8873 if (parse_res == AARCH64_PARSE_OK)
8874 {
8875 gcc_assert (tmp_cpu);
8876 selected_tune = tmp_cpu;
8877 explicit_tune_core = selected_tune->ident;
8878
8879 selected_arch = &all_architectures[tmp_cpu->arch];
8880 explicit_arch = selected_arch->arch;
8881 return true;
8882 }
8883
8884 switch (parse_res)
8885 {
8886 case AARCH64_PARSE_MISSING_ARG:
8887 error ("missing cpu name in 'cpu' target %s", pragma_or_attr);
8888 break;
8889 case AARCH64_PARSE_INVALID_ARG:
8890 error ("unknown value %qs for 'cpu' target %s", str, pragma_or_attr);
01f44038 8891 aarch64_print_hint_for_core (str);
5a2c8331
KT
8892 break;
8893 case AARCH64_PARSE_INVALID_FEATURE:
8894 error ("invalid feature modifier %qs for 'cpu' target %s",
8895 str, pragma_or_attr);
8896 break;
8897 default:
8898 gcc_unreachable ();
8899 }
8900
8901 return false;
8902}
8903
8904/* Handle the argument STR to the tune= target attribute.
8905 PRAGMA_OR_ATTR is used in potential error messages. */
8906
8907static bool
8908aarch64_handle_attr_tune (const char *str, const char *pragma_or_attr)
8909{
8910 const struct processor *tmp_tune = NULL;
8911 enum aarch64_parse_opt_result parse_res
8912 = aarch64_parse_tune (str, &tmp_tune);
8913
8914 if (parse_res == AARCH64_PARSE_OK)
8915 {
8916 gcc_assert (tmp_tune);
8917 selected_tune = tmp_tune;
8918 explicit_tune_core = selected_tune->ident;
8919 return true;
8920 }
8921
8922 switch (parse_res)
8923 {
8924 case AARCH64_PARSE_INVALID_ARG:
8925 error ("unknown value %qs for 'tune' target %s", str, pragma_or_attr);
01f44038 8926 aarch64_print_hint_for_core (str);
5a2c8331
KT
8927 break;
8928 default:
8929 gcc_unreachable ();
8930 }
8931
8932 return false;
8933}
8934
8935/* Parse an architecture extensions target attribute string specified in STR.
8936 For example "+fp+nosimd". Show any errors if needed. Return TRUE
8937 if successful. Update aarch64_isa_flags to reflect the ISA features
8938 modified.
8939 PRAGMA_OR_ATTR is used in potential error messages. */
8940
8941static bool
8942aarch64_handle_attr_isa_flags (char *str, const char *pragma_or_attr)
8943{
8944 enum aarch64_parse_opt_result parse_res;
8945 unsigned long isa_flags = aarch64_isa_flags;
8946
e4ea20c8
KT
 8947 /* We allow "+nothing" at the beginning to clear out all architectural
 8948 features if the user wants to handpick specific features. */
8949 if (strncmp ("+nothing", str, 8) == 0)
8950 {
8951 isa_flags = 0;
8952 str += 8;
8953 }
8954
5a2c8331
KT
8955 parse_res = aarch64_parse_extension (str, &isa_flags);
8956
8957 if (parse_res == AARCH64_PARSE_OK)
8958 {
8959 aarch64_isa_flags = isa_flags;
8960 return true;
8961 }
8962
8963 switch (parse_res)
8964 {
8965 case AARCH64_PARSE_MISSING_ARG:
8966 error ("missing feature modifier in target %s %qs",
8967 pragma_or_attr, str);
8968 break;
8969
8970 case AARCH64_PARSE_INVALID_FEATURE:
8971 error ("invalid feature modifier in target %s %qs",
8972 pragma_or_attr, str);
8973 break;
8974
8975 default:
8976 gcc_unreachable ();
8977 }
8978
8979 return false;
8980}
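/* As a hedged illustration (not part of GCC itself) of the extension strings
   handled above, the following hypothetical functions opt in or out of ISA
   features at the source level; "+nothing" resets the feature set before the
   remaining modifiers are applied.  */

__attribute__ ((target ("+nothing+fp")))
static double
example_fp_only_add (double a, double b)
{
  /* Compiled with only the FP extension enabled.  */
  return a + b;
}

__attribute__ ((target ("+simd+nocrypto")))
static int
example_simd_no_crypto (int x)
{
  /* SIMD enabled, crypto explicitly disabled.  */
  return x * 2;
}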
8981
8982/* The target attributes that we support. On top of these we also support just
8983 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
8984 handled explicitly in aarch64_process_one_target_attr. */
8985
8986static const struct aarch64_attribute_info aarch64_attributes[] =
8987{
8988 { "general-regs-only", aarch64_attr_mask, false, NULL,
8989 OPT_mgeneral_regs_only },
8990 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
8991 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
8992 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
8993 OPT_mfix_cortex_a53_843419 },
5a2c8331
KT
8994 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
8995 { "strict-align", aarch64_attr_mask, false, NULL, OPT_mstrict_align },
8996 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
8997 OPT_momit_leaf_frame_pointer },
8998 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
8999 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
9000 OPT_march_ },
9001 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
9002 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
9003 OPT_mtune_ },
9004 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
9005};
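/* For illustration only (these declarations are hypothetical, not part of
   GCC): the strings below exercise the table above.  "cpu=" uses the custom
   handler, "no-omit-leaf-frame-pointer" uses the negated boolean form, and
   "cmodel=small" goes through the generic enum machinery.  */

__attribute__ ((target ("cpu=cortex-a57")))
void example_tuned_for_a57 (void);

__attribute__ ((target ("no-omit-leaf-frame-pointer,cmodel=small")))
void example_keep_leaf_frame_pointer (void);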
9006
9007/* Parse ARG_STR which contains the definition of one target attribute.
9008 Show appropriate errors if any or return true if the attribute is valid.
9009 PRAGMA_OR_ATTR holds the string to use in error messages about whether
9010 we're processing a target attribute or pragma. */
9011
9012static bool
9013aarch64_process_one_target_attr (char *arg_str, const char* pragma_or_attr)
9014{
9015 bool invert = false;
9016
9017 size_t len = strlen (arg_str);
9018
9019 if (len == 0)
9020 {
9021 error ("malformed target %s", pragma_or_attr);
9022 return false;
9023 }
9024
9025 char *str_to_check = (char *) alloca (len + 1);
9026 strcpy (str_to_check, arg_str);
9027
9028 /* Skip leading whitespace. */
9029 while (*str_to_check == ' ' || *str_to_check == '\t')
9030 str_to_check++;
9031
9032 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
9033 It is easier to detect and handle it explicitly here rather than going
9034 through the machinery for the rest of the target attributes in this
9035 function. */
9036 if (*str_to_check == '+')
9037 return aarch64_handle_attr_isa_flags (str_to_check, pragma_or_attr);
9038
9039 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
9040 {
9041 invert = true;
9042 str_to_check += 3;
9043 }
9044 char *arg = strchr (str_to_check, '=');
9045
9046 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
9047 and point ARG to "foo". */
9048 if (arg)
9049 {
9050 *arg = '\0';
9051 arg++;
9052 }
9053 const struct aarch64_attribute_info *p_attr;
16d12992 9054 bool found = false;
5a2c8331
KT
9055 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
9056 {
9057 /* If the names don't match up, or the user has given an argument
9058 to an attribute that doesn't accept one, or didn't give an argument
9059 to an attribute that expects one, fail to match. */
9060 if (strcmp (str_to_check, p_attr->name) != 0)
9061 continue;
9062
16d12992 9063 found = true;
5a2c8331
KT
9064 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
9065 || p_attr->attr_type == aarch64_attr_enum;
9066
9067 if (attr_need_arg_p ^ (arg != NULL))
9068 {
9069 error ("target %s %qs does not accept an argument",
9070 pragma_or_attr, str_to_check);
9071 return false;
9072 }
9073
9074 /* If the name matches but the attribute does not allow "no-" versions
9075 then we can't match. */
9076 if (invert && !p_attr->allow_neg)
9077 {
9078 error ("target %s %qs does not allow a negated form",
9079 pragma_or_attr, str_to_check);
9080 return false;
9081 }
9082
9083 switch (p_attr->attr_type)
9084 {
9085 /* Has a custom handler registered.
9086 For example, cpu=, arch=, tune=. */
9087 case aarch64_attr_custom:
9088 gcc_assert (p_attr->handler);
9089 if (!p_attr->handler (arg, pragma_or_attr))
9090 return false;
9091 break;
9092
9093 /* Either set or unset a boolean option. */
9094 case aarch64_attr_bool:
9095 {
9096 struct cl_decoded_option decoded;
9097
9098 generate_option (p_attr->opt_num, NULL, !invert,
9099 CL_TARGET, &decoded);
9100 aarch64_handle_option (&global_options, &global_options_set,
9101 &decoded, input_location);
9102 break;
9103 }
9104 /* Set or unset a bit in the target_flags. aarch64_handle_option
9105 should know what mask to apply given the option number. */
9106 case aarch64_attr_mask:
9107 {
9108 struct cl_decoded_option decoded;
9109 /* We only need to specify the option number.
9110 aarch64_handle_option will know which mask to apply. */
9111 decoded.opt_index = p_attr->opt_num;
9112 decoded.value = !invert;
9113 aarch64_handle_option (&global_options, &global_options_set,
9114 &decoded, input_location);
9115 break;
9116 }
9117 /* Use the option setting machinery to set an option to an enum. */
9118 case aarch64_attr_enum:
9119 {
9120 gcc_assert (arg);
9121 bool valid;
9122 int value;
9123 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
9124 &value, CL_TARGET);
9125 if (valid)
9126 {
9127 set_option (&global_options, NULL, p_attr->opt_num, value,
9128 NULL, DK_UNSPECIFIED, input_location,
9129 global_dc);
9130 }
9131 else
9132 {
9133 error ("target %s %s=%s is not valid",
9134 pragma_or_attr, str_to_check, arg);
9135 }
9136 break;
9137 }
9138 default:
9139 gcc_unreachable ();
9140 }
9141 }
9142
16d12992
KT
9143 /* If we reached here we either have found an attribute and validated
9144 it or didn't match any. If we matched an attribute but its arguments
9145 were malformed we will have returned false already. */
9146 return found;
5a2c8331
KT
9147}
9148
9149/* Count how many times the character C appears in
9150 NULL-terminated string STR. */
9151
9152static unsigned int
9153num_occurences_in_str (char c, char *str)
9154{
9155 unsigned int res = 0;
9156 while (*str != '\0')
9157 {
9158 if (*str == c)
9159 res++;
9160
9161 str++;
9162 }
9163
9164 return res;
9165}
9166
9167/* Parse the tree in ARGS that contains the target attribute information
9168 and update the global target options space. PRAGMA_OR_ATTR is a string
9169 to be used in error messages, specifying whether this is processing
9170 a target attribute or a target pragma. */
9171
9172bool
9173aarch64_process_target_attr (tree args, const char* pragma_or_attr)
9174{
9175 if (TREE_CODE (args) == TREE_LIST)
9176 {
9177 do
9178 {
9179 tree head = TREE_VALUE (args);
9180 if (head)
9181 {
9182 if (!aarch64_process_target_attr (head, pragma_or_attr))
9183 return false;
9184 }
9185 args = TREE_CHAIN (args);
9186 } while (args);
9187
9188 return true;
9189 }
9190 /* We expect to find a string to parse. */
9191 gcc_assert (TREE_CODE (args) == STRING_CST);
9192
9193 size_t len = strlen (TREE_STRING_POINTER (args));
9194 char *str_to_check = (char *) alloca (len + 1);
9195 strcpy (str_to_check, TREE_STRING_POINTER (args));
9196
9197 if (len == 0)
9198 {
9199 error ("malformed target %s value", pragma_or_attr);
9200 return false;
9201 }
9202
 9203 /* Used to catch empty entries between commas, i.e.
 9204 attribute ((target ("attr1,,attr2"))). */
9205 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
9206
9207 /* Handle multiple target attributes separated by ','. */
9208 char *token = strtok (str_to_check, ",");
9209
9210 unsigned int num_attrs = 0;
9211 while (token)
9212 {
9213 num_attrs++;
9214 if (!aarch64_process_one_target_attr (token, pragma_or_attr))
9215 {
9216 error ("target %s %qs is invalid", pragma_or_attr, token);
9217 return false;
9218 }
9219
9220 token = strtok (NULL, ",");
9221 }
9222
9223 if (num_attrs != num_commas + 1)
9224 {
9225 error ("malformed target %s list %qs",
9226 pragma_or_attr, TREE_STRING_POINTER (args));
9227 return false;
9228 }
9229
9230 return true;
9231}
9232
9233/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
9234 process attribute ((target ("..."))). */
9235
9236static bool
9237aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
9238{
9239 struct cl_target_option cur_target;
9240 bool ret;
9241 tree old_optimize;
9242 tree new_target, new_optimize;
9243 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
9244
9245 /* If what we're processing is the current pragma string then the
9246 target option node is already stored in target_option_current_node
9247 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
9248 having to re-parse the string. This is especially useful to keep
9249 arm_neon.h compile times down since that header contains a lot
9250 of intrinsics enclosed in pragmas. */
9251 if (!existing_target && args == current_target_pragma)
9252 {
9253 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
9254 return true;
9255 }
5a2c8331
KT
9256 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
9257
9258 old_optimize = build_optimization_node (&global_options);
9259 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
9260
9261 /* If the function changed the optimization levels as well as setting
9262 target options, start with the optimizations specified. */
9263 if (func_optimize && func_optimize != old_optimize)
9264 cl_optimization_restore (&global_options,
9265 TREE_OPTIMIZATION (func_optimize));
9266
9267 /* Save the current target options to restore at the end. */
9268 cl_target_option_save (&cur_target, &global_options);
9269
9270 /* If fndecl already has some target attributes applied to it, unpack
9271 them so that we add this attribute on top of them, rather than
9272 overwriting them. */
9273 if (existing_target)
9274 {
9275 struct cl_target_option *existing_options
9276 = TREE_TARGET_OPTION (existing_target);
9277
9278 if (existing_options)
9279 cl_target_option_restore (&global_options, existing_options);
9280 }
9281 else
9282 cl_target_option_restore (&global_options,
9283 TREE_TARGET_OPTION (target_option_current_node));
9284
9285
9286 ret = aarch64_process_target_attr (args, "attribute");
9287
9288 /* Set up any additional state. */
9289 if (ret)
9290 {
9291 aarch64_override_options_internal (&global_options);
e95a988a
KT
9292 /* Initialize SIMD builtins if we haven't already.
9293 Set current_target_pragma to NULL for the duration so that
9294 the builtin initialization code doesn't try to tag the functions
9295 being built with the attributes specified by any current pragma, thus
9296 going into an infinite recursion. */
9297 if (TARGET_SIMD)
9298 {
9299 tree saved_current_target_pragma = current_target_pragma;
9300 current_target_pragma = NULL;
9301 aarch64_init_simd_builtins ();
9302 current_target_pragma = saved_current_target_pragma;
9303 }
5a2c8331
KT
9304 new_target = build_target_option_node (&global_options);
9305 }
9306 else
9307 new_target = NULL;
9308
9309 new_optimize = build_optimization_node (&global_options);
9310
9311 if (fndecl && ret)
9312 {
9313 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
9314
9315 if (old_optimize != new_optimize)
9316 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
9317 }
9318
9319 cl_target_option_restore (&global_options, &cur_target);
9320
9321 if (old_optimize != new_optimize)
9322 cl_optimization_restore (&global_options,
9323 TREE_OPTIMIZATION (old_optimize));
9324 return ret;
9325}
9326
1fd8d40c
KT
9327/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
9328 tri-bool options (yes, no, don't care) and the default value is
9329 DEF, determine whether to reject inlining. */
9330
9331static bool
9332aarch64_tribools_ok_for_inlining_p (int caller, int callee,
9333 int dont_care, int def)
9334{
9335 /* If the callee doesn't care, always allow inlining. */
9336 if (callee == dont_care)
9337 return true;
9338
9339 /* If the caller doesn't care, always allow inlining. */
9340 if (caller == dont_care)
9341 return true;
9342
9343 /* Otherwise, allow inlining if either the callee and caller values
9344 agree, or if the callee is using the default value. */
9345 return (callee == caller || callee == def);
9346}
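/* A worked sketch of the rule above, using 0 = no, 1 = yes, 2 = don't care:

     callee == 2                          -> always allow.
     caller == 2                          -> always allow.
     caller == 0, callee == 0             -> allow (values agree).
     caller == 0, callee == 1, DEF == 1   -> allow (callee uses the default).
     caller == 0, callee == 1, DEF == 0   -> reject.  */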
9347
9348/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
9349 to inline CALLEE into CALLER based on target-specific info.
9350 Make sure that the caller and callee have compatible architectural
9351 features. Then go through the other possible target attributes
9352 and see if they can block inlining. Try not to reject always_inline
9353 callees unless they are incompatible architecturally. */
9354
9355static bool
9356aarch64_can_inline_p (tree caller, tree callee)
9357{
9358 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
9359 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
9360
9361 /* If callee has no option attributes, then it is ok to inline. */
9362 if (!callee_tree)
9363 return true;
9364
9365 struct cl_target_option *caller_opts
9366 = TREE_TARGET_OPTION (caller_tree ? caller_tree
9367 : target_option_default_node);
9368
9369 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
9370
9371
9372 /* Callee's ISA flags should be a subset of the caller's. */
9373 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
9374 != callee_opts->x_aarch64_isa_flags)
9375 return false;
9376
 9377 /* Allow non-strict aligned functions to be inlined into
 9378 strict aligned ones. */
9379 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
9380 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
9381 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
9382 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
9383 return false;
9384
9385 bool always_inline = lookup_attribute ("always_inline",
9386 DECL_ATTRIBUTES (callee));
9387
9388 /* If the architectural features match up and the callee is always_inline
9389 then the other attributes don't matter. */
9390 if (always_inline)
9391 return true;
9392
9393 if (caller_opts->x_aarch64_cmodel_var
9394 != callee_opts->x_aarch64_cmodel_var)
9395 return false;
9396
9397 if (caller_opts->x_aarch64_tls_dialect
9398 != callee_opts->x_aarch64_tls_dialect)
9399 return false;
9400
 9401 /* Honour explicit requests to work around errata. */
9402 if (!aarch64_tribools_ok_for_inlining_p (
9403 caller_opts->x_aarch64_fix_a53_err835769,
9404 callee_opts->x_aarch64_fix_a53_err835769,
9405 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
9406 return false;
9407
48bb1a55
CL
9408 if (!aarch64_tribools_ok_for_inlining_p (
9409 caller_opts->x_aarch64_fix_a53_err843419,
9410 callee_opts->x_aarch64_fix_a53_err843419,
9411 2, TARGET_FIX_ERR_A53_843419))
9412 return false;
9413
1fd8d40c
KT
9414 /* If the user explicitly specified -momit-leaf-frame-pointer for the
 9415 caller and callee and they don't match up, reject inlining. */
9416 if (!aarch64_tribools_ok_for_inlining_p (
9417 caller_opts->x_flag_omit_leaf_frame_pointer,
9418 callee_opts->x_flag_omit_leaf_frame_pointer,
9419 2, 1))
9420 return false;
9421
9422 /* If the callee has specific tuning overrides, respect them. */
9423 if (callee_opts->x_aarch64_override_tune_string != NULL
9424 && caller_opts->x_aarch64_override_tune_string == NULL)
9425 return false;
9426
9427 /* If the user specified tuning override strings for the
9428 caller and callee and they don't match up, reject inlining.
9429 We just do a string compare here, we don't analyze the meaning
9430 of the string, as it would be too costly for little gain. */
9431 if (callee_opts->x_aarch64_override_tune_string
9432 && caller_opts->x_aarch64_override_tune_string
9433 && (strcmp (callee_opts->x_aarch64_override_tune_string,
9434 caller_opts->x_aarch64_override_tune_string) != 0))
9435 return false;
9436
9437 return true;
9438}
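/* An illustrative consequence of the ISA-subset check above (the function
   names are hypothetical, not part of GCC):

     __attribute__ ((target ("+crc"))) static inline int crc_step (int x);
     int plain_caller (int x) { return crc_step (x); }

   crc_step's ISA flags are not a subset of plain_caller's, so the call is
   not inlined.  Building plain_caller with -march=armv8-a+crc, or giving it
   the same target attribute, makes the flags compatible again.  */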
9439
43e9d192
IB
9440/* Return true if SYMBOL_REF X binds locally. */
9441
9442static bool
9443aarch64_symbol_binds_local_p (const_rtx x)
9444{
9445 return (SYMBOL_REF_DECL (x)
9446 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
9447 : SYMBOL_REF_LOCAL_P (x));
9448}
9449
9450/* Return true if SYMBOL_REF X is thread-local. */
9451static bool
9452aarch64_tls_symbol_p (rtx x)
9453{
9454 if (! TARGET_HAVE_TLS)
9455 return false;
9456
9457 if (GET_CODE (x) != SYMBOL_REF)
9458 return false;
9459
9460 return SYMBOL_REF_TLS_MODEL (x) != 0;
9461}
9462
9463/* Classify a TLS symbol into one of the TLS kinds. */
9464enum aarch64_symbol_type
9465aarch64_classify_tls_symbol (rtx x)
9466{
9467 enum tls_model tls_kind = tls_symbolic_operand_type (x);
9468
9469 switch (tls_kind)
9470 {
9471 case TLS_MODEL_GLOBAL_DYNAMIC:
9472 case TLS_MODEL_LOCAL_DYNAMIC:
9473 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
9474
9475 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
9476 switch (aarch64_cmodel)
9477 {
9478 case AARCH64_CMODEL_TINY:
9479 case AARCH64_CMODEL_TINY_PIC:
9480 return SYMBOL_TINY_TLSIE;
9481 default:
79496620 9482 return SYMBOL_SMALL_TLSIE;
5ae7caad 9483 }
43e9d192
IB
9484
9485 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
9486 if (aarch64_tls_size == 12)
9487 return SYMBOL_TLSLE12;
9488 else if (aarch64_tls_size == 24)
9489 return SYMBOL_TLSLE24;
9490 else if (aarch64_tls_size == 32)
9491 return SYMBOL_TLSLE32;
9492 else if (aarch64_tls_size == 48)
9493 return SYMBOL_TLSLE48;
9494 else
9495 gcc_unreachable ();
43e9d192
IB
9496
9497 case TLS_MODEL_EMULATED:
9498 case TLS_MODEL_NONE:
9499 return SYMBOL_FORCE_TO_MEM;
9500
9501 default:
9502 gcc_unreachable ();
9503 }
9504}
9505
9506/* Return the method that should be used to access SYMBOL_REF or
a6e0bfa7 9507 LABEL_REF X. */
17f4d4bf 9508
43e9d192 9509enum aarch64_symbol_type
a6e0bfa7 9510aarch64_classify_symbol (rtx x, rtx offset)
43e9d192
IB
9511{
9512 if (GET_CODE (x) == LABEL_REF)
9513 {
9514 switch (aarch64_cmodel)
9515 {
9516 case AARCH64_CMODEL_LARGE:
9517 return SYMBOL_FORCE_TO_MEM;
9518
9519 case AARCH64_CMODEL_TINY_PIC:
9520 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
9521 return SYMBOL_TINY_ABSOLUTE;
9522
1b1e81f8 9523 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
9524 case AARCH64_CMODEL_SMALL_PIC:
9525 case AARCH64_CMODEL_SMALL:
9526 return SYMBOL_SMALL_ABSOLUTE;
9527
9528 default:
9529 gcc_unreachable ();
9530 }
9531 }
9532
17f4d4bf 9533 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 9534 {
43e9d192
IB
9535 if (aarch64_tls_symbol_p (x))
9536 return aarch64_classify_tls_symbol (x);
9537
17f4d4bf
CSS
9538 switch (aarch64_cmodel)
9539 {
9540 case AARCH64_CMODEL_TINY:
15f6e0da 9541 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
9542 the offset does not cause overflow of the final address. But
9543 we have no way of knowing the address of symbol at compile time
9544 so we can't accurately say if the distance between the PC and
 9545 symbol + offset is outside the addressable range of +/-1M in the
9546 TINY code model. So we rely on images not being greater than
9547 1M and cap the offset at 1M and anything beyond 1M will have to
15f6e0da
RR
9548 be loaded using an alternative mechanism. Furthermore if the
9549 symbol is a weak reference to something that isn't known to
9550 resolve to a symbol in this module, then force to memory. */
9551 if ((SYMBOL_REF_WEAK (x)
9552 && !aarch64_symbol_binds_local_p (x))
f8b756b7 9553 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
a5350ddc
CSS
9554 return SYMBOL_FORCE_TO_MEM;
9555 return SYMBOL_TINY_ABSOLUTE;
9556
17f4d4bf 9557 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
9558 /* Same reasoning as the tiny code model, but the offset cap here is
9559 4G. */
15f6e0da
RR
9560 if ((SYMBOL_REF_WEAK (x)
9561 && !aarch64_symbol_binds_local_p (x))
3ff5d1f0
TB
9562 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
9563 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
9564 return SYMBOL_FORCE_TO_MEM;
9565 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 9566
17f4d4bf 9567 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 9568 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 9569 return SYMBOL_TINY_GOT;
38e6c9a6
MS
9570 return SYMBOL_TINY_ABSOLUTE;
9571
1b1e81f8 9572 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
9573 case AARCH64_CMODEL_SMALL_PIC:
9574 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
9575 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
9576 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 9577 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 9578
9ee6540a
WD
9579 case AARCH64_CMODEL_LARGE:
9580 /* This is alright even in PIC code as the constant
9581 pool reference is always PC relative and within
9582 the same translation unit. */
9583 if (CONSTANT_POOL_ADDRESS_P (x))
9584 return SYMBOL_SMALL_ABSOLUTE;
9585 else
9586 return SYMBOL_FORCE_TO_MEM;
9587
17f4d4bf
CSS
9588 default:
9589 gcc_unreachable ();
9590 }
43e9d192 9591 }
17f4d4bf 9592
43e9d192
IB
9593 /* By default push everything into the constant pool. */
9594 return SYMBOL_FORCE_TO_MEM;
9595}
9596
43e9d192
IB
9597bool
9598aarch64_constant_address_p (rtx x)
9599{
9600 return (CONSTANT_P (x) && memory_address_p (DImode, x));
9601}
9602
9603bool
9604aarch64_legitimate_pic_operand_p (rtx x)
9605{
9606 if (GET_CODE (x) == SYMBOL_REF
9607 || (GET_CODE (x) == CONST
9608 && GET_CODE (XEXP (x, 0)) == PLUS
9609 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
9610 return false;
9611
9612 return true;
9613}
9614
3520f7cc
JG
9615/* Return true if X holds either a quarter-precision or
9616 floating-point +0.0 constant. */
9617static bool
ef4bddc2 9618aarch64_valid_floating_const (machine_mode mode, rtx x)
3520f7cc
JG
9619{
9620 if (!CONST_DOUBLE_P (x))
9621 return false;
9622
6a0f8c01
JW
9623 if (aarch64_float_const_zero_rtx_p (x))
9624 return true;
9625
 9626 /* Apart from +0.0, accepted above, we only handle SFmode and DFmode constants. */
3520f7cc
JG
9627 if (!(mode == SFmode || mode == DFmode))
9628 return false;
9629
3520f7cc
JG
9630 return aarch64_float_const_representable_p (x);
9631}
9632
43e9d192 9633static bool
ef4bddc2 9634aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192
IB
9635{
9636 /* Do not allow vector struct mode constants. We could support
9637 0 and -1 easily, but they need support in aarch64-simd.md. */
9638 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
9639 return false;
9640
9641 /* This could probably go away because
9642 we now decompose CONST_INTs according to expand_mov_immediate. */
9643 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 9644 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
9645 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
9646 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
9647
9648 if (GET_CODE (x) == HIGH
9649 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
9650 return true;
9651
9652 return aarch64_constant_address_p (x);
9653}
9654
a5bc806c 9655rtx
43e9d192
IB
9656aarch64_load_tp (rtx target)
9657{
9658 if (!target
9659 || GET_MODE (target) != Pmode
9660 || !register_operand (target, Pmode))
9661 target = gen_reg_rtx (Pmode);
9662
9663 /* Can return in any reg. */
9664 emit_insn (gen_aarch64_load_tp_hard (target));
9665 return target;
9666}
9667
43e9d192
IB
9668/* On AAPCS systems, this is the "struct __va_list". */
9669static GTY(()) tree va_list_type;
9670
9671/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
9672 Return the type to use as __builtin_va_list.
9673
9674 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
9675
9676 struct __va_list
9677 {
9678 void *__stack;
9679 void *__gr_top;
9680 void *__vr_top;
9681 int __gr_offs;
9682 int __vr_offs;
9683 }; */
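/* A sketch (not part of GCC) of the same record written as plain C, assuming
   an LP64 target where pointers are 8 bytes and int is 4 bytes, giving a
   32-byte, 8-byte-aligned structure:

     struct example_va_list
     {
       void *__stack;    // next stacked (overflow) argument
       void *__gr_top;   // byte past the general-register save area
       void *__vr_top;   // byte past the vector-register save area
       int   __gr_offs;  // negative while GP register args remain, >= 0 after
       int   __vr_offs;  // negative while FP/SIMD register args remain, >= 0 after
     };  */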
9684
9685static tree
9686aarch64_build_builtin_va_list (void)
9687{
9688 tree va_list_name;
9689 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
9690
9691 /* Create the type. */
9692 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
9693 /* Give it the required name. */
9694 va_list_name = build_decl (BUILTINS_LOCATION,
9695 TYPE_DECL,
9696 get_identifier ("__va_list"),
9697 va_list_type);
9698 DECL_ARTIFICIAL (va_list_name) = 1;
9699 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 9700 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
9701
9702 /* Create the fields. */
9703 f_stack = build_decl (BUILTINS_LOCATION,
9704 FIELD_DECL, get_identifier ("__stack"),
9705 ptr_type_node);
9706 f_grtop = build_decl (BUILTINS_LOCATION,
9707 FIELD_DECL, get_identifier ("__gr_top"),
9708 ptr_type_node);
9709 f_vrtop = build_decl (BUILTINS_LOCATION,
9710 FIELD_DECL, get_identifier ("__vr_top"),
9711 ptr_type_node);
9712 f_groff = build_decl (BUILTINS_LOCATION,
9713 FIELD_DECL, get_identifier ("__gr_offs"),
9714 integer_type_node);
9715 f_vroff = build_decl (BUILTINS_LOCATION,
9716 FIELD_DECL, get_identifier ("__vr_offs"),
9717 integer_type_node);
9718
88e3bdd1 9719 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
 9720 NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
 9721 purposes, to identify whether the code is updating the va_list internal
 9722 offset fields in an irregular way. */
9723 va_list_gpr_counter_field = f_groff;
9724 va_list_fpr_counter_field = f_vroff;
9725
43e9d192
IB
9726 DECL_ARTIFICIAL (f_stack) = 1;
9727 DECL_ARTIFICIAL (f_grtop) = 1;
9728 DECL_ARTIFICIAL (f_vrtop) = 1;
9729 DECL_ARTIFICIAL (f_groff) = 1;
9730 DECL_ARTIFICIAL (f_vroff) = 1;
9731
9732 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
9733 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
9734 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
9735 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
9736 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
9737
9738 TYPE_FIELDS (va_list_type) = f_stack;
9739 DECL_CHAIN (f_stack) = f_grtop;
9740 DECL_CHAIN (f_grtop) = f_vrtop;
9741 DECL_CHAIN (f_vrtop) = f_groff;
9742 DECL_CHAIN (f_groff) = f_vroff;
9743
9744 /* Compute its layout. */
9745 layout_type (va_list_type);
9746
9747 return va_list_type;
9748}
9749
9750/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
9751static void
9752aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
9753{
9754 const CUMULATIVE_ARGS *cum;
9755 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
9756 tree stack, grtop, vrtop, groff, vroff;
9757 tree t;
88e3bdd1
JW
9758 int gr_save_area_size = cfun->va_list_gpr_size;
9759 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
9760 int vr_offset;
9761
9762 cum = &crtl->args.info;
88e3bdd1
JW
9763 if (cfun->va_list_gpr_size)
9764 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
9765 cfun->va_list_gpr_size);
9766 if (cfun->va_list_fpr_size)
9767 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
9768 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 9769
d5726973 9770 if (!TARGET_FLOAT)
43e9d192 9771 {
261fb553 9772 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
9773 vr_save_area_size = 0;
9774 }
9775
9776 f_stack = TYPE_FIELDS (va_list_type_node);
9777 f_grtop = DECL_CHAIN (f_stack);
9778 f_vrtop = DECL_CHAIN (f_grtop);
9779 f_groff = DECL_CHAIN (f_vrtop);
9780 f_vroff = DECL_CHAIN (f_groff);
9781
9782 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
9783 NULL_TREE);
9784 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
9785 NULL_TREE);
9786 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
9787 NULL_TREE);
9788 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
9789 NULL_TREE);
9790 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
9791 NULL_TREE);
9792
9793 /* Emit code to initialize STACK, which points to the next varargs stack
9794 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
9795 by named arguments. STACK is 8-byte aligned. */
9796 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
9797 if (cum->aapcs_stack_size > 0)
9798 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
9799 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
9800 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9801
9802 /* Emit code to initialize GRTOP, the top of the GR save area.
9803 virtual_incoming_args_rtx should have been 16 byte aligned. */
9804 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
9805 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
9806 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9807
9808 /* Emit code to initialize VRTOP, the top of the VR save area.
9809 This address is gr_save_area_bytes below GRTOP, rounded
9810 down to the next 16-byte boundary. */
9811 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
9812 vr_offset = ROUND_UP (gr_save_area_size,
9813 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
9814
9815 if (vr_offset)
9816 t = fold_build_pointer_plus_hwi (t, -vr_offset);
9817 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
9818 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9819
9820 /* Emit code to initialize GROFF, the offset from GRTOP of the
9821 next GPR argument. */
9822 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
9823 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
9824 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9825
9826 /* Likewise emit code to initialize VROFF, the offset from FTOP
9827 of the next VR argument. */
9828 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
9829 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
9830 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9831}
9832
9833/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
9834
9835static tree
9836aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
9837 gimple_seq *post_p ATTRIBUTE_UNUSED)
9838{
9839 tree addr;
9840 bool indirect_p;
9841 bool is_ha; /* is HFA or HVA. */
9842 bool dw_align; /* double-word align. */
ef4bddc2 9843 machine_mode ag_mode = VOIDmode;
43e9d192 9844 int nregs;
ef4bddc2 9845 machine_mode mode;
43e9d192
IB
9846
9847 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
9848 tree stack, f_top, f_off, off, arg, roundup, on_stack;
9849 HOST_WIDE_INT size, rsize, adjust, align;
9850 tree t, u, cond1, cond2;
9851
9852 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9853 if (indirect_p)
9854 type = build_pointer_type (type);
9855
9856 mode = TYPE_MODE (type);
9857
9858 f_stack = TYPE_FIELDS (va_list_type_node);
9859 f_grtop = DECL_CHAIN (f_stack);
9860 f_vrtop = DECL_CHAIN (f_grtop);
9861 f_groff = DECL_CHAIN (f_vrtop);
9862 f_vroff = DECL_CHAIN (f_groff);
9863
9864 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
9865 f_stack, NULL_TREE);
9866 size = int_size_in_bytes (type);
9867 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
9868
9869 dw_align = false;
9870 adjust = 0;
9871 if (aarch64_vfp_is_call_or_return_candidate (mode,
9872 type,
9873 &ag_mode,
9874 &nregs,
9875 &is_ha))
9876 {
9877 /* TYPE passed in fp/simd registers. */
d5726973 9878 if (!TARGET_FLOAT)
261fb553 9879 aarch64_err_no_fpadvsimd (mode, "varargs");
43e9d192
IB
9880
9881 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
9882 unshare_expr (valist), f_vrtop, NULL_TREE);
9883 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
9884 unshare_expr (valist), f_vroff, NULL_TREE);
9885
9886 rsize = nregs * UNITS_PER_VREG;
9887
9888 if (is_ha)
9889 {
9890 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
9891 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
9892 }
9893 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
9894 && size < UNITS_PER_VREG)
9895 {
9896 adjust = UNITS_PER_VREG - size;
9897 }
9898 }
9899 else
9900 {
9901 /* TYPE passed in general registers. */
9902 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
9903 unshare_expr (valist), f_grtop, NULL_TREE);
9904 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
9905 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 9906 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
9907 nregs = rsize / UNITS_PER_WORD;
9908
9909 if (align > 8)
9910 dw_align = true;
9911
9912 if (BLOCK_REG_PADDING (mode, type, 1) == downward
9913 && size < UNITS_PER_WORD)
9914 {
9915 adjust = UNITS_PER_WORD - size;
9916 }
9917 }
9918
9919 /* Get a local temporary for the field value. */
9920 off = get_initialized_tmp_var (f_off, pre_p, NULL);
9921
9922 /* Emit code to branch if off >= 0. */
9923 t = build2 (GE_EXPR, boolean_type_node, off,
9924 build_int_cst (TREE_TYPE (off), 0));
9925 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
9926
9927 if (dw_align)
9928 {
9929 /* Emit: offs = (offs + 15) & -16. */
9930 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
9931 build_int_cst (TREE_TYPE (off), 15));
9932 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
9933 build_int_cst (TREE_TYPE (off), -16));
9934 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
9935 }
9936 else
9937 roundup = NULL;
9938
9939 /* Update ap.__[g|v]r_offs */
9940 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
9941 build_int_cst (TREE_TYPE (off), rsize));
9942 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
9943
9944 /* String up. */
9945 if (roundup)
9946 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
9947
9948 /* [cond2] if (ap.__[g|v]r_offs > 0) */
9949 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
9950 build_int_cst (TREE_TYPE (f_off), 0));
9951 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
9952
9953 /* String up: make sure the assignment happens before the use. */
9954 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
9955 COND_EXPR_ELSE (cond1) = t;
9956
9957 /* Prepare the trees handling the argument that is passed on the stack;
9958 the top level node will store in ON_STACK. */
9959 arg = get_initialized_tmp_var (stack, pre_p, NULL);
9960 if (align > 8)
9961 {
9962 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
9963 t = fold_convert (intDI_type_node, arg);
9964 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
9965 build_int_cst (TREE_TYPE (t), 15));
9966 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9967 build_int_cst (TREE_TYPE (t), -16));
9968 t = fold_convert (TREE_TYPE (arg), t);
9969 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
9970 }
9971 else
9972 roundup = NULL;
9973 /* Advance ap.__stack */
9974 t = fold_convert (intDI_type_node, arg);
9975 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
9976 build_int_cst (TREE_TYPE (t), size + 7));
9977 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9978 build_int_cst (TREE_TYPE (t), -8));
9979 t = fold_convert (TREE_TYPE (arg), t);
9980 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
9981 /* String up roundup and advance. */
9982 if (roundup)
9983 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
9984 /* String up with arg */
9985 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
9986 /* Big-endianness related address adjustment. */
9987 if (BLOCK_REG_PADDING (mode, type, 1) == downward
9988 && size < UNITS_PER_WORD)
9989 {
9990 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
9991 size_int (UNITS_PER_WORD - size));
9992 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
9993 }
9994
9995 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
9996 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
9997
9998 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
9999 t = off;
10000 if (adjust)
10001 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
10002 build_int_cst (TREE_TYPE (off), adjust));
10003
10004 t = fold_convert (sizetype, t);
10005 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
10006
10007 if (is_ha)
10008 {
10009 /* type ha; // treat as "struct {ftype field[n];}"
10010 ... [computing offs]
 10011 for (i = 0; i < nregs; ++i, offs += 16)
10012 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
10013 return ha; */
10014 int i;
10015 tree tmp_ha, field_t, field_ptr_t;
10016
10017 /* Declare a local variable. */
10018 tmp_ha = create_tmp_var_raw (type, "ha");
10019 gimple_add_tmp_var (tmp_ha);
10020
10021 /* Establish the base type. */
10022 switch (ag_mode)
10023 {
10024 case SFmode:
10025 field_t = float_type_node;
10026 field_ptr_t = float_ptr_type_node;
10027 break;
10028 case DFmode:
10029 field_t = double_type_node;
10030 field_ptr_t = double_ptr_type_node;
10031 break;
10032 case TFmode:
10033 field_t = long_double_type_node;
10034 field_ptr_t = long_double_ptr_type_node;
10035 break;
43e9d192 10036 case HFmode:
1b62ed4f
JG
10037 field_t = aarch64_fp16_type_node;
10038 field_ptr_t = aarch64_fp16_ptr_type_node;
43e9d192 10039 break;
43e9d192
IB
10040 case V2SImode:
10041 case V4SImode:
10042 {
10043 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
10044 field_t = build_vector_type_for_mode (innertype, ag_mode);
10045 field_ptr_t = build_pointer_type (field_t);
10046 }
10047 break;
10048 default:
10049 gcc_assert (0);
10050 }
10051
 10052 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
10053 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
10054 addr = t;
10055 t = fold_convert (field_ptr_t, addr);
10056 t = build2 (MODIFY_EXPR, field_t,
10057 build1 (INDIRECT_REF, field_t, tmp_ha),
10058 build1 (INDIRECT_REF, field_t, t));
10059
10060 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
10061 for (i = 1; i < nregs; ++i)
10062 {
10063 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
10064 u = fold_convert (field_ptr_t, addr);
10065 u = build2 (MODIFY_EXPR, field_t,
10066 build2 (MEM_REF, field_t, tmp_ha,
10067 build_int_cst (field_ptr_t,
10068 (i *
10069 int_size_in_bytes (field_t)))),
10070 build1 (INDIRECT_REF, field_t, u));
10071 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
10072 }
10073
10074 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
10075 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
10076 }
10077
10078 COND_EXPR_ELSE (cond2) = t;
10079 addr = fold_convert (build_pointer_type (type), cond1);
10080 addr = build_va_arg_indirect_ref (addr);
10081
10082 if (indirect_p)
10083 addr = build_va_arg_indirect_ref (addr);
10084
10085 return addr;
10086}
10087
10088/* Implement TARGET_SETUP_INCOMING_VARARGS. */
10089
10090static void
ef4bddc2 10091aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
10092 tree type, int *pretend_size ATTRIBUTE_UNUSED,
10093 int no_rtl)
10094{
10095 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10096 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
10097 int gr_saved = cfun->va_list_gpr_size;
10098 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
10099
10100 /* The caller has advanced CUM up to, but not beyond, the last named
10101 argument. Advance a local copy of CUM past the last "real" named
10102 argument, to find out how many registers are left over. */
10103 local_cum = *cum;
10104 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
10105
88e3bdd1
JW
 10106 /* Find out how many registers we need to save.
 10107 Honor the tree-stdarg analysis results. */
10108 if (cfun->va_list_gpr_size)
10109 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
10110 cfun->va_list_gpr_size / UNITS_PER_WORD);
10111 if (cfun->va_list_fpr_size)
10112 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
10113 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 10114
d5726973 10115 if (!TARGET_FLOAT)
43e9d192 10116 {
261fb553 10117 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
10118 vr_saved = 0;
10119 }
10120
10121 if (!no_rtl)
10122 {
10123 if (gr_saved > 0)
10124 {
10125 rtx ptr, mem;
10126
10127 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
10128 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
10129 - gr_saved * UNITS_PER_WORD);
10130 mem = gen_frame_mem (BLKmode, ptr);
10131 set_mem_alias_set (mem, get_varargs_alias_set ());
10132
10133 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
10134 mem, gr_saved);
10135 }
10136 if (vr_saved > 0)
10137 {
10138 /* We can't use move_block_from_reg, because it will use
10139 the wrong mode, storing D regs only. */
ef4bddc2 10140 machine_mode mode = TImode;
88e3bdd1 10141 int off, i, vr_start;
43e9d192
IB
10142
10143 /* Set OFF to the offset from virtual_incoming_args_rtx of
10144 the first vector register. The VR save area lies below
10145 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
10146 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
10147 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
10148 off -= vr_saved * UNITS_PER_VREG;
10149
88e3bdd1
JW
10150 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
10151 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
10152 {
10153 rtx ptr, mem;
10154
10155 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
10156 mem = gen_frame_mem (mode, ptr);
10157 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 10158 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
10159 off += UNITS_PER_VREG;
10160 }
10161 }
10162 }
10163
10164 /* We don't save the size into *PRETEND_SIZE because we want to avoid
10165 any complication of having crtl->args.pretend_args_size changed. */
8799637a 10166 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
10167 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
10168 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
10169 + vr_saved * UNITS_PER_VREG);
10170}
10171
10172static void
10173aarch64_conditional_register_usage (void)
10174{
10175 int i;
10176 if (!TARGET_FLOAT)
10177 {
10178 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
10179 {
10180 fixed_regs[i] = 1;
10181 call_used_regs[i] = 1;
10182 }
10183 }
10184}
10185
10186/* Walk down the type tree of TYPE counting consecutive base elements.
10187 If *MODEP is VOIDmode, then set it to the first valid floating point
10188 type. If a non-floating point type is found, or if a floating point
10189 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
10190 otherwise return the count in the sub-tree. */
10191static int
ef4bddc2 10192aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 10193{
ef4bddc2 10194 machine_mode mode;
43e9d192
IB
10195 HOST_WIDE_INT size;
10196
10197 switch (TREE_CODE (type))
10198 {
10199 case REAL_TYPE:
10200 mode = TYPE_MODE (type);
1b62ed4f
JG
10201 if (mode != DFmode && mode != SFmode
10202 && mode != TFmode && mode != HFmode)
43e9d192
IB
10203 return -1;
10204
10205 if (*modep == VOIDmode)
10206 *modep = mode;
10207
10208 if (*modep == mode)
10209 return 1;
10210
10211 break;
10212
10213 case COMPLEX_TYPE:
10214 mode = TYPE_MODE (TREE_TYPE (type));
1b62ed4f
JG
10215 if (mode != DFmode && mode != SFmode
10216 && mode != TFmode && mode != HFmode)
43e9d192
IB
10217 return -1;
10218
10219 if (*modep == VOIDmode)
10220 *modep = mode;
10221
10222 if (*modep == mode)
10223 return 2;
10224
10225 break;
10226
10227 case VECTOR_TYPE:
10228 /* Use V2SImode and V4SImode as representatives of all 64-bit
10229 and 128-bit vector types. */
10230 size = int_size_in_bytes (type);
10231 switch (size)
10232 {
10233 case 8:
10234 mode = V2SImode;
10235 break;
10236 case 16:
10237 mode = V4SImode;
10238 break;
10239 default:
10240 return -1;
10241 }
10242
10243 if (*modep == VOIDmode)
10244 *modep = mode;
10245
10246 /* Vector modes are considered to be opaque: two vectors are
10247 equivalent for the purposes of being homogeneous aggregates
10248 if they are the same size. */
10249 if (*modep == mode)
10250 return 1;
10251
10252 break;
10253
10254 case ARRAY_TYPE:
10255 {
10256 int count;
10257 tree index = TYPE_DOMAIN (type);
10258
807e902e
KZ
10259 /* Can't handle incomplete types nor sizes that are not
10260 fixed. */
10261 if (!COMPLETE_TYPE_P (type)
10262 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10263 return -1;
10264
10265 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
10266 if (count == -1
10267 || !index
10268 || !TYPE_MAX_VALUE (index)
cc269bb6 10269 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 10270 || !TYPE_MIN_VALUE (index)
cc269bb6 10271 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
10272 || count < 0)
10273 return -1;
10274
ae7e9ddd
RS
10275 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10276 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
10277
10278 /* There must be no padding. */
807e902e 10279 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
10280 return -1;
10281
10282 return count;
10283 }
10284
10285 case RECORD_TYPE:
10286 {
10287 int count = 0;
10288 int sub_count;
10289 tree field;
10290
807e902e
KZ
10291 /* Can't handle incomplete types nor sizes that are not
10292 fixed. */
10293 if (!COMPLETE_TYPE_P (type)
10294 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10295 return -1;
10296
10297 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10298 {
10299 if (TREE_CODE (field) != FIELD_DECL)
10300 continue;
10301
10302 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
10303 if (sub_count < 0)
10304 return -1;
10305 count += sub_count;
10306 }
10307
10308 /* There must be no padding. */
807e902e 10309 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
10310 return -1;
10311
10312 return count;
10313 }
10314
10315 case UNION_TYPE:
10316 case QUAL_UNION_TYPE:
10317 {
10318 /* These aren't very interesting except in a degenerate case. */
10319 int count = 0;
10320 int sub_count;
10321 tree field;
10322
807e902e
KZ
10323 /* Can't handle incomplete types nor sizes that are not
10324 fixed. */
10325 if (!COMPLETE_TYPE_P (type)
10326 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
10327 return -1;
10328
10329 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10330 {
10331 if (TREE_CODE (field) != FIELD_DECL)
10332 continue;
10333
10334 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
10335 if (sub_count < 0)
10336 return -1;
10337 count = count > sub_count ? count : sub_count;
10338 }
10339
10340 /* There must be no padding. */
807e902e 10341 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
10342 return -1;
10343
10344 return count;
10345 }
10346
10347 default:
10348 break;
10349 }
10350
10351 return -1;
10352}
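/* Illustrative results of the walk above (the type names are hypothetical):

     struct hfa2  { double x, y; };        -> count 2, *modep == DFmode
     struct hfa4  { float f[4]; };         -> count 4, *modep == SFmode
     _Complex double                       -> count 2, *modep == DFmode
     struct mixed { float f; double d; };  -> -1 (element modes differ)

   A count between 1 and HA_MAX_NUM_FLDS with a single floating-point or
   vector mode is what later lets aarch64_vfp_is_call_or_return_candidate
   treat the type as a homogeneous aggregate.  */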
10353
b6ec6215
KT
10354/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
10355 type as described in AAPCS64 \S 4.1.2.
10356
10357 See the comment above aarch64_composite_type_p for the notes on MODE. */
10358
10359static bool
10360aarch64_short_vector_p (const_tree type,
10361 machine_mode mode)
10362{
10363 HOST_WIDE_INT size = -1;
10364
10365 if (type && TREE_CODE (type) == VECTOR_TYPE)
10366 size = int_size_in_bytes (type);
10367 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
10368 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
10369 size = GET_MODE_SIZE (mode);
10370
10371 return (size == 8 || size == 16);
10372}
10373
43e9d192
IB
10374/* Return TRUE if the type, as described by TYPE and MODE, is a composite
10375 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
10376 array types. The C99 floating-point complex types are also considered
10377 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
10378 types, which are GCC extensions and out of the scope of AAPCS64, are
10379 treated as composite types here as well.
10380
10381 Note that MODE itself is not sufficient in determining whether a type
10382 is such a composite type or not. This is because
10383 stor-layout.c:compute_record_mode may have already changed the MODE
10384 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
10385 structure with only one field may have its MODE set to the mode of the
10386 field. Also an integer mode whose size matches the size of the
10387 RECORD_TYPE type may be used to substitute the original mode
10388 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
10389 solely relied on. */
10390
10391static bool
10392aarch64_composite_type_p (const_tree type,
ef4bddc2 10393 machine_mode mode)
43e9d192 10394{
b6ec6215
KT
10395 if (aarch64_short_vector_p (type, mode))
10396 return false;
10397
43e9d192
IB
10398 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
10399 return true;
10400
10401 if (mode == BLKmode
10402 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
10403 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
10404 return true;
10405
10406 return false;
10407}
10408
43e9d192
IB
10409/* Return TRUE if an argument, whose type is described by TYPE and MODE,
10410 shall be passed or returned in simd/fp register(s) (providing these
10411 parameter passing registers are available).
10412
10413 Upon successful return, *COUNT returns the number of needed registers,
10414 *BASE_MODE returns the mode of the individual register and when IS_HAF
10415 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
10416 floating-point aggregate or a homogeneous short-vector aggregate. */
10417
10418static bool
ef4bddc2 10419aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 10420 const_tree type,
ef4bddc2 10421 machine_mode *base_mode,
43e9d192
IB
10422 int *count,
10423 bool *is_ha)
10424{
ef4bddc2 10425 machine_mode new_mode = VOIDmode;
43e9d192
IB
10426 bool composite_p = aarch64_composite_type_p (type, mode);
10427
10428 if (is_ha != NULL) *is_ha = false;
10429
10430 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
10431 || aarch64_short_vector_p (type, mode))
10432 {
10433 *count = 1;
10434 new_mode = mode;
10435 }
10436 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10437 {
10438 if (is_ha != NULL) *is_ha = true;
10439 *count = 2;
10440 new_mode = GET_MODE_INNER (mode);
10441 }
10442 else if (type && composite_p)
10443 {
10444 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
10445
10446 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
10447 {
10448 if (is_ha != NULL) *is_ha = true;
10449 *count = ag_count;
10450 }
10451 else
10452 return false;
10453 }
10454 else
10455 return false;
10456
10457 *base_mode = new_mode;
10458 return true;
10459}
10460
10461/* Implement TARGET_STRUCT_VALUE_RTX. */
10462
10463static rtx
10464aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
10465 int incoming ATTRIBUTE_UNUSED)
10466{
10467 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
10468}
10469
10470/* Implements target hook vector_mode_supported_p. */
10471static bool
ef4bddc2 10472aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192
IB
10473{
10474 if (TARGET_SIMD
10475 && (mode == V4SImode || mode == V8HImode
10476 || mode == V16QImode || mode == V2DImode
10477 || mode == V2SImode || mode == V4HImode
10478 || mode == V8QImode || mode == V2SFmode
ad7d90cc 10479 || mode == V4SFmode || mode == V2DFmode
71a11456 10480 || mode == V4HFmode || mode == V8HFmode
ad7d90cc 10481 || mode == V1DFmode))
43e9d192
IB
10482 return true;
10483
10484 return false;
10485}
10486
b7342d25
IB
10487/* Return appropriate SIMD container
10488 for MODE within a vector of WIDTH bits. */
ef4bddc2
RS
10489static machine_mode
10490aarch64_simd_container_mode (machine_mode mode, unsigned width)
43e9d192 10491{
b7342d25 10492 gcc_assert (width == 64 || width == 128);
43e9d192 10493 if (TARGET_SIMD)
b7342d25
IB
10494 {
10495 if (width == 128)
10496 switch (mode)
10497 {
10498 case DFmode:
10499 return V2DFmode;
10500 case SFmode:
10501 return V4SFmode;
10502 case SImode:
10503 return V4SImode;
10504 case HImode:
10505 return V8HImode;
10506 case QImode:
10507 return V16QImode;
10508 case DImode:
10509 return V2DImode;
10510 default:
10511 break;
10512 }
10513 else
10514 switch (mode)
10515 {
10516 case SFmode:
10517 return V2SFmode;
10518 case SImode:
10519 return V2SImode;
10520 case HImode:
10521 return V4HImode;
10522 case QImode:
10523 return V8QImode;
10524 default:
10525 break;
10526 }
10527 }
43e9d192
IB
10528 return word_mode;
10529}
10530
b7342d25 10531/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2
RS
10532static machine_mode
10533aarch64_preferred_simd_mode (machine_mode mode)
b7342d25
IB
10534{
10535 return aarch64_simd_container_mode (mode, 128);
10536}
10537
3b357264
JG
10538/* Return the bitmask of possible vector sizes for the vectorizer
10539 to iterate over. */
10540static unsigned int
10541aarch64_autovectorize_vector_sizes (void)
10542{
10543 return (16 | 8);
10544}
10545
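/* Illustrative sketch (not part of GCC): the value (16 | 8) returned above is a
   bitmask of candidate vector sizes in bytes, so both 128-bit and 64-bit
   vectors are considered by the vectorizer.  A hypothetical decoder of such a
   mask:  */
#include <stdio.h>
static void decode_vector_sizes (unsigned int mask)
{
  for (int bit = 0; bit < 32; bit++)
    if (mask & (1u << bit))
      printf ("candidate vector size: %u bytes (%u bits)\n",
              1u << bit, (1u << bit) * 8);
}
/* decode_vector_sizes (16 | 8) reports 8-byte and 16-byte vectors.  */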
ac2b960f
YZ
10546/* Implement TARGET_MANGLE_TYPE. */
10547
6f549691 10548static const char *
ac2b960f
YZ
10549aarch64_mangle_type (const_tree type)
10550{
10551 /* The AArch64 ABI documents say that "__va_list" has to be
10552 mangled as if it is in the "std" namespace. */
10553 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
10554 return "St9__va_list";
10555
c2ec330c
AL
10556 /* Half-precision float. */
10557 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
10558 return "Dh";
10559
f9d53c27
TB
10560 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
10561 builtin types. */
10562 if (TYPE_NAME (type) != NULL)
10563 return aarch64_mangle_builtin_type (type);
c6fc9e43 10564
ac2b960f
YZ
10565 /* Use the default mangling. */
10566 return NULL;
10567}
10568
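/* Illustrative sketch (not part of GCC): the strings returned above are spliced
   into Itanium C++ mangled names by the front end.  The C++ declarations below
   are hypothetical; with the rules above the __fp16 parameter of f mangles as
   "Dh" (giving a full name of "_Z1fDh"), and a va_list parameter contributes
   "St9__va_list".

     void f (__fp16);
     void g (va_list);
*/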
8baff86e
KT
10569
10570/* Return true if the rtx_insn contains a MEM RTX somewhere
10571 in it. */
75cf1494
KT
10572
10573static bool
8baff86e 10574has_memory_op (rtx_insn *mem_insn)
75cf1494 10575{
8baff86e
KT
10576 subrtx_iterator::array_type array;
10577 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
10578 if (MEM_P (*iter))
10579 return true;
10580
10581 return false;
75cf1494
KT
10582}
10583
10584/* Find the first rtx_insn before insn that will generate an assembly
10585 instruction. */
10586
10587static rtx_insn *
10588aarch64_prev_real_insn (rtx_insn *insn)
10589{
10590 if (!insn)
10591 return NULL;
10592
10593 do
10594 {
10595 insn = prev_real_insn (insn);
10596 }
10597 while (insn && recog_memoized (insn) < 0);
10598
10599 return insn;
10600}
10601
10602static bool
10603is_madd_op (enum attr_type t1)
10604{
10605 unsigned int i;
10606 /* A number of these may be AArch32 only. */
10607 enum attr_type mlatypes[] = {
10608 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
10609 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
10610 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
10611 };
10612
10613 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
10614 {
10615 if (t1 == mlatypes[i])
10616 return true;
10617 }
10618
10619 return false;
10620}
10621
10622/* Check if there is a register dependency between a load and the insn
10623 for which we hold recog_data. */
10624
10625static bool
10626dep_between_memop_and_curr (rtx memop)
10627{
10628 rtx load_reg;
10629 int opno;
10630
8baff86e 10631 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
10632
10633 if (!REG_P (SET_DEST (memop)))
10634 return false;
10635
10636 load_reg = SET_DEST (memop);
8baff86e 10637 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
10638 {
10639 rtx operand = recog_data.operand[opno];
10640 if (REG_P (operand)
10641 && reg_overlap_mentioned_p (load_reg, operand))
10642 return true;
10643
10644 }
10645 return false;
10646}
10647
8baff86e
KT
10648
10649/* When working around the Cortex-A53 erratum 835769,
10650 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
10651 instruction and has a preceding memory instruction such that a NOP
10652 should be inserted between them. */
10653
75cf1494
KT
10654bool
10655aarch64_madd_needs_nop (rtx_insn* insn)
10656{
10657 enum attr_type attr_type;
10658 rtx_insn *prev;
10659 rtx body;
10660
b32c1043 10661 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
10662 return false;
10663
e322d6e3 10664 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
10665 return false;
10666
10667 attr_type = get_attr_type (insn);
10668 if (!is_madd_op (attr_type))
10669 return false;
10670
10671 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
10672 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
10673 Restore recog state to INSN to avoid state corruption. */
10674 extract_constrain_insn_cached (insn);
10675
8baff86e 10676 if (!prev || !has_memory_op (prev))
75cf1494
KT
10677 return false;
10678
10679 body = single_set (prev);
10680
10681 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
10682 it and the DImode madd, emit a NOP between them. If body is NULL then we
10683 have a complex memory operation, probably a load/store pair.
10684 Be conservative for now and emit a NOP. */
10685 if (GET_MODE (recog_data.operand[0]) == DImode
10686 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
10687 return true;
10688
10689 return false;
10690
10691}
10692
8baff86e
KT
10693
10694/* Implement FINAL_PRESCAN_INSN. */
10695
75cf1494
KT
10696void
10697aarch64_final_prescan_insn (rtx_insn *insn)
10698{
10699 if (aarch64_madd_needs_nop (insn))
10700 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
10701}
10702
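/* Illustrative sketch (not part of GCC): with -mfix-cortex-a53-835769 the hook
   above makes final emit a NOP between a memory operation and an immediately
   following 64-bit multiply-accumulate.  A hypothetical source pattern that can
   lead to such a pairing (whether a NOP is actually inserted depends on the
   scheduled instruction stream):  */
long madd_after_load (long *p, long a, long b, long acc)
{
  long x = *p;                  /* memory operation */
  return acc + a * b + x;       /* DImode multiply-accumulate close by */
}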
10703
43e9d192 10704/* Return the AdvSIMD element-size suffix letter (b/h/s/d) for SIZE bits. */
81c2dfb9 10705static char
43e9d192
IB
10706sizetochar (int size)
10707{
10708 switch (size)
10709 {
10710 case 64: return 'd';
10711 case 32: return 's';
10712 case 16: return 'h';
10713 case 8 : return 'b';
10714 default: gcc_unreachable ();
10715 }
10716}
10717
3520f7cc
JG
10718/* Return true iff x is a uniform vector of floating-point
10719 constants, and the constant can be represented in
10720 quarter-precision form. Note, as aarch64_float_const_representable_p
10721 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
10722static bool
10723aarch64_vect_float_const_representable_p (rtx x)
10724{
92695fbb
RS
10725 rtx elt;
10726 return (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
10727 && const_vec_duplicate_p (x, &elt)
10728 && aarch64_float_const_representable_p (elt));
3520f7cc
JG
10729}
10730
d8edd899 10731/* Return true if OP is a valid AdvSIMD immediate for MODE (filling in *INFO if INFO is nonnull), false otherwise. */
3ea63f60 10732bool
ef4bddc2 10733aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
48063b9d 10734 struct simd_immediate_info *info)
43e9d192
IB
10735{
10736#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
10737 matches = 1; \
10738 for (i = 0; i < idx; i += (STRIDE)) \
10739 if (!(TEST)) \
10740 matches = 0; \
10741 if (matches) \
10742 { \
10743 immtype = (CLASS); \
10744 elsize = (ELSIZE); \
43e9d192
IB
10745 eshift = (SHIFT); \
10746 emvn = (NEG); \
10747 break; \
10748 }
10749
10750 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
cb5ca315 10751 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
43e9d192 10752 unsigned char bytes[16];
43e9d192
IB
10753 int immtype = -1, matches;
10754 unsigned int invmask = inverse ? 0xff : 0;
10755 int eshift, emvn;
10756
43e9d192 10757 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 10758 {
81c2dfb9
IB
10759 if (! (aarch64_simd_imm_zero_p (op, mode)
10760 || aarch64_vect_float_const_representable_p (op)))
d8edd899 10761 return false;
3520f7cc 10762
48063b9d
IB
10763 if (info)
10764 {
10765 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 10766 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
10767 info->mvn = false;
10768 info->shift = 0;
10769 }
3520f7cc 10770
d8edd899 10771 return true;
3520f7cc 10772 }
43e9d192
IB
10773
10774 /* Splat vector constant out into a byte vector. */
10775 for (i = 0; i < n_elts; i++)
10776 {
4b1e108c
AL
10777 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
10778 it must be laid out in the vector register in reverse order. */
10779 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192 10780 unsigned HOST_WIDE_INT elpart;
43e9d192 10781
ee78df47
KT
10782 gcc_assert (CONST_INT_P (el));
10783 elpart = INTVAL (el);
10784
10785 for (unsigned int byte = 0; byte < innersize; byte++)
10786 {
10787 bytes[idx++] = (elpart & 0xff) ^ invmask;
10788 elpart >>= BITS_PER_UNIT;
10789 }
43e9d192 10790
43e9d192
IB
10791 }
10792
10793 /* Sanity check. */
10794 gcc_assert (idx == GET_MODE_SIZE (mode));
10795
10796 do
10797 {
10798 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
10799 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
10800
10801 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
10802 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
10803
10804 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
10805 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
10806
10807 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
10808 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
10809
10810 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
10811
10812 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
10813
10814 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
10815 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
10816
10817 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
10818 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
10819
10820 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
10821 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
10822
10823 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
10824 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
10825
10826 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
10827
10828 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
10829
10830 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 10831 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
10832
10833 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 10834 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
10835
10836 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 10837 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
10838
10839 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 10840 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
10841
10842 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
10843
10844 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
10845 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
10846 }
10847 while (0);
10848
e4f0f84d 10849 if (immtype == -1)
d8edd899 10850 return false;
43e9d192 10851
48063b9d 10852 if (info)
43e9d192 10853 {
48063b9d 10854 info->element_width = elsize;
48063b9d
IB
10855 info->mvn = emvn != 0;
10856 info->shift = eshift;
10857
43e9d192
IB
10858 unsigned HOST_WIDE_INT imm = 0;
10859
e4f0f84d
TB
10860 if (immtype >= 12 && immtype <= 15)
10861 info->msl = true;
10862
43e9d192
IB
10863 /* Un-invert bytes of recognized vector, if necessary. */
10864 if (invmask != 0)
10865 for (i = 0; i < idx; i++)
10866 bytes[i] ^= invmask;
10867
10868 if (immtype == 17)
10869 {
10870 /* FIXME: Broken on 32-bit H_W_I hosts. */
10871 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
10872
10873 for (i = 0; i < 8; i++)
10874 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
10875 << (i * BITS_PER_UNIT);
10876
43e9d192 10877
48063b9d
IB
10878 info->value = GEN_INT (imm);
10879 }
10880 else
10881 {
10882 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
10883 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
10884
10885 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
10886 generic constants. */
10887 if (info->mvn)
43e9d192 10888 imm = ~imm;
48063b9d
IB
10889 imm = (imm >> info->shift) & 0xff;
10890 info->value = GEN_INT (imm);
10891 }
43e9d192
IB
10892 }
10893
48063b9d 10894 return true;
43e9d192
IB
10895#undef CHECK
10896}
10897
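/* Illustrative sketch (not part of GCC): the CHECK patterns above classify a
   constant by its replicated byte image.  The hypothetical helper below
   recognises just the first pattern, an 8-bit value in the lowest byte of each
   32-bit element with the remaining bytes zero (the MOVI form with no
   shift):  */
#include <stdbool.h>
#include <stdint.h>
static bool movi_32bit_no_shift_p (uint32_t element)
{
  return (element & 0xffffff00u) == 0;
}
/* movi_32bit_no_shift_p (0x0000007f) is true;
   0x00007f00 would instead match the "LSL #8" pattern.  */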
43e9d192
IB
10898/* Check if immediate shift constants are within range. */
10899bool
ef4bddc2 10900aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
10901{
10902 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
10903 if (left)
ddeabd3e 10904 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 10905 else
ddeabd3e 10906 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
10907}
10908
3520f7cc
JG
10909/* Return true if X is a uniform vector where all elements
10910 are either the floating-point constant 0.0 or the
10911 integer constant 0. */
43e9d192 10912bool
ef4bddc2 10913aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
43e9d192 10914{
3520f7cc 10915 return x == CONST0_RTX (mode);
43e9d192
IB
10916}
10917
7325d85a
KT
10918
10919/* Return the bitmask CONST_INT to select the bits required by a zero extract
10920 operation of width WIDTH at bit position POS. */
10921
10922rtx
10923aarch64_mask_from_zextract_ops (rtx width, rtx pos)
10924{
10925 gcc_assert (CONST_INT_P (width));
10926 gcc_assert (CONST_INT_P (pos));
10927
10928 unsigned HOST_WIDE_INT mask
10929 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
10930 return GEN_INT (mask << UINTVAL (pos));
10931}
10932
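/* Illustrative sketch (not part of GCC): the mask built above selects WIDTH bits
   starting at bit position POS.  A plain C equivalent with hypothetical
   arguments (WIDTH is assumed to be between 1 and 63 here, as the shift would
   otherwise overflow):  */
#include <stdint.h>
static uint64_t zextract_mask (unsigned int width, unsigned int pos)
{
  return (((uint64_t) 1 << width) - 1) << pos;
}
/* zextract_mask (8, 16) == 0x0000000000ff0000.  */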
43e9d192 10933bool
ef4bddc2 10934aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
43e9d192
IB
10935{
10936 HOST_WIDE_INT imm = INTVAL (x);
10937 int i;
10938
10939 for (i = 0; i < 8; i++)
10940 {
10941 unsigned int byte = imm & 0xff;
10942 if (byte != 0xff && byte != 0)
10943 return false;
10944 imm >>= 8;
10945 }
10946
10947 return true;
10948}
10949
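/* Illustrative sketch (not part of GCC): the predicate above accepts exactly
   those 64-bit constants in which every byte is 0x00 or 0xff, i.e. the values
   a 64-bit MOVI can materialise.  A standalone version:  */
#include <stdbool.h>
#include <stdint.h>
static bool bytes_all_zero_or_ff_p (uint64_t imm)
{
  for (int i = 0; i < 8; i++, imm >>= 8)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0 && byte != 0xff)
        return false;
    }
  return true;
}
/* 0xff00ff0000ff00ff is accepted; 0x0102030405060708 is not.  */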
83f8c414 10950bool
a6e0bfa7 10951aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 10952{
83f8c414
CSS
10953 if (GET_CODE (x) == HIGH
10954 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
10955 return true;
10956
82614948 10957 if (CONST_INT_P (x))
83f8c414
CSS
10958 return true;
10959
10960 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
10961 return true;
10962
a6e0bfa7 10963 return aarch64_classify_symbolic_expression (x)
a5350ddc 10964 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
10965}
10966
43e9d192
IB
10967/* Return a const_int vector of VAL. */
10968rtx
ef4bddc2 10969aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
43e9d192
IB
10970{
10971 int nunits = GET_MODE_NUNITS (mode);
10972 rtvec v = rtvec_alloc (nunits);
10973 int i;
10974
10975 for (i=0; i < nunits; i++)
10976 RTVEC_ELT (v, i) = GEN_INT (val);
10977
10978 return gen_rtx_CONST_VECTOR (mode, v);
10979}
10980
051d0e2f
SN
10981/* Check OP is a legal scalar immediate for the MOVI instruction. */
10982
10983bool
ef4bddc2 10984aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
051d0e2f 10985{
ef4bddc2 10986 machine_mode vmode;
051d0e2f
SN
10987
10988 gcc_assert (!VECTOR_MODE_P (mode));
10989 vmode = aarch64_preferred_simd_mode (mode);
10990 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 10991 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
10992}
10993
988fa693
JG
10994/* Construct and return a PARALLEL RTX vector with elements numbering the
10995 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
10996 the vector - from the perspective of the architecture. This does not
10997 line up with GCC's perspective on lane numbers, so we end up with
10998 different masks depending on our target endian-ness. The diagram
10999 below may help. We must draw the distinction when building masks
11000 which select one half of the vector. An instruction selecting
11001 architectural low-lanes for a big-endian target, must be described using
11002 a mask selecting GCC high-lanes.
11003
11004 Big-Endian Little-Endian
11005
11006GCC 0 1 2 3 3 2 1 0
11007 | x | x | x | x | | x | x | x | x |
11008Architecture 3 2 1 0 3 2 1 0
11009
11010Low Mask: { 2, 3 } { 0, 1 }
11011High Mask: { 0, 1 } { 2, 3 }
11012*/
11013
43e9d192 11014rtx
ef4bddc2 11015aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
43e9d192
IB
11016{
11017 int nunits = GET_MODE_NUNITS (mode);
11018 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
11019 int high_base = nunits / 2;
11020 int low_base = 0;
11021 int base;
43e9d192
IB
11022 rtx t1;
11023 int i;
11024
988fa693
JG
11025 if (BYTES_BIG_ENDIAN)
11026 base = high ? low_base : high_base;
11027 else
11028 base = high ? high_base : low_base;
11029
11030 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
11031 RTVEC_ELT (v, i) = GEN_INT (base + i);
11032
11033 t1 = gen_rtx_PARALLEL (mode, v);
11034 return t1;
11035}
11036
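/* Illustrative sketch (not part of GCC): for a 4-element vector the lane
   selection above reproduces the masks in the diagram.  A hypothetical helper
   that prints them:  */
#include <stdbool.h>
#include <stdio.h>
static void print_half_lanes (int nunits, bool big_endian, bool high)
{
  int high_base = nunits / 2;
  int base = big_endian ? (high ? 0 : high_base)
                        : (high ? high_base : 0);
  for (int i = 0; i < nunits / 2; i++)
    printf ("%d ", base + i);
  printf ("\n");
}
/* print_half_lanes (4, true, false) prints "2 3", the big-endian low mask.  */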
988fa693
JG
11037/* Check OP for validity as a PARALLEL RTX vector with elements
11038 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
11039 from the perspective of the architecture. See the diagram above
11040 aarch64_simd_vect_par_cnst_half for more details. */
11041
11042bool
ef4bddc2 11043aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
11044 bool high)
11045{
11046 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
11047 HOST_WIDE_INT count_op = XVECLEN (op, 0);
11048 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
11049 int i = 0;
11050
11051 if (!VECTOR_MODE_P (mode))
11052 return false;
11053
11054 if (count_op != count_ideal)
11055 return false;
11056
11057 for (i = 0; i < count_ideal; i++)
11058 {
11059 rtx elt_op = XVECEXP (op, 0, i);
11060 rtx elt_ideal = XVECEXP (ideal, 0, i);
11061
4aa81c2e 11062 if (!CONST_INT_P (elt_op)
988fa693
JG
11063 || INTVAL (elt_ideal) != INTVAL (elt_op))
11064 return false;
11065 }
11066 return true;
11067}
11068
43e9d192
IB
11069/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
11070 HIGH (exclusive). */
11071void
46ed6024
CB
11072aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
11073 const_tree exp)
43e9d192
IB
11074{
11075 HOST_WIDE_INT lane;
4aa81c2e 11076 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
11077 lane = INTVAL (operand);
11078
11079 if (lane < low || lane >= high)
46ed6024
CB
11080 {
11081 if (exp)
cf0c27ef 11082 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 11083 else
cf0c27ef 11084 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 11085 }
43e9d192
IB
11086}
11087
43e9d192
IB
11088/* Return TRUE if OP is a valid vector addressing mode. */
11089bool
11090aarch64_simd_mem_operand_p (rtx op)
11091{
11092 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 11093 || REG_P (XEXP (op, 0)));
43e9d192
IB
11094}
11095
2d8c6dc1
AH
11096/* Emit a register copy from operand to operand, taking care not to
11097 early-clobber source registers in the process.
43e9d192 11098
2d8c6dc1
AH
11099 COUNT is the number of components into which the copy needs to be
11100 decomposed. */
43e9d192 11101void
2d8c6dc1
AH
11102aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
11103 unsigned int count)
43e9d192
IB
11104{
11105 unsigned int i;
2d8c6dc1
AH
11106 int rdest = REGNO (operands[0]);
11107 int rsrc = REGNO (operands[1]);
43e9d192
IB
11108
11109 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
11110 || rdest < rsrc)
11111 for (i = 0; i < count; i++)
11112 emit_move_insn (gen_rtx_REG (mode, rdest + i),
11113 gen_rtx_REG (mode, rsrc + i));
43e9d192 11114 else
2d8c6dc1
AH
11115 for (i = 0; i < count; i++)
11116 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
11117 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
11118}
11119
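/* Illustrative sketch (not part of GCC): the copy direction chosen above follows
   the usual rule for overlapping block copies: copy upwards when the
   destination starts below the source and downwards otherwise, so no component
   is clobbered before it has been read.  A scalar analogue over a hypothetical
   register array:  */
static void copy_overlapping_components (int *regs, int rdest, int rsrc,
                                         int count)
{
  if (rdest < rsrc)
    for (int i = 0; i < count; i++)            /* forwards */
      regs[rdest + i] = regs[rsrc + i];
  else
    for (int i = count - 1; i >= 0; i--)       /* backwards */
      regs[rdest + i] = regs[rsrc + i];
}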
668046d1 11120/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 11121 one of the VSTRUCT modes: OI, CI, or XI. */
668046d1
DS
11122int
11123aarch64_simd_attr_length_rglist (enum machine_mode mode)
11124{
11125 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
11126}
11127
db0253a4
TB
11128/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
11129 alignment of a vector to 128 bits. */
11130static HOST_WIDE_INT
11131aarch64_simd_vector_alignment (const_tree type)
11132{
9439e9a1 11133 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
11134 return MIN (align, 128);
11135}
11136
11137/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
11138static bool
11139aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
11140{
11141 if (is_packed)
11142 return false;
11143
11144 /* We guarantee alignment for vectors up to 128-bits. */
11145 if (tree_int_cst_compare (TYPE_SIZE (type),
11146 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
11147 return false;
11148
11149 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
11150 return true;
11151}
11152
4369c11e
TB
11153/* If VALS is a vector constant that can be loaded into a register
11154 using DUP, generate instructions to do so and return an RTX to
11155 assign to the register. Otherwise return NULL_RTX. */
11156static rtx
11157aarch64_simd_dup_constant (rtx vals)
11158{
ef4bddc2
RS
11159 machine_mode mode = GET_MODE (vals);
11160 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 11161 rtx x;
4369c11e 11162
92695fbb 11163 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
11164 return NULL_RTX;
11165
11166 /* We can load this constant by using DUP and a constant in a
11167 single ARM register. This will be cheaper than a vector
11168 load. */
92695fbb 11169 x = copy_to_mode_reg (inner_mode, x);
4369c11e
TB
11170 return gen_rtx_VEC_DUPLICATE (mode, x);
11171}
11172
11173
11174/* Generate code to load VALS, which is a PARALLEL containing only
11175 constants (for vec_init) or CONST_VECTOR, efficiently into a
11176 register. Returns an RTX to copy into the register, or NULL_RTX
11177 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 11178static rtx
4369c11e
TB
11179aarch64_simd_make_constant (rtx vals)
11180{
ef4bddc2 11181 machine_mode mode = GET_MODE (vals);
4369c11e
TB
11182 rtx const_dup;
11183 rtx const_vec = NULL_RTX;
11184 int n_elts = GET_MODE_NUNITS (mode);
11185 int n_const = 0;
11186 int i;
11187
11188 if (GET_CODE (vals) == CONST_VECTOR)
11189 const_vec = vals;
11190 else if (GET_CODE (vals) == PARALLEL)
11191 {
11192 /* A CONST_VECTOR must contain only CONST_INTs and
11193 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11194 Only store valid constants in a CONST_VECTOR. */
11195 for (i = 0; i < n_elts; ++i)
11196 {
11197 rtx x = XVECEXP (vals, 0, i);
11198 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11199 n_const++;
11200 }
11201 if (n_const == n_elts)
11202 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
11203 }
11204 else
11205 gcc_unreachable ();
11206
11207 if (const_vec != NULL_RTX
48063b9d 11208 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
11209 /* Load using MOVI/MVNI. */
11210 return const_vec;
11211 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
11212 /* Loaded using DUP. */
11213 return const_dup;
11214 else if (const_vec != NULL_RTX)
11215 /* Load from constant pool. We can not take advantage of single-cycle
11216 LD1 because we need a PC-relative addressing mode. */
11217 return const_vec;
11218 else
11219 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11220 We can not construct an initializer. */
11221 return NULL_RTX;
11222}
11223
35a093b6
JG
11224/* Expand a vector initialisation sequence, such that TARGET is
11225 initialised to contain VALS. */
11226
4369c11e
TB
11227void
11228aarch64_expand_vector_init (rtx target, rtx vals)
11229{
ef4bddc2
RS
11230 machine_mode mode = GET_MODE (target);
11231 machine_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 11232 /* The number of vector elements. */
4369c11e 11233 int n_elts = GET_MODE_NUNITS (mode);
35a093b6 11234 /* The number of vector elements which are not constant. */
8b66a2d4
AL
11235 int n_var = 0;
11236 rtx any_const = NULL_RTX;
35a093b6
JG
11237 /* The first element of vals. */
11238 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 11239 bool all_same = true;
4369c11e 11240
35a093b6 11241 /* Count the number of variable elements to initialise. */
8b66a2d4 11242 for (int i = 0; i < n_elts; ++i)
4369c11e 11243 {
8b66a2d4 11244 rtx x = XVECEXP (vals, 0, i);
35a093b6 11245 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
11246 ++n_var;
11247 else
11248 any_const = x;
4369c11e 11249
35a093b6 11250 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
11251 }
11252
35a093b6
JG
11253 /* No variable elements, hand off to aarch64_simd_make_constant which knows
11254 how best to handle this. */
4369c11e
TB
11255 if (n_var == 0)
11256 {
11257 rtx constant = aarch64_simd_make_constant (vals);
11258 if (constant != NULL_RTX)
11259 {
11260 emit_move_insn (target, constant);
11261 return;
11262 }
11263 }
11264
11265 /* Splat a single non-constant element if we can. */
11266 if (all_same)
11267 {
35a093b6 11268 rtx x = copy_to_mode_reg (inner_mode, v0);
4369c11e
TB
11269 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
11270 return;
11271 }
11272
35a093b6
JG
11273 /* Initialise a vector which is part-variable. We want to first try
11274 to build those lanes which are constant in the most efficient way we
11275 can. */
11276 if (n_var != n_elts)
4369c11e
TB
11277 {
11278 rtx copy = copy_rtx (vals);
4369c11e 11279
8b66a2d4
AL
11280 /* Load constant part of vector. We really don't care what goes into the
11281 parts we will overwrite, but we're more likely to be able to load the
11282 constant efficiently if it has fewer, larger, repeating parts
11283 (see aarch64_simd_valid_immediate). */
11284 for (int i = 0; i < n_elts; i++)
11285 {
11286 rtx x = XVECEXP (vals, 0, i);
11287 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11288 continue;
11289 rtx subst = any_const;
11290 for (int bit = n_elts / 2; bit > 0; bit /= 2)
11291 {
11292 /* Look in the copied vector, as more elements are const. */
11293 rtx test = XVECEXP (copy, 0, i ^ bit);
11294 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
11295 {
11296 subst = test;
11297 break;
11298 }
11299 }
11300 XVECEXP (copy, 0, i) = subst;
11301 }
4369c11e 11302 aarch64_expand_vector_init (target, copy);
35a093b6 11303 }
4369c11e 11304
35a093b6 11305 /* Insert the variable lanes directly. */
8b66a2d4 11306
35a093b6
JG
11307 enum insn_code icode = optab_handler (vec_set_optab, mode);
11308 gcc_assert (icode != CODE_FOR_nothing);
4369c11e 11309
8b66a2d4 11310 for (int i = 0; i < n_elts; i++)
35a093b6
JG
11311 {
11312 rtx x = XVECEXP (vals, 0, i);
11313 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11314 continue;
11315 x = copy_to_mode_reg (inner_mode, x);
11316 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
11317 }
4369c11e
TB
11318}
11319
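/* Illustrative sketch (not part of GCC): the loop above rewrites each variable
   lane of the copied vector with a constant taken from a lane at increasing
   power-of-two distances (falling back to any known constant), so that the
   constant part of the vector has fewer, larger repeating pieces and is cheaper
   to load before the variable lanes are inserted.  A simplified scalar analogue
   with hypothetical arguments:  */
static void backfill_variable_lanes (int *lanes, const int *is_const,
                                     int n_elts, int any_const)
{
  for (int i = 0; i < n_elts; i++)
    {
      if (is_const[i])
        continue;
      int subst = any_const;
      for (int bit = n_elts / 2; bit > 0; bit /= 2)
        if (is_const[i ^ bit])
          {
            subst = lanes[i ^ bit];
            break;
          }
      lanes[i] = subst;
    }
}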
43e9d192 11320static unsigned HOST_WIDE_INT
ef4bddc2 11321aarch64_shift_truncation_mask (machine_mode mode)
43e9d192
IB
11322{
11323 return
ac59ad4e
KT
11324 (!SHIFT_COUNT_TRUNCATED
11325 || aarch64_vector_mode_supported_p (mode)
43e9d192
IB
11326 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
11327}
11328
43e9d192
IB
11329/* Select a format to encode pointers in exception handling data. */
11330int
11331aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
11332{
11333 int type;
11334 switch (aarch64_cmodel)
11335 {
11336 case AARCH64_CMODEL_TINY:
11337 case AARCH64_CMODEL_TINY_PIC:
11338 case AARCH64_CMODEL_SMALL:
11339 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 11340 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
11341 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
11342 for everything. */
11343 type = DW_EH_PE_sdata4;
11344 break;
11345 default:
11346 /* No assumptions here. 8-byte relocs required. */
11347 type = DW_EH_PE_sdata8;
11348 break;
11349 }
11350 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
11351}
11352
e1c1ecb0
KT
11353/* The last .arch and .tune assembly strings that we printed. */
11354static std::string aarch64_last_printed_arch_string;
11355static std::string aarch64_last_printed_tune_string;
11356
361fb3ee
KT
11357/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
11358 by the function fndecl. */
11359
11360void
11361aarch64_declare_function_name (FILE *stream, const char* name,
11362 tree fndecl)
11363{
11364 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
11365
11366 struct cl_target_option *targ_options;
11367 if (target_parts)
11368 targ_options = TREE_TARGET_OPTION (target_parts);
11369 else
11370 targ_options = TREE_TARGET_OPTION (target_option_current_node);
11371 gcc_assert (targ_options);
11372
11373 const struct processor *this_arch
11374 = aarch64_get_arch (targ_options->x_explicit_arch);
11375
054b4005
JG
11376 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
11377 std::string extension
04a99ebe
JG
11378 = aarch64_get_extension_string_for_isa_flags (isa_flags,
11379 this_arch->flags);
e1c1ecb0
KT
11380 /* Only update the assembler .arch string if it is distinct from the last
11381 such string we printed. */
11382 std::string to_print = this_arch->name + extension;
11383 if (to_print != aarch64_last_printed_arch_string)
11384 {
11385 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
11386 aarch64_last_printed_arch_string = to_print;
11387 }
361fb3ee
KT
11388
11389 /* Print the cpu name we're tuning for in the comments; it might be
e1c1ecb0
KT
11390 useful to readers of the generated asm. Do it only when it changes
11391 from function to function and verbose assembly is requested. */
361fb3ee
KT
11392 const struct processor *this_tune
11393 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
11394
e1c1ecb0
KT
11395 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
11396 {
11397 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
11398 this_tune->name);
11399 aarch64_last_printed_tune_string = this_tune->name;
11400 }
361fb3ee
KT
11401
11402 /* Don't forget the type directive for ELF. */
11403 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
11404 ASM_OUTPUT_LABEL (stream, name);
11405}
11406
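/* Illustrative sketch (not part of GCC): the function above emits output along
   these lines; the architecture string, extension suffixes and tuning name all
   depend on the selected options, so the values shown are examples only.

        .arch armv8-a+crc
        //.tune cortex-a57           (only when verbose assembly is requested)
        .type   foo, %function
     foo:
*/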
e1c1ecb0
KT
11407/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
11408
11409static void
11410aarch64_start_file (void)
11411{
11412 struct cl_target_option *default_options
11413 = TREE_TARGET_OPTION (target_option_default_node);
11414
11415 const struct processor *default_arch
11416 = aarch64_get_arch (default_options->x_explicit_arch);
11417 unsigned long default_isa_flags = default_options->x_aarch64_isa_flags;
11418 std::string extension
04a99ebe
JG
11419 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
11420 default_arch->flags);
e1c1ecb0
KT
11421
11422 aarch64_last_printed_arch_string = default_arch->name + extension;
11423 aarch64_last_printed_tune_string = "";
11424 asm_fprintf (asm_out_file, "\t.arch %s\n",
11425 aarch64_last_printed_arch_string.c_str ());
11426
11427 default_file_start ();
11428}
11429
0462169c
SN
11430/* Emit load exclusive. */
11431
11432static void
ef4bddc2 11433aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
11434 rtx mem, rtx model_rtx)
11435{
11436 rtx (*gen) (rtx, rtx, rtx);
11437
11438 switch (mode)
11439 {
11440 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
11441 case HImode: gen = gen_aarch64_load_exclusivehi; break;
11442 case SImode: gen = gen_aarch64_load_exclusivesi; break;
11443 case DImode: gen = gen_aarch64_load_exclusivedi; break;
11444 default:
11445 gcc_unreachable ();
11446 }
11447
11448 emit_insn (gen (rval, mem, model_rtx));
11449}
11450
11451/* Emit store exclusive. */
11452
11453static void
ef4bddc2 11454aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
11455 rtx rval, rtx mem, rtx model_rtx)
11456{
11457 rtx (*gen) (rtx, rtx, rtx, rtx);
11458
11459 switch (mode)
11460 {
11461 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
11462 case HImode: gen = gen_aarch64_store_exclusivehi; break;
11463 case SImode: gen = gen_aarch64_store_exclusivesi; break;
11464 case DImode: gen = gen_aarch64_store_exclusivedi; break;
11465 default:
11466 gcc_unreachable ();
11467 }
11468
11469 emit_insn (gen (bval, rval, mem, model_rtx));
11470}
11471
11472/* Mark the previous jump instruction as unlikely. */
11473
11474static void
11475aarch64_emit_unlikely_jump (rtx insn)
11476{
e5af9ddd 11477 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c 11478
f370536c
TS
11479 rtx_insn *jump = emit_jump_insn (insn);
11480 add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
0462169c
SN
11481}
11482
11483/* Expand a compare and swap pattern. */
11484
11485void
11486aarch64_expand_compare_and_swap (rtx operands[])
11487{
11488 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 11489 machine_mode mode, cmp_mode;
b0770c0f
MW
11490 typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
11491 int idx;
11492 gen_cas_fn gen;
11493 const gen_cas_fn split_cas[] =
11494 {
11495 gen_aarch64_compare_and_swapqi,
11496 gen_aarch64_compare_and_swaphi,
11497 gen_aarch64_compare_and_swapsi,
11498 gen_aarch64_compare_and_swapdi
11499 };
11500 const gen_cas_fn atomic_cas[] =
11501 {
11502 gen_aarch64_compare_and_swapqi_lse,
11503 gen_aarch64_compare_and_swaphi_lse,
11504 gen_aarch64_compare_and_swapsi_lse,
11505 gen_aarch64_compare_and_swapdi_lse
11506 };
0462169c
SN
11507
11508 bval = operands[0];
11509 rval = operands[1];
11510 mem = operands[2];
11511 oldval = operands[3];
11512 newval = operands[4];
11513 is_weak = operands[5];
11514 mod_s = operands[6];
11515 mod_f = operands[7];
11516 mode = GET_MODE (mem);
11517 cmp_mode = mode;
11518
11519 /* Normally the succ memory model must be stronger than fail, but in the
11520 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
11521 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
11522
46b35980
AM
11523 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
11524 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
11525 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
11526
11527 switch (mode)
11528 {
11529 case QImode:
11530 case HImode:
11531 /* For short modes, we're going to perform the comparison in SImode,
11532 so do the zero-extension now. */
11533 cmp_mode = SImode;
11534 rval = gen_reg_rtx (SImode);
11535 oldval = convert_modes (SImode, mode, oldval, true);
11536 /* Fall through. */
11537
11538 case SImode:
11539 case DImode:
11540 /* Force the value into a register if needed. */
11541 if (!aarch64_plus_operand (oldval, mode))
11542 oldval = force_reg (cmp_mode, oldval);
11543 break;
11544
11545 default:
11546 gcc_unreachable ();
11547 }
11548
11549 switch (mode)
11550 {
b0770c0f
MW
11551 case QImode: idx = 0; break;
11552 case HImode: idx = 1; break;
11553 case SImode: idx = 2; break;
11554 case DImode: idx = 3; break;
0462169c
SN
11555 default:
11556 gcc_unreachable ();
11557 }
b0770c0f
MW
11558 if (TARGET_LSE)
11559 gen = atomic_cas[idx];
11560 else
11561 gen = split_cas[idx];
0462169c
SN
11562
11563 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
11564
11565 if (mode == QImode || mode == HImode)
11566 emit_move_insn (operands[1], gen_lowpart (mode, rval));
11567
11568 x = gen_rtx_REG (CCmode, CC_REGNUM);
11569 x = gen_rtx_EQ (SImode, x, const0_rtx);
f7df4a84 11570 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
11571}
11572
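/* Illustrative usage sketch (not part of GCC): the expander above implements the
   compare-and-swap built-ins; when the target has LSE (e.g. -march=armv8.1-a)
   the single-instruction CAS form is used, otherwise the load-exclusive/
   store-exclusive split form.  */
#include <stdatomic.h>
#include <stdbool.h>
static bool try_claim (atomic_int *flag)
{
  int expected = 0;
  /* Expected to expand through the compare-and-swap pattern on AArch64.  */
  return atomic_compare_exchange_strong (flag, &expected, 1);
}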
641c2f8b
MW
11573/* Test whether the target supports using an atomic load-operate instruction
11574 for operation CODE. Returns FALSE if the operation isn't supported by the
11575 architecture. */
11578
11579bool
11580aarch64_atomic_ldop_supported_p (enum rtx_code code)
11581{
11582 if (!TARGET_LSE)
11583 return false;
11584
11585 switch (code)
11586 {
11587 case SET:
11588 case AND:
11589 case IOR:
11590 case XOR:
11591 case MINUS:
11592 case PLUS:
11593 return true;
11594 default:
11595 return false;
11596 }
11597}
11598
f70fb3b6
MW
11599/* Emit a barrier appropriate for memory model MODEL at the end of a
11600 sequence implementing an atomic operation. */
11601
11602static void
11603aarch64_emit_post_barrier (enum memmodel model)
11604{
11605 const enum memmodel base_model = memmodel_base (model);
11606
11607 if (is_mm_sync (model)
11608 && (base_model == MEMMODEL_ACQUIRE
11609 || base_model == MEMMODEL_ACQ_REL
11610 || base_model == MEMMODEL_SEQ_CST))
11611 {
11612 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
11613 }
11614}
11615
b0770c0f
MW
11616/* Emit an atomic compare-and-swap operation. RVAL is the destination register
11617 for the data in memory. EXPECTED is the value expected to be in memory.
11618 DESIRED is the value to store to memory. MEM is the memory location. MODEL
11619 is the memory ordering to use. */
11620
11621void
11622aarch64_gen_atomic_cas (rtx rval, rtx mem,
11623 rtx expected, rtx desired,
11624 rtx model)
11625{
11626 rtx (*gen) (rtx, rtx, rtx, rtx);
11627 machine_mode mode;
11628
11629 mode = GET_MODE (mem);
11630
11631 switch (mode)
11632 {
11633 case QImode: gen = gen_aarch64_atomic_casqi; break;
11634 case HImode: gen = gen_aarch64_atomic_cashi; break;
11635 case SImode: gen = gen_aarch64_atomic_cassi; break;
11636 case DImode: gen = gen_aarch64_atomic_casdi; break;
11637 default:
11638 gcc_unreachable ();
11639 }
11640
11641 /* Move the expected value into the CAS destination register. */
11642 emit_insn (gen_rtx_SET (rval, expected));
11643
11644 /* Emit the CAS. */
11645 emit_insn (gen (rval, mem, desired, model));
11646
11647 /* Compare the expected value with the value loaded by the CAS, to establish
11648 whether the swap was made. */
11649 aarch64_gen_compare_reg (EQ, rval, expected);
11650}
11651
0462169c
SN
11652/* Split a compare and swap pattern. */
11653
11654void
11655aarch64_split_compare_and_swap (rtx operands[])
11656{
11657 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 11658 machine_mode mode;
0462169c 11659 bool is_weak;
5d8a22a5
DM
11660 rtx_code_label *label1, *label2;
11661 rtx x, cond;
ab876106
MW
11662 enum memmodel model;
11663 rtx model_rtx;
0462169c
SN
11664
11665 rval = operands[0];
11666 mem = operands[1];
11667 oldval = operands[2];
11668 newval = operands[3];
11669 is_weak = (operands[4] != const0_rtx);
ab876106 11670 model_rtx = operands[5];
0462169c
SN
11671 scratch = operands[7];
11672 mode = GET_MODE (mem);
ab876106 11673 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 11674
5d8a22a5 11675 label1 = NULL;
0462169c
SN
11676 if (!is_weak)
11677 {
11678 label1 = gen_label_rtx ();
11679 emit_label (label1);
11680 }
11681 label2 = gen_label_rtx ();
11682
ab876106
MW
11683 /* The initial load can be relaxed for a __sync operation since a final
11684 barrier will be emitted to stop code hoisting. */
11685 if (is_mm_sync (model))
11686 aarch64_emit_load_exclusive (mode, rval, mem,
11687 GEN_INT (MEMMODEL_RELAXED));
11688 else
11689 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c
SN
11690
11691 cond = aarch64_gen_compare_reg (NE, rval, oldval);
11692 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
11693 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
11694 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
f7df4a84 11695 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c 11696
ab876106 11697 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
11698
11699 if (!is_weak)
11700 {
11701 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
11702 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
11703 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 11704 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
11705 }
11706 else
11707 {
11708 cond = gen_rtx_REG (CCmode, CC_REGNUM);
11709 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 11710 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
11711 }
11712
11713 emit_label (label2);
ab876106
MW
11714
11715 /* Emit any final barrier needed for a __sync operation. */
11716 if (is_mm_sync (model))
11717 aarch64_emit_post_barrier (model);
0462169c
SN
11718}
11719
68729b06
MW
11720/* Emit a BIC instruction. */
11721
11722static void
11723aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
11724{
11725 rtx shift_rtx = GEN_INT (shift);
11726 rtx (*gen) (rtx, rtx, rtx, rtx);
11727
11728 switch (mode)
11729 {
11730 case SImode: gen = gen_and_one_cmpl_lshrsi3; break;
11731 case DImode: gen = gen_and_one_cmpl_lshrdi3; break;
11732 default:
11733 gcc_unreachable ();
11734 }
11735
11736 emit_insn (gen (dst, s2, shift_rtx, s1));
11737}
11738
9cd7b720
MW
11739/* Emit an atomic swap. */
11740
11741static void
11742aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
11743 rtx mem, rtx model)
11744{
11745 rtx (*gen) (rtx, rtx, rtx, rtx);
11746
11747 switch (mode)
11748 {
11749 case QImode: gen = gen_aarch64_atomic_swpqi; break;
11750 case HImode: gen = gen_aarch64_atomic_swphi; break;
11751 case SImode: gen = gen_aarch64_atomic_swpsi; break;
11752 case DImode: gen = gen_aarch64_atomic_swpdi; break;
11753 default:
11754 gcc_unreachable ();
11755 }
11756
11757 emit_insn (gen (dst, mem, value, model));
11758}
11759
641c2f8b
MW
11760/* Operations supported by aarch64_emit_atomic_load_op. */
11761
11762enum aarch64_atomic_load_op_code
11763{
11764 AARCH64_LDOP_PLUS, /* A + B */
11765 AARCH64_LDOP_XOR, /* A ^ B */
11766 AARCH64_LDOP_OR, /* A | B */
11767 AARCH64_LDOP_BIC /* A & ~B */
11768};
11769
11770/* Emit an atomic load-operate. */
11771
11772static void
11773aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
11774 machine_mode mode, rtx dst, rtx src,
11775 rtx mem, rtx model)
11776{
11777 typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
11778 const aarch64_atomic_load_op_fn plus[] =
11779 {
11780 gen_aarch64_atomic_loadaddqi,
11781 gen_aarch64_atomic_loadaddhi,
11782 gen_aarch64_atomic_loadaddsi,
11783 gen_aarch64_atomic_loadadddi
11784 };
11785 const aarch64_atomic_load_op_fn eor[] =
11786 {
11787 gen_aarch64_atomic_loadeorqi,
11788 gen_aarch64_atomic_loadeorhi,
11789 gen_aarch64_atomic_loadeorsi,
11790 gen_aarch64_atomic_loadeordi
11791 };
11792 const aarch64_atomic_load_op_fn ior[] =
11793 {
11794 gen_aarch64_atomic_loadsetqi,
11795 gen_aarch64_atomic_loadsethi,
11796 gen_aarch64_atomic_loadsetsi,
11797 gen_aarch64_atomic_loadsetdi
11798 };
11799 const aarch64_atomic_load_op_fn bic[] =
11800 {
11801 gen_aarch64_atomic_loadclrqi,
11802 gen_aarch64_atomic_loadclrhi,
11803 gen_aarch64_atomic_loadclrsi,
11804 gen_aarch64_atomic_loadclrdi
11805 };
11806 aarch64_atomic_load_op_fn gen;
11807 int idx = 0;
11808
11809 switch (mode)
11810 {
11811 case QImode: idx = 0; break;
11812 case HImode: idx = 1; break;
11813 case SImode: idx = 2; break;
11814 case DImode: idx = 3; break;
11815 default:
11816 gcc_unreachable ();
11817 }
11818
11819 switch (code)
11820 {
11821 case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
11822 case AARCH64_LDOP_XOR: gen = eor[idx]; break;
11823 case AARCH64_LDOP_OR: gen = ior[idx]; break;
11824 case AARCH64_LDOP_BIC: gen = bic[idx]; break;
11825 default:
11826 gcc_unreachable ();
11827 }
11828
11829 emit_insn (gen (dst, mem, src, model));
11830}
11831
11832/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
68729b06
MW
11833 location to store the data read from memory. OUT_RESULT is the location to
11834 store the result of the operation. MEM is the memory location to read and
11835 modify. MODEL_RTX is the memory ordering to use. VALUE is the second
11836 operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
11837 be NULL. */
9cd7b720
MW
11838
11839void
68729b06 11840aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
9cd7b720
MW
11841 rtx mem, rtx value, rtx model_rtx)
11842{
11843 machine_mode mode = GET_MODE (mem);
641c2f8b
MW
11844 machine_mode wmode = (mode == DImode ? DImode : SImode);
11845 const bool short_mode = (mode < SImode);
11846 aarch64_atomic_load_op_code ldop_code;
11847 rtx src;
11848 rtx x;
11849
11850 if (out_data)
11851 out_data = gen_lowpart (mode, out_data);
9cd7b720 11852
68729b06
MW
11853 if (out_result)
11854 out_result = gen_lowpart (mode, out_result);
11855
641c2f8b
MW
11856 /* Make sure the value is in a register, putting it into a destination
11857 register if it needs to be manipulated. */
11858 if (!register_operand (value, mode)
11859 || code == AND || code == MINUS)
11860 {
68729b06 11861 src = out_result ? out_result : out_data;
641c2f8b
MW
11862 emit_move_insn (src, gen_lowpart (mode, value));
11863 }
11864 else
11865 src = value;
11866 gcc_assert (register_operand (src, mode));
9cd7b720 11867
641c2f8b
MW
11868 /* Preprocess the data for the operation as necessary. If the operation is
11869 a SET then emit a swap instruction and finish. */
9cd7b720
MW
11870 switch (code)
11871 {
11872 case SET:
641c2f8b 11873 aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
9cd7b720
MW
11874 return;
11875
641c2f8b
MW
11876 case MINUS:
11877 /* Negate the value and treat it as a PLUS. */
11878 {
11879 rtx neg_src;
11880
11881 /* Resize the value if necessary. */
11882 if (short_mode)
11883 src = gen_lowpart (wmode, src);
11884
11885 neg_src = gen_rtx_NEG (wmode, src);
11886 emit_insn (gen_rtx_SET (src, neg_src));
11887
11888 if (short_mode)
11889 src = gen_lowpart (mode, src);
11890 }
11891 /* Fall-through. */
11892 case PLUS:
11893 ldop_code = AARCH64_LDOP_PLUS;
11894 break;
11895
11896 case IOR:
11897 ldop_code = AARCH64_LDOP_OR;
11898 break;
11899
11900 case XOR:
11901 ldop_code = AARCH64_LDOP_XOR;
11902 break;
11903
11904 case AND:
11905 {
11906 rtx not_src;
11907
11908 /* Resize the value if necessary. */
11909 if (short_mode)
11910 src = gen_lowpart (wmode, src);
11911
11912 not_src = gen_rtx_NOT (wmode, src);
11913 emit_insn (gen_rtx_SET (src, not_src));
11914
11915 if (short_mode)
11916 src = gen_lowpart (mode, src);
11917 }
11918 ldop_code = AARCH64_LDOP_BIC;
11919 break;
11920
9cd7b720
MW
11921 default:
11922 /* The operation can't be done with atomic instructions. */
11923 gcc_unreachable ();
11924 }
641c2f8b
MW
11925
11926 aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
68729b06
MW
11927
11928 /* If necessary, calculate the data in memory after the update by redoing the
11929 operation from values in registers. */
11930 if (!out_result)
11931 return;
11932
11933 if (short_mode)
11934 {
11935 src = gen_lowpart (wmode, src);
11936 out_data = gen_lowpart (wmode, out_data);
11937 out_result = gen_lowpart (wmode, out_result);
11938 }
11939
11940 x = NULL_RTX;
11941
11942 switch (code)
11943 {
11944 case MINUS:
11945 case PLUS:
11946 x = gen_rtx_PLUS (wmode, out_data, src);
11947 break;
11948 case IOR:
11949 x = gen_rtx_IOR (wmode, out_data, src);
11950 break;
11951 case XOR:
11952 x = gen_rtx_XOR (wmode, out_data, src);
11953 break;
11954 case AND:
11955 aarch64_emit_bic (wmode, out_result, out_data, src, 0);
11956 return;
11957 default:
11958 gcc_unreachable ();
11959 }
11960
11961 emit_set_insn (out_result, x);
11962
11963 return;
9cd7b720
MW
11964}
11965
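/* Illustrative usage sketch (not part of GCC): when LSE is available the
   load-operate path above serves the __atomic fetch-and-op built-ins; MINUS and
   AND are handled by negating or inverting the operand first so that LDADD and
   LDCLR can be used.  */
#include <stdint.h>
static uint32_t fetch_ops (uint32_t *p, uint32_t v)
{
  uint32_t old = __atomic_fetch_add (p, v, __ATOMIC_SEQ_CST);
  old += __atomic_fetch_and (p, ~v, __ATOMIC_ACQ_REL);
  return old;
}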
0462169c
SN
11966/* Split an atomic operation. */
11967
11968void
11969aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 11970 rtx value, rtx model_rtx, rtx cond)
0462169c 11971{
ef4bddc2
RS
11972 machine_mode mode = GET_MODE (mem);
11973 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
11974 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
11975 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
11976 rtx_code_label *label;
11977 rtx x;
0462169c 11978
9cd7b720 11979 /* Split the atomic operation into a sequence. */
0462169c
SN
11980 label = gen_label_rtx ();
11981 emit_label (label);
11982
11983 if (new_out)
11984 new_out = gen_lowpart (wmode, new_out);
11985 if (old_out)
11986 old_out = gen_lowpart (wmode, old_out);
11987 else
11988 old_out = new_out;
11989 value = simplify_gen_subreg (wmode, value, mode, 0);
11990
f70fb3b6
MW
11991 /* The initial load can be relaxed for a __sync operation since a final
11992 barrier will be emitted to stop code hoisting. */
11993 if (is_sync)
11994 aarch64_emit_load_exclusive (mode, old_out, mem,
11995 GEN_INT (MEMMODEL_RELAXED));
11996 else
11997 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
11998
11999 switch (code)
12000 {
12001 case SET:
12002 new_out = value;
12003 break;
12004
12005 case NOT:
12006 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 12007 emit_insn (gen_rtx_SET (new_out, x));
0462169c 12008 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 12009 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
12010 break;
12011
12012 case MINUS:
12013 if (CONST_INT_P (value))
12014 {
12015 value = GEN_INT (-INTVAL (value));
12016 code = PLUS;
12017 }
12018 /* Fall through. */
12019
12020 default:
12021 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 12022 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
12023 break;
12024 }
12025
12026 aarch64_emit_store_exclusive (mode, cond, mem,
12027 gen_lowpart (mode, new_out), model_rtx);
12028
12029 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
12030 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12031 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 12032 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
12033
12034 /* Emit any final barrier needed for a __sync operation. */
12035 if (is_sync)
12036 aarch64_emit_post_barrier (model);
0462169c
SN
12037}
12038
c2ec330c
AL
12039static void
12040aarch64_init_libfuncs (void)
12041{
12042 /* Half-precision float operations. The compiler handles all operations
12043 with NULL libfuncs by converting to SFmode. */
12044
12045 /* Conversions. */
12046 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
12047 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
12048
12049 /* Arithmetic. */
12050 set_optab_libfunc (add_optab, HFmode, NULL);
12051 set_optab_libfunc (sdiv_optab, HFmode, NULL);
12052 set_optab_libfunc (smul_optab, HFmode, NULL);
12053 set_optab_libfunc (neg_optab, HFmode, NULL);
12054 set_optab_libfunc (sub_optab, HFmode, NULL);
12055
12056 /* Comparisons. */
12057 set_optab_libfunc (eq_optab, HFmode, NULL);
12058 set_optab_libfunc (ne_optab, HFmode, NULL);
12059 set_optab_libfunc (lt_optab, HFmode, NULL);
12060 set_optab_libfunc (le_optab, HFmode, NULL);
12061 set_optab_libfunc (ge_optab, HFmode, NULL);
12062 set_optab_libfunc (gt_optab, HFmode, NULL);
12063 set_optab_libfunc (unord_optab, HFmode, NULL);
12064}
12065
43e9d192 12066/* Target hook for c_mode_for_suffix. */
ef4bddc2 12067static machine_mode
43e9d192
IB
12068aarch64_c_mode_for_suffix (char suffix)
12069{
12070 if (suffix == 'q')
12071 return TFmode;
12072
12073 return VOIDmode;
12074}
12075
3520f7cc
JG
12076/* We can only represent floating point constants which will fit in
12077 "quarter-precision" values. These values are characterised by
12078 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
12079 by:
12080
12081 (-1)^s * (n/16) * 2^r
12082
12083 Where:
12084 's' is the sign bit.
12085 'n' is an integer in the range 16 <= n <= 31.
12086 'r' is an integer in the range -3 <= r <= 4. */
12087
12088/* Return true iff X can be represented by a quarter-precision
12089 floating point immediate operand. Note, we cannot represent 0.0. */
12090bool
12091aarch64_float_const_representable_p (rtx x)
12092{
12093 /* This represents our current view of how many bits
12094 make up the mantissa. */
12095 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 12096 int exponent;
3520f7cc 12097 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 12098 REAL_VALUE_TYPE r, m;
807e902e 12099 bool fail;
3520f7cc
JG
12100
12101 if (!CONST_DOUBLE_P (x))
12102 return false;
12103
c2ec330c
AL
12104 /* We don't support HFmode constants yet. */
12105 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
94bfa2da
TV
12106 return false;
12107
34a72c33 12108 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
12109
12110 /* We cannot represent infinities, NaNs or +/-zero. We won't
12111 know if we have +zero until we analyse the mantissa, but we
12112 can reject the other invalid values. */
12113 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
12114 || REAL_VALUE_MINUS_ZERO (r))
12115 return false;
12116
ba96cdfb 12117 /* Extract exponent. */
3520f7cc
JG
12118 r = real_value_abs (&r);
12119 exponent = REAL_EXP (&r);
12120
12121 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12122 highest (sign) bit, with a fixed binary point at bit point_pos.
12123 m1 holds the low part of the mantissa, m2 the high part.
12124 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
12125 bits for the mantissa, this can fail (low bits will be lost). */
12126 real_ldexp (&m, &r, point_pos - exponent);
807e902e 12127 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
12128
12129 /* If the low part of the mantissa has bits set we cannot represent
12130 the value. */
807e902e 12131 if (w.elt (0) != 0)
3520f7cc
JG
12132 return false;
12133 /* We have rejected the lower HOST_WIDE_INT, so update our
12134 understanding of how many bits lie in the mantissa and
12135 look only at the high HOST_WIDE_INT. */
807e902e 12136 mantissa = w.elt (1);
3520f7cc
JG
12137 point_pos -= HOST_BITS_PER_WIDE_INT;
12138
12139 /* We can only represent values with a mantissa of the form 1.xxxx. */
12140 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12141 if ((mantissa & mask) != 0)
12142 return false;
12143
12144 /* Having filtered unrepresentable values, we may now remove all
12145 but the highest 5 bits. */
12146 mantissa >>= point_pos - 5;
12147
12148 /* We cannot represent the value 0.0, so reject it. This is handled
12149 elsewhere. */
12150 if (mantissa == 0)
12151 return false;
12152
12153 /* Then, as bit 4 is always set, we can mask it off, leaving
12154 the mantissa in the range [0, 15]. */
12155 mantissa &= ~(1 << 4);
12156 gcc_assert (mantissa <= 15);
12157
12158 /* GCC internally does not use IEEE754-like encoding (where normalized
12159 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
12160 Our mantissa values are shifted 4 places to the left relative to
12161 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
12162 by 5 places to correct for GCC's representation. */
12163 exponent = 5 - exponent;
12164
12165 return (exponent >= 0 && exponent <= 7);
12166}
12167
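/* Illustrative sketch (not part of GCC): a standalone predicate for the same
   (-1)^s * (n/16) * 2^r encoding, using the C library's frexp instead of GCC's
   REAL_VALUE machinery.  frexp yields fabs (x) == m * 2^e with 0.5 <= m < 1,
   so n == m * 32 and r == e - 1.  */
#include <math.h>
#include <stdbool.h>
static bool quarter_precision_immediate_p (double x)
{
  if (x == 0.0 || !isfinite (x))
    return false;
  int e;
  double m = frexp (fabs (x), &e);
  double n = m * 32.0;           /* candidate mantissa, 16 <= n < 32 */
  if (n != floor (n))
    return false;                /* mantissa needs more than 4 fraction bits */
  int r = e - 1;
  return r >= -3 && r <= 4;
}
/* quarter_precision_immediate_p (0.5) and (31.0) are true; (0.1) is false.  */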
12168char*
81c2dfb9 12169aarch64_output_simd_mov_immediate (rtx const_vector,
ef4bddc2 12170 machine_mode mode,
3520f7cc
JG
12171 unsigned width)
12172{
3ea63f60 12173 bool is_valid;
3520f7cc 12174 static char templ[40];
3520f7cc 12175 const char *mnemonic;
e4f0f84d 12176 const char *shift_op;
3520f7cc 12177 unsigned int lane_count = 0;
81c2dfb9 12178 char element_char;
3520f7cc 12179
e4f0f84d 12180 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
12181
12182 /* This will return true to show const_vector is legal for use as either
12183 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
12184 also update INFO to show how the immediate should be generated. */
81c2dfb9 12185 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
12186 gcc_assert (is_valid);
12187
81c2dfb9 12188 element_char = sizetochar (info.element_width);
48063b9d
IB
12189 lane_count = width / info.element_width;
12190
3520f7cc 12191 mode = GET_MODE_INNER (mode);
0d8e1702 12192 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
3520f7cc 12193 {
48063b9d 12194 gcc_assert (info.shift == 0 && ! info.mvn);
0d8e1702
KT
12195 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
12196 move immediate path. */
48063b9d
IB
12197 if (aarch64_float_const_zero_rtx_p (info.value))
12198 info.value = GEN_INT (0);
12199 else
12200 {
83faf7d0 12201 const unsigned int buf_size = 20;
48063b9d 12202 char float_buf[buf_size] = {'\0'};
34a72c33
RS
12203 real_to_decimal_for_mode (float_buf,
12204 CONST_DOUBLE_REAL_VALUE (info.value),
12205 buf_size, buf_size, 1, mode);
48063b9d
IB
12206
12207 if (lane_count == 1)
12208 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
12209 else
12210 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 12211 lane_count, element_char, float_buf);
48063b9d
IB
12212 return templ;
12213 }
3520f7cc 12214 }
3520f7cc 12215
48063b9d 12216 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 12217 shift_op = info.msl ? "msl" : "lsl";
3520f7cc 12218
0d8e1702 12219 gcc_assert (CONST_INT_P (info.value));
3520f7cc 12220 if (lane_count == 1)
48063b9d
IB
12221 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
12222 mnemonic, UINTVAL (info.value));
12223 else if (info.shift)
12224 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
12225 ", %s %d", mnemonic, lane_count, element_char,
12226 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 12227 else
48063b9d 12228 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 12229 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
12230 return templ;
12231}
12232
b7342d25
IB
12233char*
12234aarch64_output_scalar_simd_mov_immediate (rtx immediate,
ef4bddc2 12235 machine_mode mode)
b7342d25 12236{
ef4bddc2 12237 machine_mode vmode;
b7342d25
IB
12238
12239 gcc_assert (!VECTOR_MODE_P (mode));
12240 vmode = aarch64_simd_container_mode (mode, 64);
12241 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
12242 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
12243}
12244
88b08073
JG
12245/* Split operands into moves from op[1] + op[2] into op[0]. */
12246
12247void
12248aarch64_split_combinev16qi (rtx operands[3])
12249{
12250 unsigned int dest = REGNO (operands[0]);
12251 unsigned int src1 = REGNO (operands[1]);
12252 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 12253 machine_mode halfmode = GET_MODE (operands[1]);
88b08073
JG
12254 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
12255 rtx destlo, desthi;
12256
12257 gcc_assert (halfmode == V16QImode);
12258
12259 if (src1 == dest && src2 == dest + halfregs)
12260 {
12261 /* No-op move. Can't split to nothing; emit something. */
12262 emit_note (NOTE_INSN_DELETED);
12263 return;
12264 }
12265
12266 /* Preserve register attributes for variable tracking. */
12267 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
12268 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
12269 GET_MODE_SIZE (halfmode));
12270
12271 /* Special case of reversed high/low parts. */
12272 if (reg_overlap_mentioned_p (operands[2], destlo)
12273 && reg_overlap_mentioned_p (operands[1], desthi))
12274 {
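      /* Exchange the contents of the two source registers in place using
	 the classic triple-XOR swap (a ^= b; b ^= a; a ^= b), so no
	 scratch register is needed.  */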
12275 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
12276 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
12277 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
12278 }
12279 else if (!reg_overlap_mentioned_p (operands[2], destlo))
12280 {
12281 /* Try to avoid unnecessary moves if part of the result
12282 is in the right place already. */
12283 if (src1 != dest)
12284 emit_move_insn (destlo, operands[1]);
12285 if (src2 != dest + halfregs)
12286 emit_move_insn (desthi, operands[2]);
12287 }
12288 else
12289 {
12290 if (src2 != dest + halfregs)
12291 emit_move_insn (desthi, operands[2]);
12292 if (src1 != dest)
12293 emit_move_insn (destlo, operands[1]);
12294 }
12295}
12296
12297/* vec_perm support. */
12298
12299#define MAX_VECT_LEN 16
12300
12301struct expand_vec_perm_d
12302{
12303 rtx target, op0, op1;
12304 unsigned char perm[MAX_VECT_LEN];
ef4bddc2 12305 machine_mode vmode;
88b08073
JG
12306 unsigned char nelt;
12307 bool one_vector_p;
12308 bool testing_p;
12309};
12310
12311/* Generate a variable permutation. */
12312
12313static void
12314aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
12315{
ef4bddc2 12316 machine_mode vmode = GET_MODE (target);
88b08073
JG
12317 bool one_vector_p = rtx_equal_p (op0, op1);
12318
12319 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
12320 gcc_checking_assert (GET_MODE (op0) == vmode);
12321 gcc_checking_assert (GET_MODE (op1) == vmode);
12322 gcc_checking_assert (GET_MODE (sel) == vmode);
12323 gcc_checking_assert (TARGET_SIMD);
12324
12325 if (one_vector_p)
12326 {
12327 if (vmode == V8QImode)
12328 {
12329 /* Expand the argument to a V16QI mode by duplicating it. */
12330 rtx pair = gen_reg_rtx (V16QImode);
12331 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
12332 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
12333 }
12334 else
12335 {
12336 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
12337 }
12338 }
12339 else
12340 {
12341 rtx pair;
12342
12343 if (vmode == V8QImode)
12344 {
12345 pair = gen_reg_rtx (V16QImode);
12346 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
12347 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
12348 }
12349 else
12350 {
12351 pair = gen_reg_rtx (OImode);
12352 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
12353 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
12354 }
12355 }
12356}
12357
12358void
12359aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
12360{
ef4bddc2 12361 machine_mode vmode = GET_MODE (target);
c9d1a16a 12362 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 12363 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 12364 rtx mask;
88b08073
JG
12365
12366 /* The TBL instruction does not use a modulo index, so we must take care
12367 of that ourselves. */
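  /* For example, with a single V8QImode input an out-of-range selector
     value of 11 is reduced to 11 & 7 == 3 by the AND emitted below.  */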
f7c4e5b8
AL
12368 mask = aarch64_simd_gen_const_vector_dup (vmode,
12369 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
12370 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
12371
f7c4e5b8
AL
12372 /* For big-endian, we also need to reverse the index within the vector
12373 (but not which vector). */
12374 if (BYTES_BIG_ENDIAN)
12375 {
12376 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
12377 if (!one_vector_p)
12378 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
12379 sel = expand_simple_binop (vmode, XOR, sel, mask,
12380 NULL, 0, OPTAB_LIB_WIDEN);
12381 }
88b08073
JG
12382 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
12383}
12384
cc4d934f
JG
12385/* Recognize patterns suitable for the TRN instructions. */
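/* For example, for V4SImode a TRN1 permute selects the indices
   { 0, 4, 2, 6 } and a TRN2 permute selects { 1, 5, 3, 7 }
   (little-endian element numbering).  */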
12386static bool
12387aarch64_evpc_trn (struct expand_vec_perm_d *d)
12388{
12389 unsigned int i, odd, mask, nelt = d->nelt;
12390 rtx out, in0, in1, x;
12391 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 12392 machine_mode vmode = d->vmode;
cc4d934f
JG
12393
12394 if (GET_MODE_UNIT_SIZE (vmode) > 8)
12395 return false;
12396
12397 /* Note that these are little-endian tests.
12398 We correct for big-endian later. */
12399 if (d->perm[0] == 0)
12400 odd = 0;
12401 else if (d->perm[0] == 1)
12402 odd = 1;
12403 else
12404 return false;
12405 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
12406
12407 for (i = 0; i < nelt; i += 2)
12408 {
12409 if (d->perm[i] != i + odd)
12410 return false;
12411 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
12412 return false;
12413 }
12414
12415 /* Success! */
12416 if (d->testing_p)
12417 return true;
12418
12419 in0 = d->op0;
12420 in1 = d->op1;
12421 if (BYTES_BIG_ENDIAN)
12422 {
12423 x = in0, in0 = in1, in1 = x;
12424 odd = !odd;
12425 }
12426 out = d->target;
12427
12428 if (odd)
12429 {
12430 switch (vmode)
12431 {
12432 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
12433 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
12434 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
12435 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
12436 case V4SImode: gen = gen_aarch64_trn2v4si; break;
12437 case V2SImode: gen = gen_aarch64_trn2v2si; break;
12438 case V2DImode: gen = gen_aarch64_trn2v2di; break;
358decd5
JW
12439 case V4HFmode: gen = gen_aarch64_trn2v4hf; break;
12440 case V8HFmode: gen = gen_aarch64_trn2v8hf; break;
cc4d934f
JG
12441 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
12442 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
12443 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
12444 default:
12445 return false;
12446 }
12447 }
12448 else
12449 {
12450 switch (vmode)
12451 {
12452 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
12453 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
12454 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
12455 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
12456 case V4SImode: gen = gen_aarch64_trn1v4si; break;
12457 case V2SImode: gen = gen_aarch64_trn1v2si; break;
12458 case V2DImode: gen = gen_aarch64_trn1v2di; break;
358decd5
JW
12459 case V4HFmode: gen = gen_aarch64_trn1v4hf; break;
12460 case V8HFmode: gen = gen_aarch64_trn1v8hf; break;
cc4d934f
JG
12461 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
12462 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
12463 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
12464 default:
12465 return false;
12466 }
12467 }
12468
12469 emit_insn (gen (out, in0, in1));
12470 return true;
12471}
12472
12473/* Recognize patterns suitable for the UZP instructions. */
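/* For example, for V4SImode a UZP1 permute selects the even-numbered
   indices { 0, 2, 4, 6 } and a UZP2 permute selects { 1, 3, 5, 7 }
   (little-endian element numbering).  */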
12474static bool
12475aarch64_evpc_uzp (struct expand_vec_perm_d *d)
12476{
12477 unsigned int i, odd, mask, nelt = d->nelt;
12478 rtx out, in0, in1, x;
12479 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 12480 machine_mode vmode = d->vmode;
cc4d934f
JG
12481
12482 if (GET_MODE_UNIT_SIZE (vmode) > 8)
12483 return false;
12484
12485 /* Note that these are little-endian tests.
12486 We correct for big-endian later. */
12487 if (d->perm[0] == 0)
12488 odd = 0;
12489 else if (d->perm[0] == 1)
12490 odd = 1;
12491 else
12492 return false;
12493 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
12494
12495 for (i = 0; i < nelt; i++)
12496 {
12497 unsigned elt = (i * 2 + odd) & mask;
12498 if (d->perm[i] != elt)
12499 return false;
12500 }
12501
12502 /* Success! */
12503 if (d->testing_p)
12504 return true;
12505
12506 in0 = d->op0;
12507 in1 = d->op1;
12508 if (BYTES_BIG_ENDIAN)
12509 {
12510 x = in0, in0 = in1, in1 = x;
12511 odd = !odd;
12512 }
12513 out = d->target;
12514
12515 if (odd)
12516 {
12517 switch (vmode)
12518 {
12519 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
12520 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
12521 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
12522 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
12523 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
12524 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
12525 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
358decd5
JW
12526 case V4HFmode: gen = gen_aarch64_uzp2v4hf; break;
12527 case V8HFmode: gen = gen_aarch64_uzp2v8hf; break;
cc4d934f
JG
12528 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
12529 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
12530 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
12531 default:
12532 return false;
12533 }
12534 }
12535 else
12536 {
12537 switch (vmode)
12538 {
12539 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
12540 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
12541 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
12542 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
12543 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
12544 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
12545 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
358decd5
JW
12546 case V4HFmode: gen = gen_aarch64_uzp1v4hf; break;
12547 case V8HFmode: gen = gen_aarch64_uzp1v8hf; break;
cc4d934f
JG
12548 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
12549 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
12550 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
12551 default:
12552 return false;
12553 }
12554 }
12555
12556 emit_insn (gen (out, in0, in1));
12557 return true;
12558}
12559
12560/* Recognize patterns suitable for the ZIP instructions. */
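/* For example, for V4SImode a ZIP1 permute selects the indices
   { 0, 4, 1, 5 } and a ZIP2 permute selects { 2, 6, 3, 7 }
   (little-endian element numbering).  */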
12561static bool
12562aarch64_evpc_zip (struct expand_vec_perm_d *d)
12563{
12564 unsigned int i, high, mask, nelt = d->nelt;
12565 rtx out, in0, in1, x;
12566 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 12567 machine_mode vmode = d->vmode;
cc4d934f
JG
12568
12569 if (GET_MODE_UNIT_SIZE (vmode) > 8)
12570 return false;
12571
12572 /* Note that these are little-endian tests.
12573 We correct for big-endian later. */
12574 high = nelt / 2;
12575 if (d->perm[0] == high)
12576 /* Do Nothing. */
12577 ;
12578 else if (d->perm[0] == 0)
12579 high = 0;
12580 else
12581 return false;
12582 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
12583
12584 for (i = 0; i < nelt / 2; i++)
12585 {
12586 unsigned elt = (i + high) & mask;
12587 if (d->perm[i * 2] != elt)
12588 return false;
12589 elt = (elt + nelt) & mask;
12590 if (d->perm[i * 2 + 1] != elt)
12591 return false;
12592 }
12593
12594 /* Success! */
12595 if (d->testing_p)
12596 return true;
12597
12598 in0 = d->op0;
12599 in1 = d->op1;
12600 if (BYTES_BIG_ENDIAN)
12601 {
12602 x = in0, in0 = in1, in1 = x;
12603 high = !high;
12604 }
12605 out = d->target;
12606
12607 if (high)
12608 {
12609 switch (vmode)
12610 {
12611 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
12612 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
12613 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
12614 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
12615 case V4SImode: gen = gen_aarch64_zip2v4si; break;
12616 case V2SImode: gen = gen_aarch64_zip2v2si; break;
12617 case V2DImode: gen = gen_aarch64_zip2v2di; break;
358decd5
JW
12618 case V4HFmode: gen = gen_aarch64_zip2v4hf; break;
12619 case V8HFmode: gen = gen_aarch64_zip2v8hf; break;
cc4d934f
JG
12620 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
12621 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
12622 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
12623 default:
12624 return false;
12625 }
12626 }
12627 else
12628 {
12629 switch (vmode)
12630 {
12631 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
12632 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
12633 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
12634 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
12635 case V4SImode: gen = gen_aarch64_zip1v4si; break;
12636 case V2SImode: gen = gen_aarch64_zip1v2si; break;
12637 case V2DImode: gen = gen_aarch64_zip1v2di; break;
358decd5
JW
12638 case V4HFmode: gen = gen_aarch64_zip1v4hf; break;
12639 case V8HFmode: gen = gen_aarch64_zip1v8hf; break;
cc4d934f
JG
12640 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
12641 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
12642 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
12643 default:
12644 return false;
12645 }
12646 }
12647
12648 emit_insn (gen (out, in0, in1));
12649 return true;
12650}
12651
ae0533da
AL
12652/* Recognize patterns for the EXT insn. */
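/* EXT concatenates its two inputs and extracts NELT consecutive elements
   starting at a constant element offset; e.g. for V4SImode the permute
   { 1, 2, 3, 4 } is matched as an EXT with an element offset of 1.  */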
12653
12654static bool
12655aarch64_evpc_ext (struct expand_vec_perm_d *d)
12656{
12657 unsigned int i, nelt = d->nelt;
12658 rtx (*gen) (rtx, rtx, rtx, rtx);
12659 rtx offset;
12660
12661 unsigned int location = d->perm[0]; /* Always < nelt. */
12662
12663 /* Check if the extracted indices are increasing by one. */
12664 for (i = 1; i < nelt; i++)
12665 {
12666 unsigned int required = location + i;
12667 if (d->one_vector_p)
12668 {
12669 /* We'll pass the same vector in twice, so allow indices to wrap. */
12670 required &= (nelt - 1);
12671 }
12672 if (d->perm[i] != required)
12673 return false;
12674 }
12675
ae0533da
AL
12676 switch (d->vmode)
12677 {
12678 case V16QImode: gen = gen_aarch64_extv16qi; break;
12679 case V8QImode: gen = gen_aarch64_extv8qi; break;
12680 case V4HImode: gen = gen_aarch64_extv4hi; break;
12681 case V8HImode: gen = gen_aarch64_extv8hi; break;
12682 case V2SImode: gen = gen_aarch64_extv2si; break;
12683 case V4SImode: gen = gen_aarch64_extv4si; break;
358decd5
JW
12684 case V4HFmode: gen = gen_aarch64_extv4hf; break;
12685 case V8HFmode: gen = gen_aarch64_extv8hf; break;
ae0533da
AL
12686 case V2SFmode: gen = gen_aarch64_extv2sf; break;
12687 case V4SFmode: gen = gen_aarch64_extv4sf; break;
12688 case V2DImode: gen = gen_aarch64_extv2di; break;
12689 case V2DFmode: gen = gen_aarch64_extv2df; break;
12690 default:
12691 return false;
12692 }
12693
12694 /* Success! */
12695 if (d->testing_p)
12696 return true;
12697
b31e65bb
AL
12698 /* The case where (location == 0) is a no-op for both big- and little-endian,
12699 and is removed by the mid-end at optimization levels -O1 and higher. */
12700
12701 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
12702 {
12703 /* After setup, we want the high elements of the first vector (stored
12704 at the LSB end of the register), and the low elements of the second
12705 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 12706 std::swap (d->op0, d->op1);
ae0533da
AL
12707 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
12708 location = nelt - location;
12709 }
12710
12711 offset = GEN_INT (location);
12712 emit_insn (gen (d->target, d->op0, d->op1, offset));
12713 return true;
12714}
12715
923fcec3
AL
12716/* Recognize patterns for the REV insns. */
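/* These reverse the elements within 64-, 32- or 16-bit containers; e.g.
   the V8QImode permute { 7, 6, 5, 4, 3, 2, 1, 0 } (diff == 7) is matched
   as REV64.  */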
12717
12718static bool
12719aarch64_evpc_rev (struct expand_vec_perm_d *d)
12720{
12721 unsigned int i, j, diff, nelt = d->nelt;
12722 rtx (*gen) (rtx, rtx);
12723
12724 if (!d->one_vector_p)
12725 return false;
12726
12727 diff = d->perm[0];
12728 switch (diff)
12729 {
12730 case 7:
12731 switch (d->vmode)
12732 {
12733 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
12734 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
12735 default:
12736 return false;
12737 }
12738 break;
12739 case 3:
12740 switch (d->vmode)
12741 {
12742 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
12743 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
12744 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
12745 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
12746 default:
12747 return false;
12748 }
12749 break;
12750 case 1:
12751 switch (d->vmode)
12752 {
12753 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
12754 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
12755 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
12756 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
12757 case V4SImode: gen = gen_aarch64_rev64v4si; break;
12758 case V2SImode: gen = gen_aarch64_rev64v2si; break;
12759 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
12760 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
358decd5
JW
12761 case V8HFmode: gen = gen_aarch64_rev64v8hf; break;
12762 case V4HFmode: gen = gen_aarch64_rev64v4hf; break;
923fcec3
AL
12763 default:
12764 return false;
12765 }
12766 break;
12767 default:
12768 return false;
12769 }
12770
12771 for (i = 0; i < nelt ; i += diff + 1)
12772 for (j = 0; j <= diff; j += 1)
12773 {
12774 /* This is guaranteed to be true as the value of diff
12775 is 7, 3 or 1 and we should have enough elements in the
12776 queue to generate this. Getting a vector mask with a
12777 value of diff other than these values implies that
12778 something is wrong by the time we get here. */
12779 gcc_assert (i + j < nelt);
12780 if (d->perm[i + j] != i + diff - j)
12781 return false;
12782 }
12783
12784 /* Success! */
12785 if (d->testing_p)
12786 return true;
12787
12788 emit_insn (gen (d->target, d->op0));
12789 return true;
12790}
12791
91bd4114
JG
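/* Recognize permutes that broadcast a single element to every lane,
   e.g. { 2, 2, 2, 2 }, which map to a DUP (element) instruction.  */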
12792static bool
12793aarch64_evpc_dup (struct expand_vec_perm_d *d)
12794{
12795 rtx (*gen) (rtx, rtx, rtx);
12796 rtx out = d->target;
12797 rtx in0;
ef4bddc2 12798 machine_mode vmode = d->vmode;
91bd4114
JG
12799 unsigned int i, elt, nelt = d->nelt;
12800 rtx lane;
12801
91bd4114
JG
12802 elt = d->perm[0];
12803 for (i = 1; i < nelt; i++)
12804 {
12805 if (elt != d->perm[i])
12806 return false;
12807 }
12808
12809 /* The generic preparation in aarch64_expand_vec_perm_const_1
12810 swaps the operand order and the permute indices if it finds
12811 d->perm[0] to be in the second operand. Thus, we can always
12812 use d->op0 and need not do any extra arithmetic to get the
12813 correct lane number. */
12814 in0 = d->op0;
f901401e 12815 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114
JG
12816
12817 switch (vmode)
12818 {
12819 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
12820 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
12821 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
12822 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
12823 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
12824 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
12825 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
862abc04
AL
12826 case V8HFmode: gen = gen_aarch64_dup_lanev8hf; break;
12827 case V4HFmode: gen = gen_aarch64_dup_lanev4hf; break;
91bd4114
JG
12828 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
12829 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
12830 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
12831 default:
12832 return false;
12833 }
12834
12835 emit_insn (gen (out, in0, lane));
12836 return true;
12837}
12838
88b08073
JG
12839static bool
12840aarch64_evpc_tbl (struct expand_vec_perm_d *d)
12841{
12842 rtx rperm[MAX_VECT_LEN], sel;
ef4bddc2 12843 machine_mode vmode = d->vmode;
88b08073
JG
12844 unsigned int i, nelt = d->nelt;
12845
88b08073
JG
12846 if (d->testing_p)
12847 return true;
12848
12849 /* Generic code will try constant permutation twice. Once with the
12850 original mode and again with the elements lowered to QImode.
12851 So wait and don't do the selector expansion ourselves. */
12852 if (vmode != V8QImode && vmode != V16QImode)
12853 return false;
12854
12855 for (i = 0; i < nelt; ++i)
bbcc9c00
TB
12856 {
12857 int nunits = GET_MODE_NUNITS (vmode);
12858
12859 /* If big-endian and two vectors we end up with a weird mixed-endian
12860 mode on NEON. Reverse the index within each word but not the word
12861 itself. */
12862 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
12863 : d->perm[i]);
12864 }
88b08073
JG
12865 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
12866 sel = force_reg (vmode, sel);
12867
12868 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
12869 return true;
12870}
12871
12872static bool
12873aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
12874{
12875 /* The pattern matching functions above are written to look for a small
12876 number to begin the sequence (0, 1, N/2). If we begin with an index
12877 from the second operand, we can swap the operands. */
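  /* For example, the V4SImode permute { 4, 5, 6, 7 } becomes
     { 0, 1, 2, 3 } applied to the swapped operands.  */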
12878 if (d->perm[0] >= d->nelt)
12879 {
12880 unsigned i, nelt = d->nelt;
88b08073 12881
0696116a 12882 gcc_assert (nelt == (nelt & -nelt));
88b08073 12883 for (i = 0; i < nelt; ++i)
0696116a 12884 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
88b08073 12885
cb5c6c29 12886 std::swap (d->op0, d->op1);
88b08073
JG
12887 }
12888
12889 if (TARGET_SIMD)
cc4d934f 12890 {
923fcec3
AL
12891 if (aarch64_evpc_rev (d))
12892 return true;
12893 else if (aarch64_evpc_ext (d))
ae0533da 12894 return true;
f901401e
AL
12895 else if (aarch64_evpc_dup (d))
12896 return true;
ae0533da 12897 else if (aarch64_evpc_zip (d))
cc4d934f
JG
12898 return true;
12899 else if (aarch64_evpc_uzp (d))
12900 return true;
12901 else if (aarch64_evpc_trn (d))
12902 return true;
12903 return aarch64_evpc_tbl (d);
12904 }
88b08073
JG
12905 return false;
12906}
12907
12908/* Expand a vec_perm_const pattern. */
12909
12910bool
12911aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
12912{
12913 struct expand_vec_perm_d d;
12914 int i, nelt, which;
12915
12916 d.target = target;
12917 d.op0 = op0;
12918 d.op1 = op1;
12919
12920 d.vmode = GET_MODE (target);
12921 gcc_assert (VECTOR_MODE_P (d.vmode));
12922 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
12923 d.testing_p = false;
12924
12925 for (i = which = 0; i < nelt; ++i)
12926 {
12927 rtx e = XVECEXP (sel, 0, i);
12928 int ei = INTVAL (e) & (2 * nelt - 1);
12929 which |= (ei < nelt ? 1 : 2);
12930 d.perm[i] = ei;
12931 }
12932
12933 switch (which)
12934 {
12935 default:
12936 gcc_unreachable ();
12937
12938 case 3:
12939 d.one_vector_p = false;
12940 if (!rtx_equal_p (op0, op1))
12941 break;
12942
12943 /* The elements of PERM do not suggest that only the first operand
12944 is used, but both operands are identical. Allow easier matching
12945 of the permutation by folding the permutation into the single
12946 input vector. */
12947 /* Fall Through. */
12948 case 2:
12949 for (i = 0; i < nelt; ++i)
12950 d.perm[i] &= nelt - 1;
12951 d.op0 = op1;
12952 d.one_vector_p = true;
12953 break;
12954
12955 case 1:
12956 d.op1 = op0;
12957 d.one_vector_p = true;
12958 break;
12959 }
12960
12961 return aarch64_expand_vec_perm_const_1 (&d);
12962}
12963
12964static bool
ef4bddc2 12965aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073
JG
12966 const unsigned char *sel)
12967{
12968 struct expand_vec_perm_d d;
12969 unsigned int i, nelt, which;
12970 bool ret;
12971
12972 d.vmode = vmode;
12973 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
12974 d.testing_p = true;
12975 memcpy (d.perm, sel, nelt);
12976
12977 /* Calculate whether all elements are in one vector. */
12978 for (i = which = 0; i < nelt; ++i)
12979 {
12980 unsigned char e = d.perm[i];
12981 gcc_assert (e < 2 * nelt);
12982 which |= (e < nelt ? 1 : 2);
12983 }
12984
12985 /* If all elements are from the second vector, reindex as if from the
12986 first vector. */
12987 if (which == 2)
12988 for (i = 0; i < nelt; ++i)
12989 d.perm[i] -= nelt;
12990
12991 /* Check whether the mask can be applied to a single vector. */
12992 d.one_vector_p = (which != 3);
12993
12994 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
12995 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
12996 if (!d.one_vector_p)
12997 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
12998
12999 start_sequence ();
13000 ret = aarch64_expand_vec_perm_const_1 (&d);
13001 end_sequence ();
13002
13003 return ret;
13004}
13005
668046d1
DS
13006rtx
13007aarch64_reverse_mask (enum machine_mode mode)
13008{
13009 /* We have to reverse each vector because we don't have
13010 a permuted load that can reverse-load according to ABI rules. */
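  /* For example, for V4SImode the V16QImode selector built below is
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, i.e. the
     byte order is reversed within each 4-byte element.  */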
13011 rtx mask;
13012 rtvec v = rtvec_alloc (16);
13013 int i, j;
13014 int nunits = GET_MODE_NUNITS (mode);
13015 int usize = GET_MODE_UNIT_SIZE (mode);
13016
13017 gcc_assert (BYTES_BIG_ENDIAN);
13018 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
13019
13020 for (i = 0; i < nunits; i++)
13021 for (j = 0; j < usize; j++)
13022 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
13023 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
13024 return force_reg (V16QImode, mask);
13025}
13026
61f17a5c
WD
13027/* Implement MODES_TIEABLE_P. In principle we should always return true.
13028 However due to issues with register allocation it is preferable to avoid
13029 tying integer scalar and FP scalar modes. Executing integer operations
13030 in general registers is better than treating them as scalar vector
13031 operations. This reduces latency and avoids redundant int<->FP moves.
13032 So tie modes if they are either the same class, or vector modes with
13033 other vector modes, vector structs or any scalar mode.
13034*/
97e1ad78
JG
13035
13036bool
ef4bddc2 13037aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
13038{
13039 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
13040 return true;
13041
13042 /* We specifically want to allow elements of "structure" modes to
13043 be tieable to the structure. This more general condition allows
13044 other rarer situations too. */
61f17a5c
WD
13045 if (aarch64_vector_mode_p (mode1) && aarch64_vector_mode_p (mode2))
13046 return true;
13047
13048 /* Also allow any scalar modes with vectors. */
13049 if (aarch64_vector_mode_supported_p (mode1)
13050 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
13051 return true;
13052
13053 return false;
13054}
13055
e2c75eea
JG
13056/* Return a new RTX holding the result of moving POINTER forward by
13057 AMOUNT bytes. */
13058
13059static rtx
13060aarch64_move_pointer (rtx pointer, int amount)
13061{
13062 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
13063
13064 return adjust_automodify_address (pointer, GET_MODE (pointer),
13065 next, amount);
13066}
13067
13068/* Return a new RTX holding the result of moving POINTER forward by the
13069 size of the mode it points to. */
13070
13071static rtx
13072aarch64_progress_pointer (rtx pointer)
13073{
13074 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
13075
13076 return aarch64_move_pointer (pointer, amount);
13077}
13078
13079/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
13080 MODE bytes. */
13081
13082static void
13083aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 13084 machine_mode mode)
e2c75eea
JG
13085{
13086 rtx reg = gen_reg_rtx (mode);
13087
13088 /* "Cast" the pointers to the correct mode. */
13089 *src = adjust_address (*src, mode, 0);
13090 *dst = adjust_address (*dst, mode, 0);
13091 /* Emit the memcpy. */
13092 emit_move_insn (reg, *src);
13093 emit_move_insn (*dst, reg);
13094 /* Move the pointers forward. */
13095 *src = aarch64_progress_pointer (*src);
13096 *dst = aarch64_progress_pointer (*dst);
13097}
13098
13099/* Expand movmem, as if from a __builtin_memcpy. Return true if
13100 we succeed, otherwise return false. */
13101
13102bool
13103aarch64_expand_movmem (rtx *operands)
13104{
13105 unsigned int n;
13106 rtx dst = operands[0];
13107 rtx src = operands[1];
13108 rtx base;
13109 bool speed_p = !optimize_function_for_size_p (cfun);
13110
13111 /* When optimizing for size, give a better estimate of the length of a
13112 memcpy call, but use the default otherwise. */
13113 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
13114
13115 /* We can't do anything smart if the amount to copy is not constant. */
13116 if (!CONST_INT_P (operands[2]))
13117 return false;
13118
13119 n = UINTVAL (operands[2]);
13120
13121 /* Try to keep the number of instructions low. For cases below 16 bytes we
13122 need to make at most two moves. For cases above 16 bytes it will be one
13123 move for each 16 byte chunk, then at most two additional moves. */
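  /* For example, a 35-byte copy is estimated as 35 / 16 + 2 == 4
     instructions, within the limit of 7 used when optimizing for speed.  */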
13124 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
13125 return false;
13126
13127 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13128 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
13129
13130 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
13131 src = adjust_automodify_address (src, VOIDmode, base, 0);
13132
13133 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
13134 1-byte chunk. */
13135 if (n < 4)
13136 {
13137 if (n >= 2)
13138 {
13139 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
13140 n -= 2;
13141 }
13142
13143 if (n == 1)
13144 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
13145
13146 return true;
13147 }
13148
13149 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
13150 4-byte chunk, partially overlapping with the previously copied chunk. */
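  /* For example, a 7-byte copy is done as a 4-byte copy of bytes 0-3
     followed by a second 4-byte copy of bytes 3-6.  */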
13151 if (n < 8)
13152 {
13153 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13154 n -= 4;
13155 if (n > 0)
13156 {
13157 int move = n - 4;
13158
13159 src = aarch64_move_pointer (src, move);
13160 dst = aarch64_move_pointer (dst, move);
13161 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13162 }
13163 return true;
13164 }
13165
13166 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
13167 them, then (if applicable) an 8-byte chunk. */
13168 while (n >= 8)
13169 {
13170 if (n / 16)
13171 {
13172 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
13173 n -= 16;
13174 }
13175 else
13176 {
13177 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
13178 n -= 8;
13179 }
13180 }
13181
13182 /* Finish the final bytes of the copy. We can always do this in one
13183 instruction. We either copy the exact amount we need, or partially
13184 overlap with the previous chunk we copied and copy 8 bytes. */
13185 if (n == 0)
13186 return true;
13187 else if (n == 1)
13188 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
13189 else if (n == 2)
13190 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
13191 else if (n == 4)
13192 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13193 else
13194 {
13195 if (n == 3)
13196 {
13197 src = aarch64_move_pointer (src, -1);
13198 dst = aarch64_move_pointer (dst, -1);
13199 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13200 }
13201 else
13202 {
13203 int move = n - 8;
13204
13205 src = aarch64_move_pointer (src, move);
13206 dst = aarch64_move_pointer (dst, move);
13207 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
13208 }
13209 }
13210
13211 return true;
13212}
13213
141a3ccf
KT
13214/* Split a DImode store of a CONST_INT SRC to MEM DST as two
13215 SImode stores. Handle the case when the constant has identical
13216 bottom and top halves. This is beneficial when the two stores can be
13217 merged into an STP and we avoid synthesising potentially expensive
13218 immediates twice. Return true if such a split is possible. */
13219
13220bool
13221aarch64_split_dimode_const_store (rtx dst, rtx src)
13222{
13223 rtx lo = gen_lowpart (SImode, src);
13224 rtx hi = gen_highpart_mode (SImode, DImode, src);
13225
13226 bool size_p = optimize_function_for_size_p (cfun);
13227
13228 if (!rtx_equal_p (lo, hi))
13229 return false;
13230
13231 unsigned int orig_cost
13232 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
13233 unsigned int lo_cost
13234 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
13235
13236 /* We want to transform:
13237 MOV x1, 49370
13238 MOVK x1, 0x140, lsl 16
13239 MOVK x1, 0xc0da, lsl 32
13240 MOVK x1, 0x140, lsl 48
13241 STR x1, [x0]
13242 into:
13243 MOV w1, 49370
13244 MOVK w1, 0x140, lsl 16
13245 STP w1, w1, [x0]
13246 So we want to perform this only when we save two instructions
13247 or more. When optimizing for size, however, accept any code size
13248 savings we can. */
13249 if (size_p && orig_cost <= lo_cost)
13250 return false;
13251
13252 if (!size_p
13253 && (orig_cost <= lo_cost + 1))
13254 return false;
13255
13256 rtx mem_lo = adjust_address (dst, SImode, 0);
13257 if (!aarch64_mem_pair_operand (mem_lo, SImode))
13258 return false;
13259
13260 rtx tmp_reg = gen_reg_rtx (SImode);
13261 aarch64_expand_mov_immediate (tmp_reg, lo);
13262 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
13263 /* Don't emit an explicit store pair as this may not always be profitable.
13264 Let the sched-fusion logic decide whether to merge them. */
13265 emit_move_insn (mem_lo, tmp_reg);
13266 emit_move_insn (mem_hi, tmp_reg);
13267
13268 return true;
13269}
13270
a3125fc2
CL
13271/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
13272
13273static unsigned HOST_WIDE_INT
13274aarch64_asan_shadow_offset (void)
13275{
13276 return (HOST_WIDE_INT_1 << 36);
13277}
13278
d3006da6 13279static bool
445d7826 13280aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
d3006da6
JG
13281 unsigned int align,
13282 enum by_pieces_operation op,
13283 bool speed_p)
13284{
13285 /* STORE_BY_PIECES can be used when copying a constant string, but
13286 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
13287 For now we always fail this and let the move_by_pieces code copy
13288 the string from read-only memory. */
13289 if (op == STORE_BY_PIECES)
13290 return false;
13291
13292 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
13293}
13294
5f3bc026 13295static rtx
cb4347e8 13296aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
13297 int code, tree treeop0, tree treeop1)
13298{
c8012fbc
WD
13299 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
13300 rtx op0, op1;
5f3bc026 13301 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 13302 insn_code icode;
5f3bc026
ZC
13303 struct expand_operand ops[4];
13304
5f3bc026
ZC
13305 start_sequence ();
13306 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
13307
13308 op_mode = GET_MODE (op0);
13309 if (op_mode == VOIDmode)
13310 op_mode = GET_MODE (op1);
13311
13312 switch (op_mode)
13313 {
13314 case QImode:
13315 case HImode:
13316 case SImode:
13317 cmp_mode = SImode;
13318 icode = CODE_FOR_cmpsi;
13319 break;
13320
13321 case DImode:
13322 cmp_mode = DImode;
13323 icode = CODE_FOR_cmpdi;
13324 break;
13325
786e3c06
WD
13326 case SFmode:
13327 cmp_mode = SFmode;
13328 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
13329 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
13330 break;
13331
13332 case DFmode:
13333 cmp_mode = DFmode;
13334 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
13335 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
13336 break;
13337
5f3bc026
ZC
13338 default:
13339 end_sequence ();
13340 return NULL_RTX;
13341 }
13342
c8012fbc
WD
13343 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
13344 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
13345 if (!op0 || !op1)
13346 {
13347 end_sequence ();
13348 return NULL_RTX;
13349 }
13350 *prep_seq = get_insns ();
13351 end_sequence ();
13352
c8012fbc
WD
13353 create_fixed_operand (&ops[0], op0);
13354 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
13355
13356 start_sequence ();
c8012fbc 13357 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
13358 {
13359 end_sequence ();
13360 return NULL_RTX;
13361 }
13362 *gen_seq = get_insns ();
13363 end_sequence ();
13364
c8012fbc
WD
13365 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
13366 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
13367}
13368
13369static rtx
cb4347e8
TS
13370aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
13371 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 13372{
c8012fbc
WD
13373 rtx op0, op1, target;
13374 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 13375 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 13376 insn_code icode;
5f3bc026 13377 struct expand_operand ops[6];
c8012fbc 13378 int aarch64_cond;
5f3bc026 13379
cb4347e8 13380 push_to_sequence (*prep_seq);
5f3bc026
ZC
13381 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
13382
13383 op_mode = GET_MODE (op0);
13384 if (op_mode == VOIDmode)
13385 op_mode = GET_MODE (op1);
13386
13387 switch (op_mode)
13388 {
13389 case QImode:
13390 case HImode:
13391 case SImode:
13392 cmp_mode = SImode;
c8012fbc 13393 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
13394 break;
13395
13396 case DImode:
13397 cmp_mode = DImode;
c8012fbc 13398 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
13399 break;
13400
786e3c06
WD
13401 case SFmode:
13402 cmp_mode = SFmode;
13403 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
13404 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
13405 break;
13406
13407 case DFmode:
13408 cmp_mode = DFmode;
13409 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
13410 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
13411 break;
13412
5f3bc026
ZC
13413 default:
13414 end_sequence ();
13415 return NULL_RTX;
13416 }
13417
13418 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
13419 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
13420 if (!op0 || !op1)
13421 {
13422 end_sequence ();
13423 return NULL_RTX;
13424 }
13425 *prep_seq = get_insns ();
13426 end_sequence ();
13427
13428 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 13429 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 13430
c8012fbc
WD
13431 if (bit_code != AND)
13432 {
13433 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
13434 GET_MODE (XEXP (prev, 0))),
13435 VOIDmode, XEXP (prev, 0), const0_rtx);
13436 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
13437 }
13438
13439 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
13440 create_fixed_operand (&ops[1], target);
13441 create_fixed_operand (&ops[2], op0);
13442 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
13443 create_fixed_operand (&ops[4], prev);
13444 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 13445
cb4347e8 13446 push_to_sequence (*gen_seq);
5f3bc026
ZC
13447 if (!maybe_expand_insn (icode, 6, ops))
13448 {
13449 end_sequence ();
13450 return NULL_RTX;
13451 }
13452
13453 *gen_seq = get_insns ();
13454 end_sequence ();
13455
c8012fbc 13456 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
13457}
13458
13459#undef TARGET_GEN_CCMP_FIRST
13460#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
13461
13462#undef TARGET_GEN_CCMP_NEXT
13463#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
13464
6a569cdd
KT
13465/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
13466 instruction fusion of some sort. */
13467
13468static bool
13469aarch64_macro_fusion_p (void)
13470{
b175b679 13471 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
13472}
13473
13474
13475/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
13476 should be kept together during scheduling. */
13477
13478static bool
13479aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
13480{
13481 rtx set_dest;
13482 rtx prev_set = single_set (prev);
13483 rtx curr_set = single_set (curr);
13484 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
13485 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
13486
13487 if (!aarch64_macro_fusion_p ())
13488 return false;
13489
d7b03373 13490 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
13491 {
13492 /* We are trying to match:
13493 prev (mov) == (set (reg r0) (const_int imm16))
13494 curr (movk) == (set (zero_extract (reg r0)
13495 (const_int 16)
13496 (const_int 16))
13497 (const_int imm16_1)) */
13498
13499 set_dest = SET_DEST (curr_set);
13500
13501 if (GET_CODE (set_dest) == ZERO_EXTRACT
13502 && CONST_INT_P (SET_SRC (curr_set))
13503 && CONST_INT_P (SET_SRC (prev_set))
13504 && CONST_INT_P (XEXP (set_dest, 2))
13505 && INTVAL (XEXP (set_dest, 2)) == 16
13506 && REG_P (XEXP (set_dest, 0))
13507 && REG_P (SET_DEST (prev_set))
13508 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
13509 {
13510 return true;
13511 }
13512 }
13513
d7b03373 13514 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
13515 {
13516
13517 /* We're trying to match:
13518 prev (adrp) == (set (reg r1)
13519 (high (symbol_ref ("SYM"))))
13520 curr (add) == (set (reg r0)
13521 (lo_sum (reg r1)
13522 (symbol_ref ("SYM"))))
13523 Note that r0 need not necessarily be the same as r1, especially
13524 during pre-regalloc scheduling. */
13525
13526 if (satisfies_constraint_Ush (SET_SRC (prev_set))
13527 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
13528 {
13529 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
13530 && REG_P (XEXP (SET_SRC (curr_set), 0))
13531 && REGNO (XEXP (SET_SRC (curr_set), 0))
13532 == REGNO (SET_DEST (prev_set))
13533 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
13534 XEXP (SET_SRC (curr_set), 1)))
13535 return true;
13536 }
13537 }
13538
d7b03373 13539 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
13540 {
13541
13542 /* We're trying to match:
13543 prev (movk) == (set (zero_extract (reg r0)
13544 (const_int 16)
13545 (const_int 32))
13546 (const_int imm16_1))
13547 curr (movk) == (set (zero_extract (reg r0)
13548 (const_int 16)
13549 (const_int 48))
13550 (const_int imm16_2)) */
13551
13552 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
13553 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
13554 && REG_P (XEXP (SET_DEST (prev_set), 0))
13555 && REG_P (XEXP (SET_DEST (curr_set), 0))
13556 && REGNO (XEXP (SET_DEST (prev_set), 0))
13557 == REGNO (XEXP (SET_DEST (curr_set), 0))
13558 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
13559 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
13560 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
13561 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
13562 && CONST_INT_P (SET_SRC (prev_set))
13563 && CONST_INT_P (SET_SRC (curr_set)))
13564 return true;
13565
13566 }
d7b03373 13567 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
13568 {
13569 /* We're trying to match:
13570 prev (adrp) == (set (reg r0)
13571 (high (symbol_ref ("SYM"))))
13572 curr (ldr) == (set (reg r1)
13573 (mem (lo_sum (reg r0)
13574 (symbol_ref ("SYM")))))
13575 or
13576 curr (ldr) == (set (reg r1)
13577 (zero_extend (mem
13578 (lo_sum (reg r0)
13579 (symbol_ref ("SYM")))))) */
13580 if (satisfies_constraint_Ush (SET_SRC (prev_set))
13581 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
13582 {
13583 rtx curr_src = SET_SRC (curr_set);
13584
13585 if (GET_CODE (curr_src) == ZERO_EXTEND)
13586 curr_src = XEXP (curr_src, 0);
13587
13588 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
13589 && REG_P (XEXP (XEXP (curr_src, 0), 0))
13590 && REGNO (XEXP (XEXP (curr_src, 0), 0))
13591 == REGNO (SET_DEST (prev_set))
13592 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
13593 XEXP (SET_SRC (prev_set), 0)))
13594 return true;
13595 }
13596 }
cd0cb232 13597
d7b03373 13598 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
00a8574a
WD
13599 && aarch_crypto_can_dual_issue (prev, curr))
13600 return true;
13601
d7b03373 13602 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
13603 && any_condjump_p (curr))
13604 {
13605 enum attr_type prev_type = get_attr_type (prev);
13606
13607 /* FIXME: this misses some instructions that are considered simple
13608 arithmetic instructions for ThunderX. Simple shifts are missed here. */
13609 if (prev_type == TYPE_ALUS_SREG
13610 || prev_type == TYPE_ALUS_IMM
13611 || prev_type == TYPE_LOGICS_REG
13612 || prev_type == TYPE_LOGICS_IMM)
13613 return true;
13614 }
13615
6a569cdd
KT
13616 return false;
13617}
13618
f2879a90
KT
13619/* Return true iff the instruction fusion described by OP is enabled. */
13620
13621bool
13622aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
13623{
13624 return (aarch64_tune_params.fusible_ops & op) != 0;
13625}
13626
350013bc
BC
13627/* If MEM is in the form of [base+offset], extract the two parts of the
13628 address into BASE and OFFSET; otherwise return false after clearing
13629 BASE and OFFSET. */
13630
13631bool
13632extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
13633{
13634 rtx addr;
13635
13636 gcc_assert (MEM_P (mem));
13637
13638 addr = XEXP (mem, 0);
13639
13640 if (REG_P (addr))
13641 {
13642 *base = addr;
13643 *offset = const0_rtx;
13644 return true;
13645 }
13646
13647 if (GET_CODE (addr) == PLUS
13648 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
13649 {
13650 *base = XEXP (addr, 0);
13651 *offset = XEXP (addr, 1);
13652 return true;
13653 }
13654
13655 *base = NULL_RTX;
13656 *offset = NULL_RTX;
13657
13658 return false;
13659}
13660
13661/* Types for scheduling fusion. */
13662enum sched_fusion_type
13663{
13664 SCHED_FUSION_NONE = 0,
13665 SCHED_FUSION_LD_SIGN_EXTEND,
13666 SCHED_FUSION_LD_ZERO_EXTEND,
13667 SCHED_FUSION_LD,
13668 SCHED_FUSION_ST,
13669 SCHED_FUSION_NUM
13670};
13671
13672/* If INSN is a load or store with an address in the form of [base+offset],
13673 extract the two parts into BASE and OFFSET. Return the scheduling
13674 fusion type of this INSN. */
13675
13676static enum sched_fusion_type
13677fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
13678{
13679 rtx x, dest, src;
13680 enum sched_fusion_type fusion = SCHED_FUSION_LD;
13681
13682 gcc_assert (INSN_P (insn));
13683 x = PATTERN (insn);
13684 if (GET_CODE (x) != SET)
13685 return SCHED_FUSION_NONE;
13686
13687 src = SET_SRC (x);
13688 dest = SET_DEST (x);
13689
abc52318
KT
13690 machine_mode dest_mode = GET_MODE (dest);
13691
13692 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
13693 return SCHED_FUSION_NONE;
13694
13695 if (GET_CODE (src) == SIGN_EXTEND)
13696 {
13697 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
13698 src = XEXP (src, 0);
13699 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
13700 return SCHED_FUSION_NONE;
13701 }
13702 else if (GET_CODE (src) == ZERO_EXTEND)
13703 {
13704 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
13705 src = XEXP (src, 0);
13706 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
13707 return SCHED_FUSION_NONE;
13708 }
13709
13710 if (GET_CODE (src) == MEM && REG_P (dest))
13711 extract_base_offset_in_addr (src, base, offset);
13712 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
13713 {
13714 fusion = SCHED_FUSION_ST;
13715 extract_base_offset_in_addr (dest, base, offset);
13716 }
13717 else
13718 return SCHED_FUSION_NONE;
13719
13720 if (*base == NULL_RTX || *offset == NULL_RTX)
13721 fusion = SCHED_FUSION_NONE;
13722
13723 return fusion;
13724}
13725
13726/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
13727
13728 Currently we only support fusing ldr and str instructions, so FUSION_PRI
13729 and PRI are only calculated for these instructions. For other instructions,
13730 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
13731 types of instruction fusion can be added by returning different priorities.
13732
13733 It's important that irrelevant instructions get the largest FUSION_PRI. */
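/* For example, two SImode loads from [x1, 4] and [x1, 8] get the same
   FUSION_PRI (same fusion type and base register), while the load with
   the smaller offset gets the larger PRI, matching the "smaller offset
   goes first" rule in the code below.  */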
13734
13735static void
13736aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
13737 int *fusion_pri, int *pri)
13738{
13739 int tmp, off_val;
13740 rtx base, offset;
13741 enum sched_fusion_type fusion;
13742
13743 gcc_assert (INSN_P (insn));
13744
13745 tmp = max_pri - 1;
13746 fusion = fusion_load_store (insn, &base, &offset);
13747 if (fusion == SCHED_FUSION_NONE)
13748 {
13749 *pri = tmp;
13750 *fusion_pri = tmp;
13751 return;
13752 }
13753
13754 /* Set FUSION_PRI according to fusion type and base register. */
13755 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
13756
13757 /* Calculate PRI. */
13758 tmp /= 2;
13759
13760 /* INSN with smaller offset goes first. */
13761 off_val = (int)(INTVAL (offset));
13762 if (off_val >= 0)
13763 tmp -= (off_val & 0xfffff);
13764 else
13765 tmp += ((- off_val) & 0xfffff);
13766
13767 *pri = tmp;
13768 return;
13769}
13770
13771/* Given OPERANDS of consecutive load/store, check if we can merge
13772 them into ldp/stp. LOAD is true if they are load instructions.
13773 MODE is the mode of memory operands. */
13774
13775bool
13776aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
13777 enum machine_mode mode)
13778{
13779 HOST_WIDE_INT offval_1, offval_2, msize;
13780 enum reg_class rclass_1, rclass_2;
13781 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
13782
13783 if (load)
13784 {
13785 mem_1 = operands[1];
13786 mem_2 = operands[3];
13787 reg_1 = operands[0];
13788 reg_2 = operands[2];
13789 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
13790 if (REGNO (reg_1) == REGNO (reg_2))
13791 return false;
13792 }
13793 else
13794 {
13795 mem_1 = operands[0];
13796 mem_2 = operands[2];
13797 reg_1 = operands[1];
13798 reg_2 = operands[3];
13799 }
13800
bf84ac44
AP
13801 /* The mems cannot be volatile. */
13802 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
13803 return false;
13804
54700e2e
AP
13805 /* If we have SImode and slow unaligned ldp,
13806 check that the alignment is at least 8 bytes. */
13807 if (mode == SImode
13808 && (aarch64_tune_params.extra_tuning_flags
13809 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
13810 && !optimize_size
13811 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
13812 return false;
13813
350013bc
BC
13814 /* Check if the addresses are in the form of [base+offset]. */
13815 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
13816 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
13817 return false;
13818 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
13819 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
13820 return false;
13821
13822 /* Check if the bases are the same. */
13823 if (!rtx_equal_p (base_1, base_2))
13824 return false;
13825
13826 offval_1 = INTVAL (offset_1);
13827 offval_2 = INTVAL (offset_2);
13828 msize = GET_MODE_SIZE (mode);
13829 /* Check if the offsets are consecutive. */
13830 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
13831 return false;
13832
13833 /* Check if the addresses are clobbered by load. */
13834 if (load)
13835 {
13836 if (reg_mentioned_p (reg_1, mem_1))
13837 return false;
13838
13839 /* In increasing order, the last load can clobber the address. */
13840 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
13841 return false;
13842 }
13843
13844 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
13845 rclass_1 = FP_REGS;
13846 else
13847 rclass_1 = GENERAL_REGS;
13848
13849 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
13850 rclass_2 = FP_REGS;
13851 else
13852 rclass_2 = GENERAL_REGS;
13853
13854 /* Check if the registers are of the same class. */
13855 if (rclass_1 != rclass_2)
13856 return false;
13857
13858 return true;
13859}
13860
13861/* Given OPERANDS of consecutive load/store, check if we can merge
13862 them into ldp/stp by adjusting the offset. LOAD is true if they
13863 are load instructions. MODE is the mode of memory operands.
13864
13865 Given below consecutive stores:
13866
13867 str w1, [xb, 0x100]
13868 str w1, [xb, 0x104]
13869 str w1, [xb, 0x108]
13870 str w1, [xb, 0x10c]
13871
13872 Though the offsets are out of the range supported by stp, we can
13873 still pair them after adjusting the offset, like:
13874
13875 add scratch, xb, 0x100
13876 stp w1, w1, [scratch]
13877 stp w1, w1, [scratch, 0x8]
13878
13879 The peephole patterns detecting this opportunity should guarantee
13880 the scratch register is available. */
13881
13882bool
13883aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
13884 enum machine_mode mode)
13885{
13886 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
13887 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
13888 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
13889 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
13890
13891 if (load)
13892 {
13893 reg_1 = operands[0];
13894 mem_1 = operands[1];
13895 reg_2 = operands[2];
13896 mem_2 = operands[3];
13897 reg_3 = operands[4];
13898 mem_3 = operands[5];
13899 reg_4 = operands[6];
13900 mem_4 = operands[7];
13901 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
13902 && REG_P (reg_3) && REG_P (reg_4));
13903 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
13904 return false;
13905 }
13906 else
13907 {
13908 mem_1 = operands[0];
13909 reg_1 = operands[1];
13910 mem_2 = operands[2];
13911 reg_2 = operands[3];
13912 mem_3 = operands[4];
13913 reg_3 = operands[5];
13914 mem_4 = operands[6];
13915 reg_4 = operands[7];
13916 }
13917 /* Skip if the memory operand is already valid for ldp/stp by itself. */
13918 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
13919 return false;
13920
bf84ac44
AP
13921 /* The mems cannot be volatile. */
13922 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
13923 || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4))
13924 return false;
13925
350013bc
BC
13926 /* Check if the addresses are in the form of [base+offset]. */
13927 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
13928 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
13929 return false;
13930 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
13931 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
13932 return false;
13933 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
13934 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
13935 return false;
13936 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
13937 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
13938 return false;
13939
13940 /* Check if the bases are the same. */
13941 if (!rtx_equal_p (base_1, base_2)
13942 || !rtx_equal_p (base_2, base_3)
13943 || !rtx_equal_p (base_3, base_4))
13944 return false;
13945
13946 offval_1 = INTVAL (offset_1);
13947 offval_2 = INTVAL (offset_2);
13948 offval_3 = INTVAL (offset_3);
13949 offval_4 = INTVAL (offset_4);
13950 msize = GET_MODE_SIZE (mode);
13951 /* Check if the offsets are consecutive. */
13952 if ((offval_1 != (offval_2 + msize)
13953 || offval_1 != (offval_3 + msize * 2)
13954 || offval_1 != (offval_4 + msize * 3))
13955 && (offval_4 != (offval_3 + msize)
13956 || offval_4 != (offval_2 + msize * 2)
13957 || offval_4 != (offval_1 + msize * 3)))
13958 return false;
13959
13960 /* Check if the addresses are clobbered by load. */
13961 if (load)
13962 {
13963 if (reg_mentioned_p (reg_1, mem_1)
13964 || reg_mentioned_p (reg_2, mem_2)
13965 || reg_mentioned_p (reg_3, mem_3))
13966 return false;
13967
13968 /* In increasing order, the last load can clobber the address. */
13969 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
13970 return false;
13971 }
13972
54700e2e
AP
13973 /* If we have SImode and slow unaligned ldp,
13974 check that the alignment is at least 8 bytes. */
13975 if (mode == SImode
13976 && (aarch64_tune_params.extra_tuning_flags
13977 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
13978 && !optimize_size
13979 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
13980 return false;
13981
350013bc
BC
13982 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
13983 rclass_1 = FP_REGS;
13984 else
13985 rclass_1 = GENERAL_REGS;
13986
13987 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
13988 rclass_2 = FP_REGS;
13989 else
13990 rclass_2 = GENERAL_REGS;
13991
13992 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
13993 rclass_3 = FP_REGS;
13994 else
13995 rclass_3 = GENERAL_REGS;
13996
13997 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
13998 rclass_4 = FP_REGS;
13999 else
14000 rclass_4 = GENERAL_REGS;
14001
14002 /* Check if the registers are all of the same class. */
14003 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
14004 return false;
14005
14006 return true;
14007}
14008
14009/* Given OPERANDS of consecutive load/store, this function pairs them
14010 into ldp/stp after adjusting the offset. It depends on the fact
14011 that addresses of load/store instructions are in increasing order.
14012 MODE is the mode of memory operands. CODE is the rtl operator
14013 which should be applied to all memory operands; it is SIGN_EXTEND,
14014 ZERO_EXTEND or UNKNOWN. */
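
/* (As the code below assumes, operands[8] holds the scratch register that
   receives the adjusted base address.)  */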
14015
14016bool
14017aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
14018 enum machine_mode mode, RTX_CODE code)
14019{
14020 rtx base, offset, t1, t2;
14021 rtx mem_1, mem_2, mem_3, mem_4;
14022 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
14023
14024 if (load)
14025 {
14026 mem_1 = operands[1];
14027 mem_2 = operands[3];
14028 mem_3 = operands[5];
14029 mem_4 = operands[7];
14030 }
14031 else
14032 {
14033 mem_1 = operands[0];
14034 mem_2 = operands[2];
14035 mem_3 = operands[4];
14036 mem_4 = operands[6];
14037 gcc_assert (code == UNKNOWN);
14038 }
14039
14040 extract_base_offset_in_addr (mem_1, &base, &offset);
14041 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
14042
14043 /* Adjust the offset so that it can fit in an ldp/stp instruction. */
14044 msize = GET_MODE_SIZE (mode);
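  /* The ldp/stp immediate is a 7-bit signed value scaled by the access
     size, so msize * 0x40 is the first positive offset that is out of
     range.  */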
14045 stp_off_limit = msize * 0x40;
14046 off_val = INTVAL (offset);
14047 abs_off = (off_val < 0) ? -off_val : off_val;
14048 new_off = abs_off % stp_off_limit;
14049 adj_off = abs_off - new_off;
14050
14051 /* Further adjust to make sure all offsets are OK. */
14052 if ((new_off + msize * 2) >= stp_off_limit)
14053 {
14054 adj_off += stp_off_limit;
14055 new_off -= stp_off_limit;
14056 }
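
  /* Illustrative example (values not taken from any particular caller):
     for SImode, msize == 4 and stp_off_limit == 0x100; an original offset
     of 0x104 splits into adj_off == 0x100 and new_off == 0x4, so a single
     base adjustment serves all four accesses at offsets 0x4 .. 0x10.  */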
14057
14058 /* Make sure the adjustment can be done with ADD/SUB instructions. */
14059 if (adj_off >= 0x1000)
14060 return false;
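  /* (0x1000 is the first value that no longer fits in the 12-bit unsigned
     immediate of a single unshifted ADD/SUB, hence the limit above.)  */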
14061
14062 if (off_val < 0)
14063 {
14064 adj_off = -adj_off;
14065 new_off = -new_off;
14066 }
14067
14068 /* Create new memory references. */
14069 mem_1 = change_address (mem_1, VOIDmode,
14070 plus_constant (DImode, operands[8], new_off));
14071
14072 /* Check if the adjusted address is OK for ldp/stp. */
14073 if (!aarch64_mem_pair_operand (mem_1, mode))
14074 return false;
14075
14076 msize = GET_MODE_SIZE (mode);
14077 mem_2 = change_address (mem_2, VOIDmode,
14078 plus_constant (DImode,
14079 operands[8],
14080 new_off + msize));
14081 mem_3 = change_address (mem_3, VOIDmode,
14082 plus_constant (DImode,
14083 operands[8],
14084 new_off + msize * 2));
14085 mem_4 = change_address (mem_4, VOIDmode,
14086 plus_constant (DImode,
14087 operands[8],
14088 new_off + msize * 3));
14089
14090 if (code == ZERO_EXTEND)
14091 {
14092 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
14093 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
14094 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
14095 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
14096 }
14097 else if (code == SIGN_EXTEND)
14098 {
14099 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
14100 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
14101 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
14102 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
14103 }
14104
14105 if (load)
14106 {
14107 operands[1] = mem_1;
14108 operands[3] = mem_2;
14109 operands[5] = mem_3;
14110 operands[7] = mem_4;
14111 }
14112 else
14113 {
14114 operands[0] = mem_1;
14115 operands[2] = mem_2;
14116 operands[4] = mem_3;
14117 operands[6] = mem_4;
14118 }
14119
14120 /* Emit adjusting instruction. */
f7df4a84 14121 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
350013bc 14122 /* Emit ldp/stp instructions. */
f7df4a84
RS
14123 t1 = gen_rtx_SET (operands[0], operands[1]);
14124 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 14125 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
14126 t1 = gen_rtx_SET (operands[4], operands[5]);
14127 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
14128 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
14129 return true;
14130}
14131
1b1e81f8
JW
14132/* Return 1 if a pseudo register should be created and used to hold
14133 the GOT address for PIC code. */
14134
14135bool
14136aarch64_use_pseudo_pic_reg (void)
14137{
14138 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
14139}
14140
7b841a12
JW
14141/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
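
/* GOT address computations cannot fault, so the GOT-related unspecs are
   reported as non-trapping; all other unspecs fall back to the default.  */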
14142
14143static int
14144aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
14145{
14146 switch (XINT (x, 1))
14147 {
14148 case UNSPEC_GOTSMALLPIC:
14149 case UNSPEC_GOTSMALLPIC28K:
14150 case UNSPEC_GOTTINYPIC:
14151 return 0;
14152 default:
14153 break;
14154 }
14155
14156 return default_unspec_may_trap_p (x, flags);
14157}
14158
39252973
KT
14159
14160/* If X is a positive CONST_DOUBLE with a value that is a power of 2,
14161 return the log2 of that value. Otherwise return -1. */
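/* (For instance, 4.0 yields 2, while 0.5, -8.0 and NaN all yield -1.)  */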
14162
14163int
14164aarch64_fpconst_pow_of_2 (rtx x)
14165{
14166 const REAL_VALUE_TYPE *r;
14167
14168 if (!CONST_DOUBLE_P (x))
14169 return -1;
14170
14171 r = CONST_DOUBLE_REAL_VALUE (x);
14172
14173 if (REAL_VALUE_NEGATIVE (*r)
14174 || REAL_VALUE_ISNAN (*r)
14175 || REAL_VALUE_ISINF (*r)
14176 || !real_isinteger (r, DFmode))
14177 return -1;
14178
14179 return exact_log2 (real_to_integer (r));
14180}
14181
14182/* If X is a vector of equal CONST_DOUBLE values and that value is
14183 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
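/* (For instance, a V2DF constant { 8.0, 8.0 } yields 3; vectors with mixed
   or non-power-of-2 elements yield -1.)  */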
14184
14185int
14186aarch64_vec_fpconst_pow_of_2 (rtx x)
14187{
14188 if (GET_CODE (x) != CONST_VECTOR)
14189 return -1;
14190
14191 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
14192 return -1;
14193
14194 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
14195 if (firstval <= 0)
14196 return -1;
14197
14198 for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++)
14199 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
14200 return -1;
14201
14202 return firstval;
14203}
14204
c2ec330c
AL
14205/* Implement TARGET_PROMOTED_TYPE to promote __fp16 to float. */
14206static tree
14207aarch64_promoted_type (const_tree t)
14208{
14209 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
14210 return float_type_node;
14211 return NULL_TREE;
14212}
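
/* With this hook in place, arithmetic on __fp16 values is carried out in
   float and only the final result is narrowed back to __fp16 (a sketch of
   the intended effect, not a statement about every front end).  */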
ee62a5a6
RS
14213
14214/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
14215
14216static bool
9acc9cbe 14217aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
ee62a5a6
RS
14218 optimization_type opt_type)
14219{
14220 switch (op)
14221 {
14222 case rsqrt_optab:
9acc9cbe 14223 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
ee62a5a6
RS
14224
14225 default:
14226 return true;
14227 }
14228}
14229
43e9d192
IB
14230#undef TARGET_ADDRESS_COST
14231#define TARGET_ADDRESS_COST aarch64_address_cost
14232
14233/* This hook determines whether unnamed bitfields affect the alignment
14234 of the containing structure. The hook returns true if the structure
14235 should inherit the alignment requirements of an unnamed bitfield's
14236 type. */
14237#undef TARGET_ALIGN_ANON_BITFIELD
14238#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
14239
14240#undef TARGET_ASM_ALIGNED_DI_OP
14241#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
14242
14243#undef TARGET_ASM_ALIGNED_HI_OP
14244#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
14245
14246#undef TARGET_ASM_ALIGNED_SI_OP
14247#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
14248
14249#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
14250#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
14251 hook_bool_const_tree_hwi_hwi_const_tree_true
14252
e1c1ecb0
KT
14253#undef TARGET_ASM_FILE_START
14254#define TARGET_ASM_FILE_START aarch64_start_file
14255
43e9d192
IB
14256#undef TARGET_ASM_OUTPUT_MI_THUNK
14257#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
14258
14259#undef TARGET_ASM_SELECT_RTX_SECTION
14260#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
14261
14262#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
14263#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
14264
14265#undef TARGET_BUILD_BUILTIN_VA_LIST
14266#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
14267
14268#undef TARGET_CALLEE_COPIES
14269#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
14270
14271#undef TARGET_CAN_ELIMINATE
14272#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
14273
1fd8d40c
KT
14274#undef TARGET_CAN_INLINE_P
14275#define TARGET_CAN_INLINE_P aarch64_can_inline_p
14276
43e9d192
IB
14277#undef TARGET_CANNOT_FORCE_CONST_MEM
14278#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
14279
50487d79
EM
14280#undef TARGET_CASE_VALUES_THRESHOLD
14281#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
14282
43e9d192
IB
14283#undef TARGET_CONDITIONAL_REGISTER_USAGE
14284#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
14285
14286/* Only the least significant bit is used for initialization guard
14287 variables. */
14288#undef TARGET_CXX_GUARD_MASK_BIT
14289#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
14290
14291#undef TARGET_C_MODE_FOR_SUFFIX
14292#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
14293
14294#ifdef TARGET_BIG_ENDIAN_DEFAULT
14295#undef TARGET_DEFAULT_TARGET_FLAGS
14296#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
14297#endif
14298
14299#undef TARGET_CLASS_MAX_NREGS
14300#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
14301
119103ca
JG
14302#undef TARGET_BUILTIN_DECL
14303#define TARGET_BUILTIN_DECL aarch64_builtin_decl
14304
a6fc00da
BH
14305#undef TARGET_BUILTIN_RECIPROCAL
14306#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
14307
43e9d192
IB
14308#undef TARGET_EXPAND_BUILTIN
14309#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
14310
14311#undef TARGET_EXPAND_BUILTIN_VA_START
14312#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
14313
9697e620
JG
14314#undef TARGET_FOLD_BUILTIN
14315#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
14316
43e9d192
IB
14317#undef TARGET_FUNCTION_ARG
14318#define TARGET_FUNCTION_ARG aarch64_function_arg
14319
14320#undef TARGET_FUNCTION_ARG_ADVANCE
14321#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
14322
14323#undef TARGET_FUNCTION_ARG_BOUNDARY
14324#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
14325
14326#undef TARGET_FUNCTION_OK_FOR_SIBCALL
14327#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
14328
14329#undef TARGET_FUNCTION_VALUE
14330#define TARGET_FUNCTION_VALUE aarch64_function_value
14331
14332#undef TARGET_FUNCTION_VALUE_REGNO_P
14333#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
14334
14335#undef TARGET_FRAME_POINTER_REQUIRED
14336#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
14337
fc72cba7
AL
14338#undef TARGET_GIMPLE_FOLD_BUILTIN
14339#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 14340
43e9d192
IB
14341#undef TARGET_GIMPLIFY_VA_ARG_EXPR
14342#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
14343
14344#undef TARGET_INIT_BUILTINS
14345#define TARGET_INIT_BUILTINS aarch64_init_builtins
14346
c64f7d37
WD
14347#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
14348#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
14349 aarch64_ira_change_pseudo_allocno_class
14350
43e9d192
IB
14351#undef TARGET_LEGITIMATE_ADDRESS_P
14352#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
14353
14354#undef TARGET_LEGITIMATE_CONSTANT_P
14355#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
14356
491ec060
WD
14357#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
14358#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
14359 aarch64_legitimize_address_displacement
14360
43e9d192
IB
14361#undef TARGET_LIBGCC_CMP_RETURN_MODE
14362#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
14363
ac2b960f
YZ
14364#undef TARGET_MANGLE_TYPE
14365#define TARGET_MANGLE_TYPE aarch64_mangle_type
14366
43e9d192
IB
14367#undef TARGET_MEMORY_MOVE_COST
14368#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
14369
26e0ff94
WD
14370#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
14371#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
14372
43e9d192
IB
14373#undef TARGET_MUST_PASS_IN_STACK
14374#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
14375
14376/* This target hook should return true if accesses to volatile bitfields
14377 should use the narrowest mode possible. It should return false if these
14378 accesses should use the bitfield container type. */
14379#undef TARGET_NARROW_VOLATILE_BITFIELD
14380#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
14381
14382#undef TARGET_OPTION_OVERRIDE
14383#define TARGET_OPTION_OVERRIDE aarch64_override_options
14384
14385#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
14386#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
14387 aarch64_override_options_after_change
14388
361fb3ee
KT
14389#undef TARGET_OPTION_SAVE
14390#define TARGET_OPTION_SAVE aarch64_option_save
14391
14392#undef TARGET_OPTION_RESTORE
14393#define TARGET_OPTION_RESTORE aarch64_option_restore
14394
14395#undef TARGET_OPTION_PRINT
14396#define TARGET_OPTION_PRINT aarch64_option_print
14397
5a2c8331
KT
14398#undef TARGET_OPTION_VALID_ATTRIBUTE_P
14399#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
14400
d78006d9
KT
14401#undef TARGET_SET_CURRENT_FUNCTION
14402#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
14403
43e9d192
IB
14404#undef TARGET_PASS_BY_REFERENCE
14405#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
14406
14407#undef TARGET_PREFERRED_RELOAD_CLASS
14408#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
14409
cee66c68
WD
14410#undef TARGET_SCHED_REASSOCIATION_WIDTH
14411#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
14412
c2ec330c
AL
14413#undef TARGET_PROMOTED_TYPE
14414#define TARGET_PROMOTED_TYPE aarch64_promoted_type
14415
43e9d192
IB
14416#undef TARGET_SECONDARY_RELOAD
14417#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
14418
14419#undef TARGET_SHIFT_TRUNCATION_MASK
14420#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
14421
14422#undef TARGET_SETUP_INCOMING_VARARGS
14423#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
14424
14425#undef TARGET_STRUCT_VALUE_RTX
14426#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
14427
14428#undef TARGET_REGISTER_MOVE_COST
14429#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
14430
14431#undef TARGET_RETURN_IN_MEMORY
14432#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
14433
14434#undef TARGET_RETURN_IN_MSB
14435#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
14436
14437#undef TARGET_RTX_COSTS
7cc2145f 14438#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 14439
d126a4ae
AP
14440#undef TARGET_SCHED_ISSUE_RATE
14441#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
14442
d03f7e44
MK
14443#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
14444#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
14445 aarch64_sched_first_cycle_multipass_dfa_lookahead
14446
2d6bc7fa
KT
14447#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
14448#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
14449 aarch64_first_cycle_multipass_dfa_lookahead_guard
14450
43e9d192
IB
14451#undef TARGET_TRAMPOLINE_INIT
14452#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
14453
14454#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
14455#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
14456
14457#undef TARGET_VECTOR_MODE_SUPPORTED_P
14458#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
14459
14460#undef TARGET_ARRAY_MODE_SUPPORTED_P
14461#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
14462
8990e73a
TB
14463#undef TARGET_VECTORIZE_ADD_STMT_COST
14464#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
14465
14466#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
14467#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
14468 aarch64_builtin_vectorization_cost
14469
43e9d192
IB
14470#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
14471#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
14472
42fc9a7f
JG
14473#undef TARGET_VECTORIZE_BUILTINS
14474#define TARGET_VECTORIZE_BUILTINS
14475
14476#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
14477#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
14478 aarch64_builtin_vectorized_function
14479
3b357264
JG
14480#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
14481#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
14482 aarch64_autovectorize_vector_sizes
14483
aa87aced
KV
14484#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
14485#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
14486 aarch64_atomic_assign_expand_fenv
14487
43e9d192
IB
14488/* Section anchor support. */
14489
14490#undef TARGET_MIN_ANCHOR_OFFSET
14491#define TARGET_MIN_ANCHOR_OFFSET -256
14492
14493/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
14494 byte offset; we can do much more for larger data types, but have no way
14495 to determine the size of the access. We assume accesses are aligned. */
14496#undef TARGET_MAX_ANCHOR_OFFSET
14497#define TARGET_MAX_ANCHOR_OFFSET 4095
14498
db0253a4
TB
14499#undef TARGET_VECTOR_ALIGNMENT
14500#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
14501
14502#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
14503#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
14504 aarch64_simd_vector_alignment_reachable
14505
88b08073
JG
14506/* vec_perm support. */
14507
14508#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
14509#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
14510 aarch64_vectorize_vec_perm_const_ok
14511
c2ec330c
AL
14512#undef TARGET_INIT_LIBFUNCS
14513#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
70f09188 14514
706b2314 14515#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
14516#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
14517
5cb74e90
RR
14518#undef TARGET_FLAGS_REGNUM
14519#define TARGET_FLAGS_REGNUM CC_REGNUM
14520
78607708
TV
14521#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
14522#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
14523
a3125fc2
CL
14524#undef TARGET_ASAN_SHADOW_OFFSET
14525#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
14526
0c4ec427
RE
14527#undef TARGET_LEGITIMIZE_ADDRESS
14528#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
14529
d3006da6
JG
14530#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
14531#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
14532 aarch64_use_by_pieces_infrastructure_p
14533
594bdd53
FY
14534#undef TARGET_CAN_USE_DOLOOP_P
14535#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
14536
6a569cdd
KT
14537#undef TARGET_SCHED_MACRO_FUSION_P
14538#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
14539
14540#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
14541#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
14542
350013bc
BC
14543#undef TARGET_SCHED_FUSION_PRIORITY
14544#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
14545
7b841a12
JW
14546#undef TARGET_UNSPEC_MAY_TRAP_P
14547#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
14548
1b1e81f8
JW
14549#undef TARGET_USE_PSEUDO_PIC_REG
14550#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
14551
cc8ca59e
JB
14552#undef TARGET_PRINT_OPERAND
14553#define TARGET_PRINT_OPERAND aarch64_print_operand
14554
14555#undef TARGET_PRINT_OPERAND_ADDRESS
14556#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
14557
ee62a5a6
RS
14558#undef TARGET_OPTAB_SUPPORTED_P
14559#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
14560
43203dea
RR
14561#undef TARGET_OMIT_STRUCT_RETURN_REG
14562#define TARGET_OMIT_STRUCT_RETURN_REG true
14563
43e9d192
IB
14564struct gcc_target targetm = TARGET_INITIALIZER;
14565
14566#include "gt-aarch64.h"