]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/aarch64/aarch64.c
[Patch 5/7 mips] Deprecate *_BY_PIECES_P, move to hookized version
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64.c
CommitLineData
bdb7bf8a 1/* Machine description for AArch64 architecture.
23a5b65a 2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
43e9d192
IB
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "insn-codes.h"
26#include "rtl.h"
27#include "insn-attr.h"
28#include "tree.h"
d8a2d370
DN
29#include "stringpool.h"
30#include "stor-layout.h"
31#include "calls.h"
32#include "varasm.h"
43e9d192 33#include "regs.h"
60393bbc
AM
34#include "dominance.h"
35#include "cfg.h"
36#include "cfgrtl.h"
37#include "cfganal.h"
38#include "lcm.h"
39#include "cfgbuild.h"
40#include "cfgcleanup.h"
41#include "predict.h"
42#include "basic-block.h"
43e9d192
IB
43#include "df.h"
44#include "hard-reg-set.h"
45#include "output.h"
46#include "expr.h"
47#include "reload.h"
48#include "toplev.h"
49#include "target.h"
50#include "target-def.h"
51#include "targhooks.h"
52#include "ggc.h"
83685514
AM
53#include "hashtab.h"
54#include "hash-set.h"
55#include "vec.h"
56#include "machmode.h"
57#include "input.h"
43e9d192
IB
58#include "function.h"
59#include "tm_p.h"
60#include "recog.h"
61#include "langhooks.h"
62#include "diagnostic-core.h"
2fb9a547 63#include "hash-table.h"
2fb9a547
AM
64#include "tree-ssa-alias.h"
65#include "internal-fn.h"
66#include "gimple-fold.h"
67#include "tree-eh.h"
68#include "gimple-expr.h"
69#include "is-a.h"
18f429e2 70#include "gimple.h"
45b0be94 71#include "gimplify.h"
43e9d192
IB
72#include "optabs.h"
73#include "dwarf2.h"
8990e73a
TB
74#include "cfgloop.h"
75#include "tree-vectorizer.h"
d1bcc29f 76#include "aarch64-cost-tables.h"
0ee859b5 77#include "dumpfile.h"
9b2b7279 78#include "builtins.h"
8baff86e 79#include "rtl-iter.h"
43e9d192 80
28514dda
YZ
81/* Defined for convenience. */
82#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
83
43e9d192
IB
84/* Classifies an address.
85
86 ADDRESS_REG_IMM
87 A simple base register plus immediate offset.
88
89 ADDRESS_REG_WB
90 A base register indexed by immediate offset with writeback.
91
92 ADDRESS_REG_REG
93 A base register indexed by (optionally scaled) register.
94
95 ADDRESS_REG_UXTW
96 A base register indexed by (optionally scaled) zero-extended register.
97
98 ADDRESS_REG_SXTW
99 A base register indexed by (optionally scaled) sign-extended register.
100
101 ADDRESS_LO_SUM
102 A LO_SUM rtx with a base register and "LO12" symbol relocation.
103
104 ADDRESS_SYMBOLIC:
105 A constant symbolic address, in pc-relative literal pool. */
106
107enum aarch64_address_type {
108 ADDRESS_REG_IMM,
109 ADDRESS_REG_WB,
110 ADDRESS_REG_REG,
111 ADDRESS_REG_UXTW,
112 ADDRESS_REG_SXTW,
113 ADDRESS_LO_SUM,
114 ADDRESS_SYMBOLIC
115};
116
117struct aarch64_address_info {
118 enum aarch64_address_type type;
119 rtx base;
120 rtx offset;
121 int shift;
122 enum aarch64_symbol_type symbol_type;
123};
124
48063b9d
IB
125struct simd_immediate_info
126{
127 rtx value;
128 int shift;
129 int element_width;
48063b9d 130 bool mvn;
e4f0f84d 131 bool msl;
48063b9d
IB
132};
133
43e9d192
IB
134/* The current code model. */
135enum aarch64_code_model aarch64_cmodel;
136
137#ifdef HAVE_AS_TLS
138#undef TARGET_HAVE_TLS
139#define TARGET_HAVE_TLS 1
140#endif
141
38e8f663 142static bool aarch64_lra_p (void);
ef4bddc2
RS
143static bool aarch64_composite_type_p (const_tree, machine_mode);
144static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
43e9d192 145 const_tree,
ef4bddc2 146 machine_mode *, int *,
43e9d192
IB
147 bool *);
148static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
149static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
43e9d192 150static void aarch64_override_options_after_change (void);
ef4bddc2 151static bool aarch64_vector_mode_supported_p (machine_mode);
43e9d192 152static unsigned bit_count (unsigned HOST_WIDE_INT);
ef4bddc2 153static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073 154 const unsigned char *sel);
ef4bddc2 155static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
88b08073 156
43e9d192 157/* The processor for which instructions should be scheduled. */
02fdbd5b 158enum aarch64_processor aarch64_tune = cortexa53;
43e9d192
IB
159
160/* The current tuning set. */
161const struct tune_params *aarch64_tune_params;
162
163/* Mask to specify which instructions we are allowed to generate. */
164unsigned long aarch64_isa_flags = 0;
165
166/* Mask to specify which instruction scheduling options should be used. */
167unsigned long aarch64_tune_flags = 0;
168
169/* Tuning parameters. */
170
171#if HAVE_DESIGNATED_INITIALIZERS
172#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
173#else
174#define NAMED_PARAM(NAME, VAL) (VAL)
175#endif
176
177#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
178__extension__
179#endif
43e9d192
IB
180
181#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182__extension__
183#endif
184static const struct cpu_addrcost_table generic_addrcost_table =
185{
67747367
JG
186#if HAVE_DESIGNATED_INITIALIZERS
187 .addr_scale_costs =
188#endif
189 {
67747367
JG
190 NAMED_PARAM (hi, 0),
191 NAMED_PARAM (si, 0),
8d805e02 192 NAMED_PARAM (di, 0),
67747367
JG
193 NAMED_PARAM (ti, 0),
194 },
43e9d192
IB
195 NAMED_PARAM (pre_modify, 0),
196 NAMED_PARAM (post_modify, 0),
197 NAMED_PARAM (register_offset, 0),
198 NAMED_PARAM (register_extend, 0),
199 NAMED_PARAM (imm_offset, 0)
200};
201
60bff090
JG
202#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
203__extension__
204#endif
205static const struct cpu_addrcost_table cortexa57_addrcost_table =
206{
207#if HAVE_DESIGNATED_INITIALIZERS
208 .addr_scale_costs =
209#endif
210 {
60bff090
JG
211 NAMED_PARAM (hi, 1),
212 NAMED_PARAM (si, 0),
8d805e02 213 NAMED_PARAM (di, 0),
60bff090
JG
214 NAMED_PARAM (ti, 1),
215 },
216 NAMED_PARAM (pre_modify, 0),
217 NAMED_PARAM (post_modify, 0),
218 NAMED_PARAM (register_offset, 0),
219 NAMED_PARAM (register_extend, 0),
220 NAMED_PARAM (imm_offset, 0),
221};
222
43e9d192
IB
223#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
224__extension__
225#endif
226static const struct cpu_regmove_cost generic_regmove_cost =
227{
228 NAMED_PARAM (GP2GP, 1),
229 NAMED_PARAM (GP2FP, 2),
230 NAMED_PARAM (FP2GP, 2),
20b32e50 231 NAMED_PARAM (FP2FP, 2)
43e9d192
IB
232};
233
e4a9c55a
WD
234static const struct cpu_regmove_cost cortexa57_regmove_cost =
235{
236 NAMED_PARAM (GP2GP, 1),
237 /* Avoid the use of slow int<->fp moves for spilling by setting
238 their cost higher than memmov_cost. */
239 NAMED_PARAM (GP2FP, 5),
240 NAMED_PARAM (FP2GP, 5),
241 NAMED_PARAM (FP2FP, 2)
242};
243
244static const struct cpu_regmove_cost cortexa53_regmove_cost =
245{
246 NAMED_PARAM (GP2GP, 1),
247 /* Avoid the use of slow int<->fp moves for spilling by setting
248 their cost higher than memmov_cost. */
249 NAMED_PARAM (GP2FP, 5),
250 NAMED_PARAM (FP2GP, 5),
251 NAMED_PARAM (FP2FP, 2)
252};
253
d1bcc29f
AP
254static const struct cpu_regmove_cost thunderx_regmove_cost =
255{
256 NAMED_PARAM (GP2GP, 2),
257 NAMED_PARAM (GP2FP, 2),
258 NAMED_PARAM (FP2GP, 6),
259 NAMED_PARAM (FP2FP, 4)
260};
261
8990e73a
TB
262/* Generic costs for vector insn classes. */
263#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
264__extension__
265#endif
266static const struct cpu_vector_cost generic_vector_cost =
267{
268 NAMED_PARAM (scalar_stmt_cost, 1),
269 NAMED_PARAM (scalar_load_cost, 1),
270 NAMED_PARAM (scalar_store_cost, 1),
271 NAMED_PARAM (vec_stmt_cost, 1),
272 NAMED_PARAM (vec_to_scalar_cost, 1),
273 NAMED_PARAM (scalar_to_vec_cost, 1),
274 NAMED_PARAM (vec_align_load_cost, 1),
275 NAMED_PARAM (vec_unalign_load_cost, 1),
276 NAMED_PARAM (vec_unalign_store_cost, 1),
277 NAMED_PARAM (vec_store_cost, 1),
278 NAMED_PARAM (cond_taken_branch_cost, 3),
279 NAMED_PARAM (cond_not_taken_branch_cost, 1)
280};
281
60bff090
JG
282/* Generic costs for vector insn classes. */
283#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
284__extension__
285#endif
286static const struct cpu_vector_cost cortexa57_vector_cost =
287{
288 NAMED_PARAM (scalar_stmt_cost, 1),
289 NAMED_PARAM (scalar_load_cost, 4),
290 NAMED_PARAM (scalar_store_cost, 1),
291 NAMED_PARAM (vec_stmt_cost, 3),
292 NAMED_PARAM (vec_to_scalar_cost, 8),
293 NAMED_PARAM (scalar_to_vec_cost, 8),
294 NAMED_PARAM (vec_align_load_cost, 5),
295 NAMED_PARAM (vec_unalign_load_cost, 5),
296 NAMED_PARAM (vec_unalign_store_cost, 1),
297 NAMED_PARAM (vec_store_cost, 1),
298 NAMED_PARAM (cond_taken_branch_cost, 1),
299 NAMED_PARAM (cond_not_taken_branch_cost, 1)
300};
301
43e9d192
IB
302#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
303__extension__
304#endif
305static const struct tune_params generic_tunings =
306{
4e2cd668 307 &cortexa57_extra_costs,
43e9d192
IB
308 &generic_addrcost_table,
309 &generic_regmove_cost,
8990e73a 310 &generic_vector_cost,
d126a4ae
AP
311 NAMED_PARAM (memmov_cost, 4),
312 NAMED_PARAM (issue_rate, 2)
43e9d192
IB
313};
314
984239ad
KT
315static const struct tune_params cortexa53_tunings =
316{
317 &cortexa53_extra_costs,
318 &generic_addrcost_table,
e4a9c55a 319 &cortexa53_regmove_cost,
984239ad 320 &generic_vector_cost,
d126a4ae
AP
321 NAMED_PARAM (memmov_cost, 4),
322 NAMED_PARAM (issue_rate, 2)
984239ad
KT
323};
324
4fd92af6
KT
325static const struct tune_params cortexa57_tunings =
326{
327 &cortexa57_extra_costs,
60bff090 328 &cortexa57_addrcost_table,
e4a9c55a 329 &cortexa57_regmove_cost,
60bff090 330 &cortexa57_vector_cost,
4fd92af6
KT
331 NAMED_PARAM (memmov_cost, 4),
332 NAMED_PARAM (issue_rate, 3)
333};
334
d1bcc29f
AP
335static const struct tune_params thunderx_tunings =
336{
337 &thunderx_extra_costs,
338 &generic_addrcost_table,
339 &thunderx_regmove_cost,
340 &generic_vector_cost,
341 NAMED_PARAM (memmov_cost, 6),
342 NAMED_PARAM (issue_rate, 2)
343};
344
43e9d192
IB
345/* A processor implementing AArch64. */
346struct processor
347{
348 const char *const name;
349 enum aarch64_processor core;
350 const char *arch;
351 const unsigned long flags;
352 const struct tune_params *const tune;
353};
354
355/* Processor cores implementing AArch64. */
356static const struct processor all_cores[] =
357{
192ed1dd 358#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
43e9d192
IB
359 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
360#include "aarch64-cores.def"
361#undef AARCH64_CORE
02fdbd5b 362 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
43e9d192
IB
363 {NULL, aarch64_none, NULL, 0, NULL}
364};
365
366/* Architectures implementing AArch64. */
367static const struct processor all_architectures[] =
368{
369#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
370 {NAME, CORE, #ARCH, FLAGS, NULL},
371#include "aarch64-arches.def"
372#undef AARCH64_ARCH
43e9d192
IB
373 {NULL, aarch64_none, NULL, 0, NULL}
374};
375
376/* Target specification. These are populated as commandline arguments
377 are processed, or NULL if not specified. */
378static const struct processor *selected_arch;
379static const struct processor *selected_cpu;
380static const struct processor *selected_tune;
381
382#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
383
384/* An ISA extension in the co-processor and main instruction set space. */
385struct aarch64_option_extension
386{
387 const char *const name;
388 const unsigned long flags_on;
389 const unsigned long flags_off;
390};
391
392/* ISA extensions in AArch64. */
393static const struct aarch64_option_extension all_extensions[] =
394{
395#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
396 {NAME, FLAGS_ON, FLAGS_OFF},
397#include "aarch64-option-extensions.def"
398#undef AARCH64_OPT_EXTENSION
399 {NULL, 0, 0}
400};
401
402/* Used to track the size of an address when generating a pre/post
403 increment address. */
ef4bddc2 404static machine_mode aarch64_memory_reference_mode;
43e9d192
IB
405
406/* Used to force GTY into this file. */
407static GTY(()) int gty_dummy;
408
409/* A table of valid AArch64 "bitmask immediate" values for
410 logical instructions. */
411
412#define AARCH64_NUM_BITMASKS 5334
413static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
414
43e9d192
IB
415typedef enum aarch64_cond_code
416{
417 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
418 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
419 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
420}
421aarch64_cc;
422
423#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
424
425/* The condition codes of the processor, and the inverse function. */
426static const char * const aarch64_condition_codes[] =
427{
428 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
429 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
430};
431
432/* Provide a mapping from gcc register numbers to dwarf register numbers. */
433unsigned
434aarch64_dbx_register_number (unsigned regno)
435{
436 if (GP_REGNUM_P (regno))
437 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
438 else if (regno == SP_REGNUM)
439 return AARCH64_DWARF_SP;
440 else if (FP_REGNUM_P (regno))
441 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
442
443 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
444 equivalent DWARF register. */
445 return DWARF_FRAME_REGISTERS;
446}
447
448/* Return TRUE if MODE is any of the large INT modes. */
449static bool
ef4bddc2 450aarch64_vect_struct_mode_p (machine_mode mode)
43e9d192
IB
451{
452 return mode == OImode || mode == CImode || mode == XImode;
453}
454
455/* Return TRUE if MODE is any of the vector modes. */
456static bool
ef4bddc2 457aarch64_vector_mode_p (machine_mode mode)
43e9d192
IB
458{
459 return aarch64_vector_mode_supported_p (mode)
460 || aarch64_vect_struct_mode_p (mode);
461}
462
463/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
464static bool
ef4bddc2 465aarch64_array_mode_supported_p (machine_mode mode,
43e9d192
IB
466 unsigned HOST_WIDE_INT nelems)
467{
468 if (TARGET_SIMD
469 && AARCH64_VALID_SIMD_QREG_MODE (mode)
470 && (nelems >= 2 && nelems <= 4))
471 return true;
472
473 return false;
474}
475
476/* Implement HARD_REGNO_NREGS. */
477
478int
ef4bddc2 479aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
43e9d192
IB
480{
481 switch (aarch64_regno_regclass (regno))
482 {
483 case FP_REGS:
484 case FP_LO_REGS:
485 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
486 default:
487 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
488 }
489 gcc_unreachable ();
490}
491
492/* Implement HARD_REGNO_MODE_OK. */
493
494int
ef4bddc2 495aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
43e9d192
IB
496{
497 if (GET_MODE_CLASS (mode) == MODE_CC)
498 return regno == CC_REGNUM;
499
9259db42
YZ
500 if (regno == SP_REGNUM)
501 /* The purpose of comparing with ptr_mode is to support the
502 global register variable associated with the stack pointer
503 register via the syntax of asm ("wsp") in ILP32. */
504 return mode == Pmode || mode == ptr_mode;
505
506 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
43e9d192
IB
507 return mode == Pmode;
508
509 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
510 return 1;
511
512 if (FP_REGNUM_P (regno))
513 {
514 if (aarch64_vect_struct_mode_p (mode))
515 return
516 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
517 else
518 return 1;
519 }
520
521 return 0;
522}
523
73d9ac6a 524/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
ef4bddc2 525machine_mode
73d9ac6a 526aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
ef4bddc2 527 machine_mode mode)
73d9ac6a
IB
528{
529 /* Handle modes that fit within single registers. */
530 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
531 {
532 if (GET_MODE_SIZE (mode) >= 4)
533 return mode;
534 else
535 return SImode;
536 }
537 /* Fall back to generic for multi-reg and very large modes. */
538 else
539 return choose_hard_reg_mode (regno, nregs, false);
540}
541
43e9d192
IB
542/* Return true if calls to DECL should be treated as
543 long-calls (ie called via a register). */
544static bool
545aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
546{
547 return false;
548}
549
550/* Return true if calls to symbol-ref SYM should be treated as
551 long-calls (ie called via a register). */
552bool
553aarch64_is_long_call_p (rtx sym)
554{
555 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
556}
557
558/* Return true if the offsets to a zero/sign-extract operation
559 represent an expression that matches an extend operation. The
560 operands represent the paramters from
561
4745e701 562 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
43e9d192 563bool
ef4bddc2 564aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
43e9d192
IB
565 rtx extract_imm)
566{
567 HOST_WIDE_INT mult_val, extract_val;
568
569 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
570 return false;
571
572 mult_val = INTVAL (mult_imm);
573 extract_val = INTVAL (extract_imm);
574
575 if (extract_val > 8
576 && extract_val < GET_MODE_BITSIZE (mode)
577 && exact_log2 (extract_val & ~7) > 0
578 && (extract_val & 7) <= 4
579 && mult_val == (1 << (extract_val & 7)))
580 return true;
581
582 return false;
583}
584
585/* Emit an insn that's a simple single-set. Both the operands must be
586 known to be valid. */
587inline static rtx
588emit_set_insn (rtx x, rtx y)
589{
590 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
591}
592
593/* X and Y are two things to compare using CODE. Emit the compare insn and
594 return the rtx for register 0 in the proper mode. */
595rtx
596aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
597{
ef4bddc2 598 machine_mode mode = SELECT_CC_MODE (code, x, y);
43e9d192
IB
599 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
600
601 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
602 return cc_reg;
603}
604
605/* Build the SYMBOL_REF for __tls_get_addr. */
606
607static GTY(()) rtx tls_get_addr_libfunc;
608
609rtx
610aarch64_tls_get_addr (void)
611{
612 if (!tls_get_addr_libfunc)
613 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
614 return tls_get_addr_libfunc;
615}
616
617/* Return the TLS model to use for ADDR. */
618
619static enum tls_model
620tls_symbolic_operand_type (rtx addr)
621{
622 enum tls_model tls_kind = TLS_MODEL_NONE;
623 rtx sym, addend;
624
625 if (GET_CODE (addr) == CONST)
626 {
627 split_const (addr, &sym, &addend);
628 if (GET_CODE (sym) == SYMBOL_REF)
629 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
630 }
631 else if (GET_CODE (addr) == SYMBOL_REF)
632 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
633
634 return tls_kind;
635}
636
637/* We'll allow lo_sum's in addresses in our legitimate addresses
638 so that combine would take care of combining addresses where
639 necessary, but for generation purposes, we'll generate the address
640 as :
641 RTL Absolute
642 tmp = hi (symbol_ref); adrp x1, foo
643 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
644 nop
645
646 PIC TLS
647 adrp x1, :got:foo adrp tmp, :tlsgd:foo
648 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
649 bl __tls_get_addr
650 nop
651
652 Load TLS symbol, depending on TLS mechanism and TLS access model.
653
654 Global Dynamic - Traditional TLS:
655 adrp tmp, :tlsgd:imm
656 add dest, tmp, #:tlsgd_lo12:imm
657 bl __tls_get_addr
658
659 Global Dynamic - TLS Descriptors:
660 adrp dest, :tlsdesc:imm
661 ldr tmp, [dest, #:tlsdesc_lo12:imm]
662 add dest, dest, #:tlsdesc_lo12:imm
663 blr tmp
664 mrs tp, tpidr_el0
665 add dest, dest, tp
666
667 Initial Exec:
668 mrs tp, tpidr_el0
669 adrp tmp, :gottprel:imm
670 ldr dest, [tmp, #:gottprel_lo12:imm]
671 add dest, dest, tp
672
673 Local Exec:
674 mrs tp, tpidr_el0
675 add t0, tp, #:tprel_hi12:imm
676 add t0, #:tprel_lo12_nc:imm
677*/
678
679static void
680aarch64_load_symref_appropriately (rtx dest, rtx imm,
681 enum aarch64_symbol_type type)
682{
683 switch (type)
684 {
685 case SYMBOL_SMALL_ABSOLUTE:
686 {
28514dda 687 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 688 rtx tmp_reg = dest;
ef4bddc2 689 machine_mode mode = GET_MODE (dest);
28514dda
YZ
690
691 gcc_assert (mode == Pmode || mode == ptr_mode);
692
43e9d192 693 if (can_create_pseudo_p ())
28514dda 694 tmp_reg = gen_reg_rtx (mode);
43e9d192 695
28514dda 696 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
697 emit_insn (gen_add_losym (dest, tmp_reg, imm));
698 return;
699 }
700
a5350ddc
CSS
701 case SYMBOL_TINY_ABSOLUTE:
702 emit_insn (gen_rtx_SET (Pmode, dest, imm));
703 return;
704
43e9d192
IB
705 case SYMBOL_SMALL_GOT:
706 {
28514dda
YZ
707 /* In ILP32, the mode of dest can be either SImode or DImode,
708 while the got entry is always of SImode size. The mode of
709 dest depends on how dest is used: if dest is assigned to a
710 pointer (e.g. in the memory), it has SImode; it may have
711 DImode if dest is dereferenced to access the memeory.
712 This is why we have to handle three different ldr_got_small
713 patterns here (two patterns for ILP32). */
43e9d192 714 rtx tmp_reg = dest;
ef4bddc2 715 machine_mode mode = GET_MODE (dest);
28514dda 716
43e9d192 717 if (can_create_pseudo_p ())
28514dda
YZ
718 tmp_reg = gen_reg_rtx (mode);
719
720 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
721 if (mode == ptr_mode)
722 {
723 if (mode == DImode)
724 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
725 else
726 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
727 }
728 else
729 {
730 gcc_assert (mode == Pmode);
731 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
732 }
733
43e9d192
IB
734 return;
735 }
736
737 case SYMBOL_SMALL_TLSGD:
738 {
5d8a22a5 739 rtx_insn *insns;
43e9d192
IB
740 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
741
742 start_sequence ();
78607708 743 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
43e9d192
IB
744 insns = get_insns ();
745 end_sequence ();
746
747 RTL_CONST_CALL_P (insns) = 1;
748 emit_libcall_block (insns, dest, result, imm);
749 return;
750 }
751
752 case SYMBOL_SMALL_TLSDESC:
753 {
ef4bddc2 754 machine_mode mode = GET_MODE (dest);
621ad2de 755 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
756 rtx tp;
757
621ad2de
AP
758 gcc_assert (mode == Pmode || mode == ptr_mode);
759
760 /* In ILP32, the got entry is always of SImode size. Unlike
761 small GOT, the dest is fixed at reg 0. */
762 if (TARGET_ILP32)
763 emit_insn (gen_tlsdesc_small_si (imm));
764 else
765 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 766 tp = aarch64_load_tp (NULL);
621ad2de
AP
767
768 if (mode != Pmode)
769 tp = gen_lowpart (mode, tp);
770
771 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
43e9d192
IB
772 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
773 return;
774 }
775
776 case SYMBOL_SMALL_GOTTPREL:
777 {
621ad2de
AP
778 /* In ILP32, the mode of dest can be either SImode or DImode,
779 while the got entry is always of SImode size. The mode of
780 dest depends on how dest is used: if dest is assigned to a
781 pointer (e.g. in the memory), it has SImode; it may have
782 DImode if dest is dereferenced to access the memeory.
783 This is why we have to handle three different tlsie_small
784 patterns here (two patterns for ILP32). */
ef4bddc2 785 machine_mode mode = GET_MODE (dest);
621ad2de 786 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 787 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
788
789 if (mode == ptr_mode)
790 {
791 if (mode == DImode)
792 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
793 else
794 {
795 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
796 tp = gen_lowpart (mode, tp);
797 }
798 }
799 else
800 {
801 gcc_assert (mode == Pmode);
802 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
803 }
804
805 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
43e9d192
IB
806 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
807 return;
808 }
809
810 case SYMBOL_SMALL_TPREL:
811 {
812 rtx tp = aarch64_load_tp (NULL);
813 emit_insn (gen_tlsle_small (dest, tp, imm));
814 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
815 return;
816 }
817
87dd8ab0
MS
818 case SYMBOL_TINY_GOT:
819 emit_insn (gen_ldr_got_tiny (dest, imm));
820 return;
821
43e9d192
IB
822 default:
823 gcc_unreachable ();
824 }
825}
826
827/* Emit a move from SRC to DEST. Assume that the move expanders can
828 handle all moves if !can_create_pseudo_p (). The distinction is
829 important because, unlike emit_move_insn, the move expanders know
830 how to force Pmode objects into the constant pool even when the
831 constant pool address is not itself legitimate. */
832static rtx
833aarch64_emit_move (rtx dest, rtx src)
834{
835 return (can_create_pseudo_p ()
836 ? emit_move_insn (dest, src)
837 : emit_move_insn_1 (dest, src));
838}
839
030d03b8
RE
840/* Split a 128-bit move operation into two 64-bit move operations,
841 taking care to handle partial overlap of register to register
842 copies. Special cases are needed when moving between GP regs and
843 FP regs. SRC can be a register, constant or memory; DST a register
844 or memory. If either operand is memory it must not have any side
845 effects. */
43e9d192
IB
846void
847aarch64_split_128bit_move (rtx dst, rtx src)
848{
030d03b8
RE
849 rtx dst_lo, dst_hi;
850 rtx src_lo, src_hi;
43e9d192 851
ef4bddc2 852 machine_mode mode = GET_MODE (dst);
12dc6974 853
030d03b8
RE
854 gcc_assert (mode == TImode || mode == TFmode);
855 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
856 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
857
858 if (REG_P (dst) && REG_P (src))
859 {
030d03b8
RE
860 int src_regno = REGNO (src);
861 int dst_regno = REGNO (dst);
43e9d192 862
030d03b8 863 /* Handle FP <-> GP regs. */
43e9d192
IB
864 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
865 {
030d03b8
RE
866 src_lo = gen_lowpart (word_mode, src);
867 src_hi = gen_highpart (word_mode, src);
868
869 if (mode == TImode)
870 {
871 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
872 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
873 }
874 else
875 {
876 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
877 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
878 }
879 return;
43e9d192
IB
880 }
881 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
882 {
030d03b8
RE
883 dst_lo = gen_lowpart (word_mode, dst);
884 dst_hi = gen_highpart (word_mode, dst);
885
886 if (mode == TImode)
887 {
888 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
889 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
890 }
891 else
892 {
893 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
894 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
895 }
896 return;
43e9d192 897 }
43e9d192
IB
898 }
899
030d03b8
RE
900 dst_lo = gen_lowpart (word_mode, dst);
901 dst_hi = gen_highpart (word_mode, dst);
902 src_lo = gen_lowpart (word_mode, src);
903 src_hi = gen_highpart_mode (word_mode, mode, src);
904
905 /* At most one pairing may overlap. */
906 if (reg_overlap_mentioned_p (dst_lo, src_hi))
907 {
908 aarch64_emit_move (dst_hi, src_hi);
909 aarch64_emit_move (dst_lo, src_lo);
910 }
911 else
912 {
913 aarch64_emit_move (dst_lo, src_lo);
914 aarch64_emit_move (dst_hi, src_hi);
915 }
43e9d192
IB
916}
917
918bool
919aarch64_split_128bit_move_p (rtx dst, rtx src)
920{
921 return (! REG_P (src)
922 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
923}
924
8b033a8a
SN
925/* Split a complex SIMD combine. */
926
927void
928aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
929{
ef4bddc2
RS
930 machine_mode src_mode = GET_MODE (src1);
931 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
932
933 gcc_assert (VECTOR_MODE_P (dst_mode));
934
935 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
936 {
937 rtx (*gen) (rtx, rtx, rtx);
938
939 switch (src_mode)
940 {
941 case V8QImode:
942 gen = gen_aarch64_simd_combinev8qi;
943 break;
944 case V4HImode:
945 gen = gen_aarch64_simd_combinev4hi;
946 break;
947 case V2SImode:
948 gen = gen_aarch64_simd_combinev2si;
949 break;
950 case V2SFmode:
951 gen = gen_aarch64_simd_combinev2sf;
952 break;
953 case DImode:
954 gen = gen_aarch64_simd_combinedi;
955 break;
956 case DFmode:
957 gen = gen_aarch64_simd_combinedf;
958 break;
959 default:
960 gcc_unreachable ();
961 }
962
963 emit_insn (gen (dst, src1, src2));
964 return;
965 }
966}
967
fd4842cd
SN
968/* Split a complex SIMD move. */
969
970void
971aarch64_split_simd_move (rtx dst, rtx src)
972{
ef4bddc2
RS
973 machine_mode src_mode = GET_MODE (src);
974 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
975
976 gcc_assert (VECTOR_MODE_P (dst_mode));
977
978 if (REG_P (dst) && REG_P (src))
979 {
c59b7e28
SN
980 rtx (*gen) (rtx, rtx);
981
fd4842cd
SN
982 gcc_assert (VECTOR_MODE_P (src_mode));
983
984 switch (src_mode)
985 {
986 case V16QImode:
c59b7e28 987 gen = gen_aarch64_split_simd_movv16qi;
fd4842cd
SN
988 break;
989 case V8HImode:
c59b7e28 990 gen = gen_aarch64_split_simd_movv8hi;
fd4842cd
SN
991 break;
992 case V4SImode:
c59b7e28 993 gen = gen_aarch64_split_simd_movv4si;
fd4842cd
SN
994 break;
995 case V2DImode:
c59b7e28 996 gen = gen_aarch64_split_simd_movv2di;
fd4842cd
SN
997 break;
998 case V4SFmode:
c59b7e28 999 gen = gen_aarch64_split_simd_movv4sf;
fd4842cd
SN
1000 break;
1001 case V2DFmode:
c59b7e28 1002 gen = gen_aarch64_split_simd_movv2df;
fd4842cd
SN
1003 break;
1004 default:
1005 gcc_unreachable ();
1006 }
c59b7e28
SN
1007
1008 emit_insn (gen (dst, src));
fd4842cd
SN
1009 return;
1010 }
1011}
1012
43e9d192 1013static rtx
ef4bddc2 1014aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
1015{
1016 if (can_create_pseudo_p ())
e18b4a81 1017 return force_reg (mode, value);
43e9d192
IB
1018 else
1019 {
1020 x = aarch64_emit_move (x, value);
1021 return x;
1022 }
1023}
1024
1025
1026static rtx
ef4bddc2 1027aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
43e9d192 1028{
9c023bf0 1029 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
43e9d192
IB
1030 {
1031 rtx high;
1032 /* Load the full offset into a register. This
1033 might be improvable in the future. */
1034 high = GEN_INT (offset);
1035 offset = 0;
e18b4a81
YZ
1036 high = aarch64_force_temporary (mode, temp, high);
1037 reg = aarch64_force_temporary (mode, temp,
1038 gen_rtx_PLUS (mode, high, reg));
43e9d192
IB
1039 }
1040 return plus_constant (mode, reg, offset);
1041}
1042
1043void
1044aarch64_expand_mov_immediate (rtx dest, rtx imm)
1045{
ef4bddc2 1046 machine_mode mode = GET_MODE (dest);
43e9d192
IB
1047 unsigned HOST_WIDE_INT mask;
1048 int i;
1049 bool first;
1050 unsigned HOST_WIDE_INT val;
1051 bool subtargets;
1052 rtx subtarget;
c747993a 1053 int one_match, zero_match, first_not_ffff_match;
43e9d192
IB
1054
1055 gcc_assert (mode == SImode || mode == DImode);
1056
1057 /* Check on what type of symbol it is. */
1058 if (GET_CODE (imm) == SYMBOL_REF
1059 || GET_CODE (imm) == LABEL_REF
1060 || GET_CODE (imm) == CONST)
1061 {
1062 rtx mem, base, offset;
1063 enum aarch64_symbol_type sty;
1064
1065 /* If we have (const (plus symbol offset)), separate out the offset
1066 before we start classifying the symbol. */
1067 split_const (imm, &base, &offset);
1068
1069 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1070 switch (sty)
1071 {
1072 case SYMBOL_FORCE_TO_MEM:
1073 if (offset != const0_rtx
1074 && targetm.cannot_force_const_mem (mode, imm))
1075 {
aef66c94 1076 gcc_assert (can_create_pseudo_p ());
e18b4a81 1077 base = aarch64_force_temporary (mode, dest, base);
43e9d192
IB
1078 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1079 aarch64_emit_move (dest, base);
1080 return;
1081 }
28514dda 1082 mem = force_const_mem (ptr_mode, imm);
43e9d192 1083 gcc_assert (mem);
28514dda
YZ
1084 if (mode != ptr_mode)
1085 mem = gen_rtx_ZERO_EXTEND (mode, mem);
43e9d192
IB
1086 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1087 return;
1088
1089 case SYMBOL_SMALL_TLSGD:
1090 case SYMBOL_SMALL_TLSDESC:
1091 case SYMBOL_SMALL_GOTTPREL:
1092 case SYMBOL_SMALL_GOT:
87dd8ab0 1093 case SYMBOL_TINY_GOT:
43e9d192
IB
1094 if (offset != const0_rtx)
1095 {
1096 gcc_assert(can_create_pseudo_p ());
e18b4a81 1097 base = aarch64_force_temporary (mode, dest, base);
43e9d192
IB
1098 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1099 aarch64_emit_move (dest, base);
1100 return;
1101 }
1102 /* FALLTHRU */
1103
1104 case SYMBOL_SMALL_TPREL:
1105 case SYMBOL_SMALL_ABSOLUTE:
a5350ddc 1106 case SYMBOL_TINY_ABSOLUTE:
43e9d192
IB
1107 aarch64_load_symref_appropriately (dest, imm, sty);
1108 return;
1109
1110 default:
1111 gcc_unreachable ();
1112 }
1113 }
1114
1115 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1116 {
1117 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1118 return;
1119 }
1120
1121 if (!CONST_INT_P (imm))
1122 {
1123 if (GET_CODE (imm) == HIGH)
1124 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1125 else
1126 {
1127 rtx mem = force_const_mem (mode, imm);
1128 gcc_assert (mem);
1129 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1130 }
1131
1132 return;
1133 }
1134
1135 if (mode == SImode)
1136 {
1137 /* We know we can't do this in 1 insn, and we must be able to do it
1138 in two; so don't mess around looking for sequences that don't buy
1139 us anything. */
1140 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1141 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1142 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1143 return;
1144 }
1145
1146 /* Remaining cases are all for DImode. */
1147
1148 val = INTVAL (imm);
1149 subtargets = optimize && can_create_pseudo_p ();
1150
1151 one_match = 0;
1152 zero_match = 0;
1153 mask = 0xffff;
c747993a 1154 first_not_ffff_match = -1;
43e9d192
IB
1155
1156 for (i = 0; i < 64; i += 16, mask <<= 16)
1157 {
c747993a 1158 if ((val & mask) == mask)
43e9d192 1159 one_match++;
c747993a
IB
1160 else
1161 {
1162 if (first_not_ffff_match < 0)
1163 first_not_ffff_match = i;
1164 if ((val & mask) == 0)
1165 zero_match++;
1166 }
43e9d192
IB
1167 }
1168
1169 if (one_match == 2)
1170 {
c747993a
IB
1171 /* Set one of the quarters and then insert back into result. */
1172 mask = 0xffffll << first_not_ffff_match;
1173 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1174 emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
1175 GEN_INT ((val >> first_not_ffff_match)
1176 & 0xffff)));
1177 return;
1178 }
1179
43e9d192
IB
1180 if (zero_match == 2)
1181 goto simple_sequence;
1182
1183 mask = 0x0ffff0000UL;
1184 for (i = 16; i < 64; i += 16, mask <<= 16)
1185 {
1186 HOST_WIDE_INT comp = mask & ~(mask - 1);
1187
1188 if (aarch64_uimm12_shift (val - (val & mask)))
1189 {
1190 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1191
1192 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1193 emit_insn (gen_adddi3 (dest, subtarget,
1194 GEN_INT (val - (val & mask))));
1195 return;
1196 }
1197 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1198 {
1199 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1200
1201 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1202 GEN_INT ((val + comp) & mask)));
1203 emit_insn (gen_adddi3 (dest, subtarget,
1204 GEN_INT (val - ((val + comp) & mask))));
1205 return;
1206 }
1207 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1208 {
1209 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1210
1211 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1212 GEN_INT ((val - comp) | ~mask)));
1213 emit_insn (gen_adddi3 (dest, subtarget,
1214 GEN_INT (val - ((val - comp) | ~mask))));
1215 return;
1216 }
1217 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1218 {
1219 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1220
1221 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1222 GEN_INT (val | ~mask)));
1223 emit_insn (gen_adddi3 (dest, subtarget,
1224 GEN_INT (val - (val | ~mask))));
1225 return;
1226 }
1227 }
1228
1229 /* See if we can do it by arithmetically combining two
1230 immediates. */
1231 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1232 {
1233 int j;
1234 mask = 0xffff;
1235
1236 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1237 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1238 {
1239 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1240 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1241 GEN_INT (aarch64_bitmasks[i])));
1242 emit_insn (gen_adddi3 (dest, subtarget,
1243 GEN_INT (val - aarch64_bitmasks[i])));
1244 return;
1245 }
1246
1247 for (j = 0; j < 64; j += 16, mask <<= 16)
1248 {
1249 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1250 {
1251 emit_insn (gen_rtx_SET (VOIDmode, dest,
1252 GEN_INT (aarch64_bitmasks[i])));
1253 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1254 GEN_INT ((val >> j) & 0xffff)));
1255 return;
1256 }
1257 }
1258 }
1259
1260 /* See if we can do it by logically combining two immediates. */
1261 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1262 {
1263 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1264 {
1265 int j;
1266
1267 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1268 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1269 {
1270 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1271 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1272 GEN_INT (aarch64_bitmasks[i])));
1273 emit_insn (gen_iordi3 (dest, subtarget,
1274 GEN_INT (aarch64_bitmasks[j])));
1275 return;
1276 }
1277 }
1278 else if ((val & aarch64_bitmasks[i]) == val)
1279 {
1280 int j;
1281
1282 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1283 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1284 {
1285
1286 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1287 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1288 GEN_INT (aarch64_bitmasks[j])));
1289 emit_insn (gen_anddi3 (dest, subtarget,
1290 GEN_INT (aarch64_bitmasks[i])));
1291 return;
1292 }
1293 }
1294 }
1295
2c274197
KT
1296 if (one_match > zero_match)
1297 {
1298 /* Set either first three quarters or all but the third. */
1299 mask = 0xffffll << (16 - first_not_ffff_match);
1300 emit_insn (gen_rtx_SET (VOIDmode, dest,
1301 GEN_INT (val | mask | 0xffffffff00000000ull)));
1302
1303 /* Now insert other two quarters. */
1304 for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
1305 i < 64; i += 16, mask <<= 16)
1306 {
1307 if ((val & mask) != mask)
1308 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1309 GEN_INT ((val >> i) & 0xffff)));
1310 }
1311 return;
1312 }
1313
43e9d192
IB
1314 simple_sequence:
1315 first = true;
1316 mask = 0xffff;
1317 for (i = 0; i < 64; i += 16, mask <<= 16)
1318 {
1319 if ((val & mask) != 0)
1320 {
1321 if (first)
1322 {
1323 emit_insn (gen_rtx_SET (VOIDmode, dest,
1324 GEN_INT (val & mask)));
1325 first = false;
1326 }
1327 else
1328 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1329 GEN_INT ((val >> i) & 0xffff)));
1330 }
1331 }
1332}
1333
1334static bool
fee9ba42
JW
1335aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1336 tree exp ATTRIBUTE_UNUSED)
43e9d192 1337{
fee9ba42 1338 /* Currently, always true. */
43e9d192
IB
1339 return true;
1340}
1341
1342/* Implement TARGET_PASS_BY_REFERENCE. */
1343
1344static bool
1345aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 1346 machine_mode mode,
43e9d192
IB
1347 const_tree type,
1348 bool named ATTRIBUTE_UNUSED)
1349{
1350 HOST_WIDE_INT size;
ef4bddc2 1351 machine_mode dummymode;
43e9d192
IB
1352 int nregs;
1353
1354 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1355 size = (mode == BLKmode && type)
1356 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1357
aadc1c43
MHD
1358 /* Aggregates are passed by reference based on their size. */
1359 if (type && AGGREGATE_TYPE_P (type))
43e9d192 1360 {
aadc1c43 1361 size = int_size_in_bytes (type);
43e9d192
IB
1362 }
1363
1364 /* Variable sized arguments are always returned by reference. */
1365 if (size < 0)
1366 return true;
1367
1368 /* Can this be a candidate to be passed in fp/simd register(s)? */
1369 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1370 &dummymode, &nregs,
1371 NULL))
1372 return false;
1373
1374 /* Arguments which are variable sized or larger than 2 registers are
1375 passed by reference unless they are a homogenous floating point
1376 aggregate. */
1377 return size > 2 * UNITS_PER_WORD;
1378}
1379
1380/* Return TRUE if VALTYPE is padded to its least significant bits. */
1381static bool
1382aarch64_return_in_msb (const_tree valtype)
1383{
ef4bddc2 1384 machine_mode dummy_mode;
43e9d192
IB
1385 int dummy_int;
1386
1387 /* Never happens in little-endian mode. */
1388 if (!BYTES_BIG_ENDIAN)
1389 return false;
1390
1391 /* Only composite types smaller than or equal to 16 bytes can
1392 be potentially returned in registers. */
1393 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1394 || int_size_in_bytes (valtype) <= 0
1395 || int_size_in_bytes (valtype) > 16)
1396 return false;
1397
1398 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1399 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1400 is always passed/returned in the least significant bits of fp/simd
1401 register(s). */
1402 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1403 &dummy_mode, &dummy_int, NULL))
1404 return false;
1405
1406 return true;
1407}
1408
1409/* Implement TARGET_FUNCTION_VALUE.
1410 Define how to find the value returned by a function. */
1411
1412static rtx
1413aarch64_function_value (const_tree type, const_tree func,
1414 bool outgoing ATTRIBUTE_UNUSED)
1415{
ef4bddc2 1416 machine_mode mode;
43e9d192
IB
1417 int unsignedp;
1418 int count;
ef4bddc2 1419 machine_mode ag_mode;
43e9d192
IB
1420
1421 mode = TYPE_MODE (type);
1422 if (INTEGRAL_TYPE_P (type))
1423 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1424
1425 if (aarch64_return_in_msb (type))
1426 {
1427 HOST_WIDE_INT size = int_size_in_bytes (type);
1428
1429 if (size % UNITS_PER_WORD != 0)
1430 {
1431 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1432 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1433 }
1434 }
1435
1436 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1437 &ag_mode, &count, NULL))
1438 {
1439 if (!aarch64_composite_type_p (type, mode))
1440 {
1441 gcc_assert (count == 1 && mode == ag_mode);
1442 return gen_rtx_REG (mode, V0_REGNUM);
1443 }
1444 else
1445 {
1446 int i;
1447 rtx par;
1448
1449 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1450 for (i = 0; i < count; i++)
1451 {
1452 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1453 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1454 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1455 XVECEXP (par, 0, i) = tmp;
1456 }
1457 return par;
1458 }
1459 }
1460 else
1461 return gen_rtx_REG (mode, R0_REGNUM);
1462}
1463
1464/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1465 Return true if REGNO is the number of a hard register in which the values
1466 of called function may come back. */
1467
1468static bool
1469aarch64_function_value_regno_p (const unsigned int regno)
1470{
1471 /* Maximum of 16 bytes can be returned in the general registers. Examples
1472 of 16-byte return values are: 128-bit integers and 16-byte small
1473 structures (excluding homogeneous floating-point aggregates). */
1474 if (regno == R0_REGNUM || regno == R1_REGNUM)
1475 return true;
1476
1477 /* Up to four fp/simd registers can return a function value, e.g. a
1478 homogeneous floating-point aggregate having four members. */
1479 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1480 return !TARGET_GENERAL_REGS_ONLY;
1481
1482 return false;
1483}
1484
1485/* Implement TARGET_RETURN_IN_MEMORY.
1486
1487 If the type T of the result of a function is such that
1488 void func (T arg)
1489 would require that arg be passed as a value in a register (or set of
1490 registers) according to the parameter passing rules, then the result
1491 is returned in the same registers as would be used for such an
1492 argument. */
1493
1494static bool
1495aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1496{
1497 HOST_WIDE_INT size;
ef4bddc2 1498 machine_mode ag_mode;
43e9d192
IB
1499 int count;
1500
1501 if (!AGGREGATE_TYPE_P (type)
1502 && TREE_CODE (type) != COMPLEX_TYPE
1503 && TREE_CODE (type) != VECTOR_TYPE)
1504 /* Simple scalar types always returned in registers. */
1505 return false;
1506
1507 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1508 type,
1509 &ag_mode,
1510 &count,
1511 NULL))
1512 return false;
1513
1514 /* Types larger than 2 registers returned in memory. */
1515 size = int_size_in_bytes (type);
1516 return (size < 0 || size > 2 * UNITS_PER_WORD);
1517}
1518
1519static bool
ef4bddc2 1520aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1521 const_tree type, int *nregs)
1522{
1523 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1524 return aarch64_vfp_is_call_or_return_candidate (mode,
1525 type,
1526 &pcum->aapcs_vfp_rmode,
1527 nregs,
1528 NULL);
1529}
1530
1531/* Given MODE and TYPE of a function argument, return the alignment in
1532 bits. The idea is to suppress any stronger alignment requested by
1533 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1534 This is a helper function for local use only. */
1535
1536static unsigned int
ef4bddc2 1537aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192
IB
1538{
1539 unsigned int alignment;
1540
1541 if (type)
1542 {
1543 if (!integer_zerop (TYPE_SIZE (type)))
1544 {
1545 if (TYPE_MODE (type) == mode)
1546 alignment = TYPE_ALIGN (type);
1547 else
1548 alignment = GET_MODE_ALIGNMENT (mode);
1549 }
1550 else
1551 alignment = 0;
1552 }
1553 else
1554 alignment = GET_MODE_ALIGNMENT (mode);
1555
1556 return alignment;
1557}
1558
1559/* Layout a function argument according to the AAPCS64 rules. The rule
1560 numbers refer to the rule numbers in the AAPCS64. */
1561
1562static void
ef4bddc2 1563aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1564 const_tree type,
1565 bool named ATTRIBUTE_UNUSED)
1566{
1567 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1568 int ncrn, nvrn, nregs;
1569 bool allocate_ncrn, allocate_nvrn;
3abf17cf 1570 HOST_WIDE_INT size;
43e9d192
IB
1571
1572 /* We need to do this once per argument. */
1573 if (pcum->aapcs_arg_processed)
1574 return;
1575
1576 pcum->aapcs_arg_processed = true;
1577
3abf17cf
YZ
1578 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1579 size
1580 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1581 UNITS_PER_WORD);
1582
43e9d192
IB
1583 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1584 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1585 mode,
1586 type,
1587 &nregs);
1588
1589 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1590 The following code thus handles passing by SIMD/FP registers first. */
1591
1592 nvrn = pcum->aapcs_nvrn;
1593
1594 /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1595 and homogenous short-vector aggregates (HVA). */
1596 if (allocate_nvrn)
1597 {
1598 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1599 {
1600 pcum->aapcs_nextnvrn = nvrn + nregs;
1601 if (!aarch64_composite_type_p (type, mode))
1602 {
1603 gcc_assert (nregs == 1);
1604 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1605 }
1606 else
1607 {
1608 rtx par;
1609 int i;
1610 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1611 for (i = 0; i < nregs; i++)
1612 {
1613 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1614 V0_REGNUM + nvrn + i);
1615 tmp = gen_rtx_EXPR_LIST
1616 (VOIDmode, tmp,
1617 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1618 XVECEXP (par, 0, i) = tmp;
1619 }
1620 pcum->aapcs_reg = par;
1621 }
1622 return;
1623 }
1624 else
1625 {
1626 /* C.3 NSRN is set to 8. */
1627 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1628 goto on_stack;
1629 }
1630 }
1631
1632 ncrn = pcum->aapcs_ncrn;
3abf17cf 1633 nregs = size / UNITS_PER_WORD;
43e9d192
IB
1634
1635 /* C6 - C9. though the sign and zero extension semantics are
1636 handled elsewhere. This is the case where the argument fits
1637 entirely general registers. */
1638 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1639 {
1640 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1641
1642 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1643
1644 /* C.8 if the argument has an alignment of 16 then the NGRN is
1645 rounded up to the next even number. */
1646 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1647 {
1648 ++ncrn;
1649 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1650 }
1651 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1652 A reg is still generated for it, but the caller should be smart
1653 enough not to use it. */
1654 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1655 {
1656 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1657 }
1658 else
1659 {
1660 rtx par;
1661 int i;
1662
1663 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1664 for (i = 0; i < nregs; i++)
1665 {
1666 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1667 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1668 GEN_INT (i * UNITS_PER_WORD));
1669 XVECEXP (par, 0, i) = tmp;
1670 }
1671 pcum->aapcs_reg = par;
1672 }
1673
1674 pcum->aapcs_nextncrn = ncrn + nregs;
1675 return;
1676 }
1677
1678 /* C.11 */
1679 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1680
1681 /* The argument is passed on stack; record the needed number of words for
3abf17cf 1682 this argument and align the total size if necessary. */
43e9d192 1683on_stack:
3abf17cf 1684 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
43e9d192
IB
1685 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1686 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
3abf17cf 1687 16 / UNITS_PER_WORD);
43e9d192
IB
1688 return;
1689}
1690
1691/* Implement TARGET_FUNCTION_ARG. */
1692
1693static rtx
ef4bddc2 1694aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
1695 const_tree type, bool named)
1696{
1697 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1698 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1699
1700 if (mode == VOIDmode)
1701 return NULL_RTX;
1702
1703 aarch64_layout_arg (pcum_v, mode, type, named);
1704 return pcum->aapcs_reg;
1705}
1706
1707void
1708aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1709 const_tree fntype ATTRIBUTE_UNUSED,
1710 rtx libname ATTRIBUTE_UNUSED,
1711 const_tree fndecl ATTRIBUTE_UNUSED,
1712 unsigned n_named ATTRIBUTE_UNUSED)
1713{
1714 pcum->aapcs_ncrn = 0;
1715 pcum->aapcs_nvrn = 0;
1716 pcum->aapcs_nextncrn = 0;
1717 pcum->aapcs_nextnvrn = 0;
1718 pcum->pcs_variant = ARM_PCS_AAPCS64;
1719 pcum->aapcs_reg = NULL_RTX;
1720 pcum->aapcs_arg_processed = false;
1721 pcum->aapcs_stack_words = 0;
1722 pcum->aapcs_stack_size = 0;
1723
1724 return;
1725}
1726
1727static void
1728aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 1729 machine_mode mode,
43e9d192
IB
1730 const_tree type,
1731 bool named)
1732{
1733 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1734 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1735 {
1736 aarch64_layout_arg (pcum_v, mode, type, named);
1737 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1738 != (pcum->aapcs_stack_words != 0));
1739 pcum->aapcs_arg_processed = false;
1740 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1741 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1742 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1743 pcum->aapcs_stack_words = 0;
1744 pcum->aapcs_reg = NULL_RTX;
1745 }
1746}
1747
1748bool
1749aarch64_function_arg_regno_p (unsigned regno)
1750{
1751 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1752 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1753}
1754
1755/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1756 PARM_BOUNDARY bits of alignment, but will be given anything up
1757 to STACK_BOUNDARY bits if the type requires it. This makes sure
1758 that both before and after the layout of each argument, the Next
1759 Stacked Argument Address (NSAA) will have a minimum alignment of
1760 8 bytes. */
1761
1762static unsigned int
ef4bddc2 1763aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192
IB
1764{
1765 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1766
1767 if (alignment < PARM_BOUNDARY)
1768 alignment = PARM_BOUNDARY;
1769 if (alignment > STACK_BOUNDARY)
1770 alignment = STACK_BOUNDARY;
1771 return alignment;
1772}
1773
1774/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1775
1776 Return true if an argument passed on the stack should be padded upwards,
1777 i.e. if the least-significant byte of the stack slot has useful data.
1778
1779 Small aggregate types are placed in the lowest memory address.
1780
1781 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1782
1783bool
ef4bddc2 1784aarch64_pad_arg_upward (machine_mode mode, const_tree type)
43e9d192
IB
1785{
1786 /* On little-endian targets, the least significant byte of every stack
1787 argument is passed at the lowest byte address of the stack slot. */
1788 if (!BYTES_BIG_ENDIAN)
1789 return true;
1790
00edcfbe 1791 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
1792 the least significant byte of a stack argument is passed at the highest
1793 byte address of the stack slot. */
1794 if (type
00edcfbe
YZ
1795 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1796 || POINTER_TYPE_P (type))
43e9d192
IB
1797 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1798 return false;
1799
1800 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1801 return true;
1802}
1803
1804/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1805
1806 It specifies padding for the last (may also be the only)
1807 element of a block move between registers and memory. If
1808 assuming the block is in the memory, padding upward means that
1809 the last element is padded after its highest significant byte,
1810 while in downward padding, the last element is padded at the
1811 its least significant byte side.
1812
1813 Small aggregates and small complex types are always padded
1814 upwards.
1815
1816 We don't need to worry about homogeneous floating-point or
1817 short-vector aggregates; their move is not affected by the
1818 padding direction determined here. Regardless of endianness,
1819 each element of such an aggregate is put in the least
1820 significant bits of a fp/simd register.
1821
1822 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1823 register has useful data, and return the opposite if the most
1824 significant byte does. */
1825
1826bool
ef4bddc2 1827aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
1828 bool first ATTRIBUTE_UNUSED)
1829{
1830
1831 /* Small composite types are always padded upward. */
1832 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1833 {
1834 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1835 : GET_MODE_SIZE (mode));
1836 if (size < 2 * UNITS_PER_WORD)
1837 return true;
1838 }
1839
1840 /* Otherwise, use the default padding. */
1841 return !BYTES_BIG_ENDIAN;
1842}
1843
ef4bddc2 1844static machine_mode
43e9d192
IB
1845aarch64_libgcc_cmp_return_mode (void)
1846{
1847 return SImode;
1848}
1849
1850static bool
1851aarch64_frame_pointer_required (void)
1852{
0b7f8166
MS
1853 /* In aarch64_override_options_after_change
1854 flag_omit_leaf_frame_pointer turns off the frame pointer by
1855 default. Turn it back on now if we've not got a leaf
1856 function. */
1857 if (flag_omit_leaf_frame_pointer
1858 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1859 return true;
43e9d192 1860
0b7f8166 1861 return false;
43e9d192
IB
1862}
1863
1864/* Mark the registers that need to be saved by the callee and calculate
1865 the size of the callee-saved registers area and frame record (both FP
1866 and LR may be omitted). */
1867static void
1868aarch64_layout_frame (void)
1869{
1870 HOST_WIDE_INT offset = 0;
1871 int regno;
1872
1873 if (reload_completed && cfun->machine->frame.laid_out)
1874 return;
1875
97826595
MS
1876#define SLOT_NOT_REQUIRED (-2)
1877#define SLOT_REQUIRED (-1)
1878
363ffa50
JW
1879 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
1880 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
1881
43e9d192
IB
1882 /* First mark all the registers that really need to be saved... */
1883 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 1884 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
1885
1886 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 1887 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
1888
1889 /* ... that includes the eh data registers (if needed)... */
1890 if (crtl->calls_eh_return)
1891 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
1892 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
1893 = SLOT_REQUIRED;
43e9d192
IB
1894
1895 /* ... and any callee saved register that dataflow says is live. */
1896 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1897 if (df_regs_ever_live_p (regno)
1898 && !call_used_regs[regno])
97826595 1899 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
1900
1901 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1902 if (df_regs_ever_live_p (regno)
1903 && !call_used_regs[regno])
97826595 1904 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
1905
1906 if (frame_pointer_needed)
1907 {
2e1cdae5 1908 /* FP and LR are placed in the linkage record. */
43e9d192 1909 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 1910 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 1911 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 1912 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
43e9d192 1913 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2e1cdae5 1914 offset += 2 * UNITS_PER_WORD;
43e9d192
IB
1915 }
1916
1917 /* Now assign stack slots for them. */
2e1cdae5 1918 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 1919 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
1920 {
1921 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
1922 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1923 cfun->machine->frame.wb_candidate1 = regno;
1924 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
1925 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
1926 offset += UNITS_PER_WORD;
1927 }
1928
1929 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 1930 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
1931 {
1932 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
1933 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1934 cfun->machine->frame.wb_candidate1 = regno;
1935 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
1936 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
1937 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
1938 offset += UNITS_PER_WORD;
1939 }
1940
43e9d192
IB
1941 cfun->machine->frame.padding0 =
1942 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1943 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1944
1945 cfun->machine->frame.saved_regs_size = offset;
1c960e02
MS
1946
1947 cfun->machine->frame.hard_fp_offset
1948 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
1949 + get_frame_size ()
1950 + cfun->machine->frame.saved_regs_size,
1951 STACK_BOUNDARY / BITS_PER_UNIT);
1952
1953 cfun->machine->frame.frame_size
1954 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
1955 + crtl->outgoing_args_size,
1956 STACK_BOUNDARY / BITS_PER_UNIT);
1957
43e9d192
IB
1958 cfun->machine->frame.laid_out = true;
1959}
1960
43e9d192
IB
1961static bool
1962aarch64_register_saved_on_entry (int regno)
1963{
97826595 1964 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
1965}
1966
64dedd72
JW
1967static unsigned
1968aarch64_next_callee_save (unsigned regno, unsigned limit)
1969{
1970 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
1971 regno ++;
1972 return regno;
1973}
43e9d192 1974
c5e1f66e 1975static void
ef4bddc2 1976aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
1977 HOST_WIDE_INT adjustment)
1978 {
1979 rtx base_rtx = stack_pointer_rtx;
1980 rtx insn, reg, mem;
1981
1982 reg = gen_rtx_REG (mode, regno);
1983 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
1984 plus_constant (Pmode, base_rtx, -adjustment));
1985 mem = gen_rtx_MEM (mode, mem);
1986
1987 insn = emit_move_insn (mem, reg);
1988 RTX_FRAME_RELATED_P (insn) = 1;
1989}
1990
80c11907 1991static rtx
ef4bddc2 1992aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
1993 HOST_WIDE_INT adjustment)
1994{
1995 switch (mode)
1996 {
1997 case DImode:
1998 return gen_storewb_pairdi_di (base, base, reg, reg2,
1999 GEN_INT (-adjustment),
2000 GEN_INT (UNITS_PER_WORD - adjustment));
2001 case DFmode:
2002 return gen_storewb_pairdf_di (base, base, reg, reg2,
2003 GEN_INT (-adjustment),
2004 GEN_INT (UNITS_PER_WORD - adjustment));
2005 default:
2006 gcc_unreachable ();
2007 }
2008}
2009
2010static void
ef4bddc2 2011aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
80c11907
JW
2012 unsigned regno2, HOST_WIDE_INT adjustment)
2013{
5d8a22a5 2014 rtx_insn *insn;
80c11907
JW
2015 rtx reg1 = gen_rtx_REG (mode, regno1);
2016 rtx reg2 = gen_rtx_REG (mode, regno2);
2017
2018 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2019 reg2, adjustment));
2020 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
2021 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2022 RTX_FRAME_RELATED_P (insn) = 1;
2023}
2024
159313d9 2025static rtx
ef4bddc2 2026aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
2027 HOST_WIDE_INT adjustment)
2028{
2029 switch (mode)
2030 {
2031 case DImode:
2032 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2033 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2034 case DFmode:
2035 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2036 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2037 default:
2038 gcc_unreachable ();
2039 }
2040}
2041
72df5c1f 2042static rtx
ef4bddc2 2043aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
2044 rtx reg2)
2045{
2046 switch (mode)
2047 {
2048 case DImode:
2049 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2050
2051 case DFmode:
2052 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2053
2054 default:
2055 gcc_unreachable ();
2056 }
2057}
2058
2059static rtx
ef4bddc2 2060aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
2061 rtx mem2)
2062{
2063 switch (mode)
2064 {
2065 case DImode:
2066 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2067
2068 case DFmode:
2069 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2070
2071 default:
2072 gcc_unreachable ();
2073 }
2074}
2075
43e9d192 2076
43e9d192 2077static void
ef4bddc2 2078aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 2079 unsigned start, unsigned limit, bool skip_wb)
43e9d192 2080{
5d8a22a5 2081 rtx_insn *insn;
ef4bddc2 2082 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
a007a21c 2083 ? gen_frame_mem : gen_rtx_MEM);
43e9d192
IB
2084 unsigned regno;
2085 unsigned regno2;
2086
0ec74a1e 2087 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
2088 regno <= limit;
2089 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 2090 {
ae13fce3
JW
2091 rtx reg, mem;
2092 HOST_WIDE_INT offset;
64dedd72 2093
ae13fce3
JW
2094 if (skip_wb
2095 && (regno == cfun->machine->frame.wb_candidate1
2096 || regno == cfun->machine->frame.wb_candidate2))
2097 continue;
2098
2099 reg = gen_rtx_REG (mode, regno);
2100 offset = start_offset + cfun->machine->frame.reg_offset[regno];
0ec74a1e
JW
2101 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2102 offset));
64dedd72
JW
2103
2104 regno2 = aarch64_next_callee_save (regno + 1, limit);
2105
2106 if (regno2 <= limit
2107 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2108 == cfun->machine->frame.reg_offset[regno2]))
2109
43e9d192 2110 {
0ec74a1e 2111 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
2112 rtx mem2;
2113
2114 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
8ed2fc62
JW
2115 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2116 offset));
2117 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2118 reg2));
0b4a9743 2119
64dedd72
JW
2120 /* The first part of a frame-related parallel insn is
2121 always assumed to be relevant to the frame
2122 calculations; subsequent parts, are only
2123 frame-related if explicitly marked. */
2124 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2125 regno = regno2;
2126 }
2127 else
8ed2fc62
JW
2128 insn = emit_move_insn (mem, reg);
2129
2130 RTX_FRAME_RELATED_P (insn) = 1;
2131 }
2132}
2133
2134static void
ef4bddc2 2135aarch64_restore_callee_saves (machine_mode mode,
8ed2fc62 2136 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 2137 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 2138{
8ed2fc62 2139 rtx base_rtx = stack_pointer_rtx;
ef4bddc2 2140 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
8ed2fc62
JW
2141 ? gen_frame_mem : gen_rtx_MEM);
2142 unsigned regno;
2143 unsigned regno2;
2144 HOST_WIDE_INT offset;
2145
2146 for (regno = aarch64_next_callee_save (start, limit);
2147 regno <= limit;
2148 regno = aarch64_next_callee_save (regno + 1, limit))
2149 {
ae13fce3 2150 rtx reg, mem;
8ed2fc62 2151
ae13fce3
JW
2152 if (skip_wb
2153 && (regno == cfun->machine->frame.wb_candidate1
2154 || regno == cfun->machine->frame.wb_candidate2))
2155 continue;
2156
2157 reg = gen_rtx_REG (mode, regno);
8ed2fc62
JW
2158 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2159 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2160
2161 regno2 = aarch64_next_callee_save (regno + 1, limit);
2162
2163 if (regno2 <= limit
2164 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2165 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 2166 {
8ed2fc62
JW
2167 rtx reg2 = gen_rtx_REG (mode, regno2);
2168 rtx mem2;
2169
2170 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2171 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 2172 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 2173
dd991abb 2174 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 2175 regno = regno2;
43e9d192 2176 }
8ed2fc62 2177 else
dd991abb
RH
2178 emit_move_insn (reg, mem);
2179 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 2180 }
43e9d192
IB
2181}
2182
2183/* AArch64 stack frames generated by this compiler look like:
2184
2185 +-------------------------------+
2186 | |
2187 | incoming stack arguments |
2188 | |
34834420
MS
2189 +-------------------------------+
2190 | | <-- incoming stack pointer (aligned)
43e9d192
IB
2191 | callee-allocated save area |
2192 | for register varargs |
2193 | |
34834420
MS
2194 +-------------------------------+
2195 | local variables | <-- frame_pointer_rtx
43e9d192
IB
2196 | |
2197 +-------------------------------+
454fdba9
RL
2198 | padding0 | \
2199 +-------------------------------+ |
454fdba9 2200 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
2201 +-------------------------------+ |
2202 | LR' | |
2203 +-------------------------------+ |
34834420
MS
2204 | FP' | / <- hard_frame_pointer_rtx (aligned)
2205 +-------------------------------+
43e9d192
IB
2206 | dynamic allocation |
2207 +-------------------------------+
34834420
MS
2208 | padding |
2209 +-------------------------------+
2210 | outgoing stack arguments | <-- arg_pointer
2211 | |
2212 +-------------------------------+
2213 | | <-- stack_pointer_rtx (aligned)
43e9d192 2214
34834420
MS
2215 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2216 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2217 unchanged. */
43e9d192
IB
2218
2219/* Generate the prologue instructions for entry into a function.
2220 Establish the stack frame by decreasing the stack pointer with a
2221 properly calculated size and, if necessary, create a frame record
2222 filled with the values of LR and previous frame pointer. The
6991c977 2223 current FP is also set up if it is in use. */
43e9d192
IB
2224
2225void
2226aarch64_expand_prologue (void)
2227{
2228 /* sub sp, sp, #<frame_size>
2229 stp {fp, lr}, [sp, #<frame_size> - 16]
2230 add fp, sp, #<frame_size> - hardfp_offset
2231 stp {cs_reg}, [fp, #-16] etc.
2232
2233 sub sp, sp, <final_adjustment_if_any>
2234 */
43e9d192 2235 HOST_WIDE_INT frame_size, offset;
1c960e02 2236 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
dd991abb 2237 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2238 rtx_insn *insn;
43e9d192
IB
2239
2240 aarch64_layout_frame ();
43e9d192 2241
dd991abb
RH
2242 offset = frame_size = cfun->machine->frame.frame_size;
2243 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2244 fp_offset = frame_size - hard_fp_offset;
43e9d192 2245
dd991abb
RH
2246 if (flag_stack_usage_info)
2247 current_function_static_stack_size = frame_size;
43e9d192 2248
44c0e7b9 2249 /* Store pairs and load pairs have a range only -512 to 504. */
43e9d192
IB
2250 if (offset >= 512)
2251 {
2252 /* When the frame has a large size, an initial decrease is done on
2253 the stack pointer to jump over the callee-allocated save area for
2254 register varargs, the local variable area and/or the callee-saved
2255 register area. This will allow the pre-index write-back
2256 store pair instructions to be used for setting up the stack frame
2257 efficiently. */
dd991abb 2258 offset = hard_fp_offset;
43e9d192
IB
2259 if (offset >= 512)
2260 offset = cfun->machine->frame.saved_regs_size;
2261
2262 frame_size -= (offset + crtl->outgoing_args_size);
2263 fp_offset = 0;
2264
2265 if (frame_size >= 0x1000000)
2266 {
2267 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2268 emit_move_insn (op0, GEN_INT (-frame_size));
dd991abb
RH
2269 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2270
2271 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2272 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2273 plus_constant (Pmode, stack_pointer_rtx,
2274 -frame_size)));
2275 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2276 }
2277 else if (frame_size > 0)
2278 {
dd991abb
RH
2279 int hi_ofs = frame_size & 0xfff000;
2280 int lo_ofs = frame_size & 0x000fff;
2281
2282 if (hi_ofs)
43e9d192
IB
2283 {
2284 insn = emit_insn (gen_add2_insn
dd991abb 2285 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
43e9d192
IB
2286 RTX_FRAME_RELATED_P (insn) = 1;
2287 }
dd991abb 2288 if (lo_ofs)
43e9d192
IB
2289 {
2290 insn = emit_insn (gen_add2_insn
dd991abb 2291 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
43e9d192
IB
2292 RTX_FRAME_RELATED_P (insn) = 1;
2293 }
2294 }
2295 }
2296 else
2297 frame_size = -1;
2298
2299 if (offset > 0)
2300 {
ae13fce3
JW
2301 bool skip_wb = false;
2302
43e9d192
IB
2303 if (frame_pointer_needed)
2304 {
c5e1f66e
JW
2305 skip_wb = true;
2306
43e9d192
IB
2307 if (fp_offset)
2308 {
2309 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2310 GEN_INT (-offset)));
2311 RTX_FRAME_RELATED_P (insn) = 1;
80c11907
JW
2312
2313 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
c5e1f66e 2314 R30_REGNUM, false);
43e9d192
IB
2315 }
2316 else
80c11907 2317 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
43e9d192
IB
2318
2319 /* Set up frame pointer to point to the location of the
2320 previous frame pointer on the stack. */
2321 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2322 stack_pointer_rtx,
2323 GEN_INT (fp_offset)));
43e9d192 2324 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2325 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192
IB
2326 }
2327 else
2328 {
c5e1f66e
JW
2329 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2330 unsigned reg2 = cfun->machine->frame.wb_candidate2;
80c11907 2331
c5e1f66e
JW
2332 if (fp_offset
2333 || reg1 == FIRST_PSEUDO_REGISTER
2334 || (reg2 == FIRST_PSEUDO_REGISTER
2335 && offset >= 256))
2336 {
2337 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2338 GEN_INT (-offset)));
2339 RTX_FRAME_RELATED_P (insn) = 1;
2340 }
2341 else
2342 {
ef4bddc2 2343 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
c5e1f66e
JW
2344
2345 skip_wb = true;
2346
2347 if (reg2 == FIRST_PSEUDO_REGISTER)
2348 aarch64_pushwb_single_reg (mode1, reg1, offset);
2349 else
2350 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2351 }
43e9d192
IB
2352 }
2353
c5e1f66e
JW
2354 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2355 skip_wb);
ae13fce3
JW
2356 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2357 skip_wb);
43e9d192
IB
2358 }
2359
2360 /* when offset >= 512,
2361 sub sp, sp, #<outgoing_args_size> */
2362 if (frame_size > -1)
2363 {
2364 if (crtl->outgoing_args_size > 0)
2365 {
2366 insn = emit_insn (gen_add2_insn
2367 (stack_pointer_rtx,
2368 GEN_INT (- crtl->outgoing_args_size)));
2369 RTX_FRAME_RELATED_P (insn) = 1;
2370 }
2371 }
2372}
2373
4f942779
RL
2374/* Return TRUE if we can use a simple_return insn.
2375
2376 This function checks whether the callee saved stack is empty, which
2377 means no restore actions are need. The pro_and_epilogue will use
2378 this to check whether shrink-wrapping opt is feasible. */
2379
2380bool
2381aarch64_use_return_insn_p (void)
2382{
2383 if (!reload_completed)
2384 return false;
2385
2386 if (crtl->profile)
2387 return false;
2388
2389 aarch64_layout_frame ();
2390
2391 return cfun->machine->frame.frame_size == 0;
2392}
2393
43e9d192
IB
2394/* Generate the epilogue instructions for returning from a function. */
2395void
2396aarch64_expand_epilogue (bool for_sibcall)
2397{
1c960e02 2398 HOST_WIDE_INT frame_size, offset;
43e9d192 2399 HOST_WIDE_INT fp_offset;
dd991abb 2400 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2401 rtx_insn *insn;
43e9d192
IB
2402
2403 aarch64_layout_frame ();
43e9d192 2404
1c960e02 2405 offset = frame_size = cfun->machine->frame.frame_size;
dd991abb
RH
2406 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2407 fp_offset = frame_size - hard_fp_offset;
44c0e7b9
YZ
2408
2409 /* Store pairs and load pairs have a range only -512 to 504. */
43e9d192
IB
2410 if (offset >= 512)
2411 {
dd991abb 2412 offset = hard_fp_offset;
43e9d192
IB
2413 if (offset >= 512)
2414 offset = cfun->machine->frame.saved_regs_size;
2415
2416 frame_size -= (offset + crtl->outgoing_args_size);
2417 fp_offset = 0;
2418 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2419 {
2420 insn = emit_insn (gen_add2_insn
2421 (stack_pointer_rtx,
2422 GEN_INT (crtl->outgoing_args_size)));
2423 RTX_FRAME_RELATED_P (insn) = 1;
2424 }
2425 }
2426 else
2427 frame_size = -1;
2428
2429 /* If there were outgoing arguments or we've done dynamic stack
2430 allocation, then restore the stack pointer from the frame
2431 pointer. This is at most one insn and more efficient than using
2432 GCC's internal mechanism. */
2433 if (frame_pointer_needed
2434 && (crtl->outgoing_args_size || cfun->calls_alloca))
2435 {
2436 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2437 hard_frame_pointer_rtx,
8f454e9f
JW
2438 GEN_INT (0)));
2439 offset = offset - fp_offset;
43e9d192
IB
2440 }
2441
43e9d192
IB
2442 if (offset > 0)
2443 {
4b92caa1
JW
2444 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2445 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2446 bool skip_wb = true;
dd991abb 2447 rtx cfi_ops = NULL;
4b92caa1 2448
43e9d192 2449 if (frame_pointer_needed)
4b92caa1
JW
2450 fp_offset = 0;
2451 else if (fp_offset
2452 || reg1 == FIRST_PSEUDO_REGISTER
2453 || (reg2 == FIRST_PSEUDO_REGISTER
2454 && offset >= 256))
2455 skip_wb = false;
2456
2457 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
dd991abb 2458 skip_wb, &cfi_ops);
4b92caa1 2459 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
dd991abb 2460 skip_wb, &cfi_ops);
4b92caa1
JW
2461
2462 if (skip_wb)
43e9d192 2463 {
ef4bddc2 2464 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
dd991abb 2465 rtx rreg1 = gen_rtx_REG (mode1, reg1);
4b92caa1 2466
dd991abb 2467 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
4b92caa1 2468 if (reg2 == FIRST_PSEUDO_REGISTER)
dd991abb
RH
2469 {
2470 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2471 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2472 mem = gen_rtx_MEM (mode1, mem);
2473 insn = emit_move_insn (rreg1, mem);
2474 }
4b92caa1
JW
2475 else
2476 {
dd991abb 2477 rtx rreg2 = gen_rtx_REG (mode1, reg2);
4b92caa1 2478
dd991abb
RH
2479 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2480 insn = emit_insn (aarch64_gen_loadwb_pair
2481 (mode1, stack_pointer_rtx, rreg1,
2482 rreg2, offset));
4b92caa1 2483 }
43e9d192 2484 }
43e9d192
IB
2485 else
2486 {
2487 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2488 GEN_INT (offset)));
43e9d192 2489 }
43e9d192 2490
dd991abb
RH
2491 /* Reset the CFA to be SP + FRAME_SIZE. */
2492 rtx new_cfa = stack_pointer_rtx;
2493 if (frame_size > 0)
2494 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2495 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2496 REG_NOTES (insn) = cfi_ops;
43e9d192 2497 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2498 }
2499
dd991abb 2500 if (frame_size > 0)
43e9d192
IB
2501 {
2502 if (frame_size >= 0x1000000)
2503 {
2504 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2505 emit_move_insn (op0, GEN_INT (frame_size));
dd991abb 2506 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
43e9d192 2507 }
dd991abb 2508 else
43e9d192 2509 {
dd991abb
RH
2510 int hi_ofs = frame_size & 0xfff000;
2511 int lo_ofs = frame_size & 0x000fff;
2512
2513 if (hi_ofs && lo_ofs)
43e9d192
IB
2514 {
2515 insn = emit_insn (gen_add2_insn
dd991abb 2516 (stack_pointer_rtx, GEN_INT (hi_ofs)));
43e9d192 2517 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2518 frame_size = lo_ofs;
43e9d192 2519 }
dd991abb
RH
2520 insn = emit_insn (gen_add2_insn
2521 (stack_pointer_rtx, GEN_INT (frame_size)));
43e9d192
IB
2522 }
2523
dd991abb
RH
2524 /* Reset the CFA to be SP + 0. */
2525 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2526 RTX_FRAME_RELATED_P (insn) = 1;
2527 }
2528
2529 /* Stack adjustment for exception handler. */
2530 if (crtl->calls_eh_return)
2531 {
2532 /* We need to unwind the stack by the offset computed by
2533 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2534 to be SP; letting the CFA move during this adjustment
2535 is just as correct as retaining the CFA from the body
2536 of the function. Therefore, do nothing special. */
2537 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
2538 }
2539
2540 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2541 if (!for_sibcall)
2542 emit_jump_insn (ret_rtx);
2543}
2544
2545/* Return the place to copy the exception unwinding return address to.
2546 This will probably be a stack slot, but could (in theory be the
2547 return register). */
2548rtx
2549aarch64_final_eh_return_addr (void)
2550{
1c960e02
MS
2551 HOST_WIDE_INT fp_offset;
2552
43e9d192 2553 aarch64_layout_frame ();
1c960e02
MS
2554
2555 fp_offset = cfun->machine->frame.frame_size
2556 - cfun->machine->frame.hard_fp_offset;
43e9d192
IB
2557
2558 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2559 return gen_rtx_REG (DImode, LR_REGNUM);
2560
2561 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2562 result in a store to save LR introduced by builtin_eh_return () being
2563 incorrectly deleted because the alias is not detected.
2564 So in the calculation of the address to copy the exception unwinding
2565 return address to, we note 2 cases.
2566 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2567 we return a SP-relative location since all the addresses are SP-relative
2568 in this case. This prevents the store from being optimized away.
2569 If the fp_offset is not 0, then the addresses will be FP-relative and
2570 therefore we return a FP-relative location. */
2571
2572 if (frame_pointer_needed)
2573 {
2574 if (fp_offset)
2575 return gen_frame_mem (DImode,
2576 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2577 else
2578 return gen_frame_mem (DImode,
2579 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2580 }
2581
2582 /* If FP is not needed, we calculate the location of LR, which would be
2583 at the top of the saved registers block. */
2584
2585 return gen_frame_mem (DImode,
2586 plus_constant (Pmode,
2587 stack_pointer_rtx,
2588 fp_offset
2589 + cfun->machine->frame.saved_regs_size
2590 - 2 * UNITS_PER_WORD));
2591}
2592
9dfc162c
JG
2593/* Possibly output code to build up a constant in a register. For
2594 the benefit of the costs infrastructure, returns the number of
2595 instructions which would be emitted. GENERATE inhibits or
2596 enables code generation. */
2597
2598static int
2599aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
43e9d192 2600{
9dfc162c
JG
2601 int insns = 0;
2602
43e9d192 2603 if (aarch64_bitmask_imm (val, DImode))
9dfc162c
JG
2604 {
2605 if (generate)
2606 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2607 insns = 1;
2608 }
43e9d192
IB
2609 else
2610 {
2611 int i;
2612 int ncount = 0;
2613 int zcount = 0;
2614 HOST_WIDE_INT valp = val >> 16;
2615 HOST_WIDE_INT valm;
2616 HOST_WIDE_INT tval;
2617
2618 for (i = 16; i < 64; i += 16)
2619 {
2620 valm = (valp & 0xffff);
2621
2622 if (valm != 0)
2623 ++ zcount;
2624
2625 if (valm != 0xffff)
2626 ++ ncount;
2627
2628 valp >>= 16;
2629 }
2630
2631 /* zcount contains the number of additional MOVK instructions
2632 required if the constant is built up with an initial MOVZ instruction,
2633 while ncount is the number of MOVK instructions required if starting
2634 with a MOVN instruction. Choose the sequence that yields the fewest
2635 number of instructions, preferring MOVZ instructions when they are both
2636 the same. */
2637 if (ncount < zcount)
2638 {
9dfc162c
JG
2639 if (generate)
2640 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2641 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
43e9d192 2642 tval = 0xffff;
9dfc162c 2643 insns++;
43e9d192
IB
2644 }
2645 else
2646 {
9dfc162c
JG
2647 if (generate)
2648 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2649 GEN_INT (val & 0xffff));
43e9d192 2650 tval = 0;
9dfc162c 2651 insns++;
43e9d192
IB
2652 }
2653
2654 val >>= 16;
2655
2656 for (i = 16; i < 64; i += 16)
2657 {
2658 if ((val & 0xffff) != tval)
9dfc162c
JG
2659 {
2660 if (generate)
2661 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2662 GEN_INT (i),
2663 GEN_INT (val & 0xffff)));
2664 insns++;
2665 }
43e9d192
IB
2666 val >>= 16;
2667 }
2668 }
9dfc162c 2669 return insns;
43e9d192
IB
2670}
2671
2672static void
d9600ae5 2673aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
43e9d192
IB
2674{
2675 HOST_WIDE_INT mdelta = delta;
d9600ae5
SN
2676 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2677 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
43e9d192
IB
2678
2679 if (mdelta < 0)
2680 mdelta = -mdelta;
2681
2682 if (mdelta >= 4096 * 4096)
2683 {
9dfc162c 2684 (void) aarch64_build_constant (scratchreg, delta, true);
d9600ae5 2685 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
2686 }
2687 else if (mdelta > 0)
2688 {
43e9d192 2689 if (mdelta >= 4096)
d9600ae5
SN
2690 {
2691 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2692 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2693 if (delta < 0)
2694 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2695 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2696 else
2697 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2698 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2699 }
43e9d192 2700 if (mdelta % 4096 != 0)
d9600ae5
SN
2701 {
2702 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2703 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2704 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2705 }
43e9d192
IB
2706 }
2707}
2708
2709/* Output code to add DELTA to the first argument, and then jump
2710 to FUNCTION. Used for C++ multiple inheritance. */
2711static void
2712aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2713 HOST_WIDE_INT delta,
2714 HOST_WIDE_INT vcall_offset,
2715 tree function)
2716{
2717 /* The this pointer is always in x0. Note that this differs from
2718 Arm where the this pointer maybe bumped to r1 if r0 is required
2719 to return a pointer to an aggregate. On AArch64 a result value
2720 pointer will be in x8. */
2721 int this_regno = R0_REGNUM;
5d8a22a5
DM
2722 rtx this_rtx, temp0, temp1, addr, funexp;
2723 rtx_insn *insn;
43e9d192 2724
75f1d6fc
SN
2725 reload_completed = 1;
2726 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
2727
2728 if (vcall_offset == 0)
d9600ae5 2729 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2730 else
2731 {
28514dda 2732 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 2733
75f1d6fc
SN
2734 this_rtx = gen_rtx_REG (Pmode, this_regno);
2735 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2736 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2737
75f1d6fc
SN
2738 addr = this_rtx;
2739 if (delta != 0)
2740 {
2741 if (delta >= -256 && delta < 256)
2742 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2743 plus_constant (Pmode, this_rtx, delta));
2744 else
d9600ae5 2745 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2746 }
2747
28514dda
YZ
2748 if (Pmode == ptr_mode)
2749 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2750 else
2751 aarch64_emit_move (temp0,
2752 gen_rtx_ZERO_EXTEND (Pmode,
2753 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 2754
28514dda 2755 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 2756 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
2757 else
2758 {
9dfc162c 2759 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
75f1d6fc 2760 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
2761 }
2762
28514dda
YZ
2763 if (Pmode == ptr_mode)
2764 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2765 else
2766 aarch64_emit_move (temp1,
2767 gen_rtx_SIGN_EXTEND (Pmode,
2768 gen_rtx_MEM (ptr_mode, addr)));
2769
75f1d6fc 2770 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
2771 }
2772
75f1d6fc
SN
2773 /* Generate a tail call to the target function. */
2774 if (!TREE_USED (function))
2775 {
2776 assemble_external (function);
2777 TREE_USED (function) = 1;
2778 }
2779 funexp = XEXP (DECL_RTL (function), 0);
2780 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2781 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2782 SIBLING_CALL_P (insn) = 1;
2783
2784 insn = get_insns ();
2785 shorten_branches (insn);
2786 final_start_function (insn, file, 1);
2787 final (insn, file, 1);
43e9d192 2788 final_end_function ();
75f1d6fc
SN
2789
2790 /* Stop pretending to be a post-reload pass. */
2791 reload_completed = 0;
43e9d192
IB
2792}
2793
43e9d192
IB
2794static int
2795aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2796{
2797 if (GET_CODE (*x) == SYMBOL_REF)
2798 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2799
2800 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2801 TLS offsets, not real symbol references. */
2802 if (GET_CODE (*x) == UNSPEC
2803 && XINT (*x, 1) == UNSPEC_TLS)
2804 return -1;
2805
2806 return 0;
2807}
2808
2809static bool
2810aarch64_tls_referenced_p (rtx x)
2811{
2812 if (!TARGET_HAVE_TLS)
2813 return false;
2814
2815 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2816}
2817
2818
2819static int
2820aarch64_bitmasks_cmp (const void *i1, const void *i2)
2821{
2822 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2823 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2824
2825 if (*imm1 < *imm2)
2826 return -1;
2827 if (*imm1 > *imm2)
2828 return +1;
2829 return 0;
2830}
2831
2832
2833static void
2834aarch64_build_bitmask_table (void)
2835{
2836 unsigned HOST_WIDE_INT mask, imm;
2837 unsigned int log_e, e, s, r;
2838 unsigned int nimms = 0;
2839
2840 for (log_e = 1; log_e <= 6; log_e++)
2841 {
2842 e = 1 << log_e;
2843 if (e == 64)
2844 mask = ~(HOST_WIDE_INT) 0;
2845 else
2846 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2847 for (s = 1; s < e; s++)
2848 {
2849 for (r = 0; r < e; r++)
2850 {
2851 /* set s consecutive bits to 1 (s < 64) */
2852 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2853 /* rotate right by r */
2854 if (r != 0)
2855 imm = ((imm >> r) | (imm << (e - r))) & mask;
2856 /* replicate the constant depending on SIMD size */
2857 switch (log_e) {
2858 case 1: imm |= (imm << 2);
2859 case 2: imm |= (imm << 4);
2860 case 3: imm |= (imm << 8);
2861 case 4: imm |= (imm << 16);
2862 case 5: imm |= (imm << 32);
2863 case 6:
2864 break;
2865 default:
2866 gcc_unreachable ();
2867 }
2868 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2869 aarch64_bitmasks[nimms++] = imm;
2870 }
2871 }
2872 }
2873
2874 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2875 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2876 aarch64_bitmasks_cmp);
2877}
2878
2879
2880/* Return true if val can be encoded as a 12-bit unsigned immediate with
2881 a left shift of 0 or 12 bits. */
2882bool
2883aarch64_uimm12_shift (HOST_WIDE_INT val)
2884{
2885 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2886 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2887 );
2888}
2889
2890
2891/* Return true if val is an immediate that can be loaded into a
2892 register by a MOVZ instruction. */
2893static bool
ef4bddc2 2894aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
2895{
2896 if (GET_MODE_SIZE (mode) > 4)
2897 {
2898 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2899 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2900 return 1;
2901 }
2902 else
2903 {
2904 /* Ignore sign extension. */
2905 val &= (HOST_WIDE_INT) 0xffffffff;
2906 }
2907 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2908 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2909}
2910
2911
2912/* Return true if val is a valid bitmask immediate. */
2913bool
ef4bddc2 2914aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
2915{
2916 if (GET_MODE_SIZE (mode) < 8)
2917 {
2918 /* Replicate bit pattern. */
2919 val &= (HOST_WIDE_INT) 0xffffffff;
2920 val |= val << 32;
2921 }
2922 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2923 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2924}
2925
2926
2927/* Return true if val is an immediate that can be loaded into a
2928 register in a single instruction. */
2929bool
ef4bddc2 2930aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192
IB
2931{
2932 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2933 return 1;
2934 return aarch64_bitmask_imm (val, mode);
2935}
2936
2937static bool
ef4bddc2 2938aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
2939{
2940 rtx base, offset;
7eda14e1 2941
43e9d192
IB
2942 if (GET_CODE (x) == HIGH)
2943 return true;
2944
2945 split_const (x, &base, &offset);
2946 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda
YZ
2947 {
2948 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2949 != SYMBOL_FORCE_TO_MEM)
2950 return true;
2951 else
2952 /* Avoid generating a 64-bit relocation in ILP32; leave
2953 to aarch64_expand_mov_immediate to handle it properly. */
2954 return mode != ptr_mode;
2955 }
43e9d192
IB
2956
2957 return aarch64_tls_referenced_p (x);
2958}
2959
2960/* Return true if register REGNO is a valid index register.
2961 STRICT_P is true if REG_OK_STRICT is in effect. */
2962
2963bool
2964aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2965{
2966 if (!HARD_REGISTER_NUM_P (regno))
2967 {
2968 if (!strict_p)
2969 return true;
2970
2971 if (!reg_renumber)
2972 return false;
2973
2974 regno = reg_renumber[regno];
2975 }
2976 return GP_REGNUM_P (regno);
2977}
2978
2979/* Return true if register REGNO is a valid base register for mode MODE.
2980 STRICT_P is true if REG_OK_STRICT is in effect. */
2981
2982bool
2983aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2984{
2985 if (!HARD_REGISTER_NUM_P (regno))
2986 {
2987 if (!strict_p)
2988 return true;
2989
2990 if (!reg_renumber)
2991 return false;
2992
2993 regno = reg_renumber[regno];
2994 }
2995
2996 /* The fake registers will be eliminated to either the stack or
2997 hard frame pointer, both of which are usually valid base registers.
2998 Reload deals with the cases where the eliminated form isn't valid. */
2999 return (GP_REGNUM_P (regno)
3000 || regno == SP_REGNUM
3001 || regno == FRAME_POINTER_REGNUM
3002 || regno == ARG_POINTER_REGNUM);
3003}
3004
3005/* Return true if X is a valid base register for mode MODE.
3006 STRICT_P is true if REG_OK_STRICT is in effect. */
3007
3008static bool
3009aarch64_base_register_rtx_p (rtx x, bool strict_p)
3010{
3011 if (!strict_p && GET_CODE (x) == SUBREG)
3012 x = SUBREG_REG (x);
3013
3014 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3015}
3016
3017/* Return true if address offset is a valid index. If it is, fill in INFO
3018 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3019
3020static bool
3021aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 3022 machine_mode mode, bool strict_p)
43e9d192
IB
3023{
3024 enum aarch64_address_type type;
3025 rtx index;
3026 int shift;
3027
3028 /* (reg:P) */
3029 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3030 && GET_MODE (x) == Pmode)
3031 {
3032 type = ADDRESS_REG_REG;
3033 index = x;
3034 shift = 0;
3035 }
3036 /* (sign_extend:DI (reg:SI)) */
3037 else if ((GET_CODE (x) == SIGN_EXTEND
3038 || GET_CODE (x) == ZERO_EXTEND)
3039 && GET_MODE (x) == DImode
3040 && GET_MODE (XEXP (x, 0)) == SImode)
3041 {
3042 type = (GET_CODE (x) == SIGN_EXTEND)
3043 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3044 index = XEXP (x, 0);
3045 shift = 0;
3046 }
3047 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3048 else if (GET_CODE (x) == MULT
3049 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3050 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3051 && GET_MODE (XEXP (x, 0)) == DImode
3052 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3053 && CONST_INT_P (XEXP (x, 1)))
3054 {
3055 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3056 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3057 index = XEXP (XEXP (x, 0), 0);
3058 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3059 }
3060 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3061 else if (GET_CODE (x) == ASHIFT
3062 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3063 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3064 && GET_MODE (XEXP (x, 0)) == DImode
3065 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3066 && CONST_INT_P (XEXP (x, 1)))
3067 {
3068 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3069 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3070 index = XEXP (XEXP (x, 0), 0);
3071 shift = INTVAL (XEXP (x, 1));
3072 }
3073 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3074 else if ((GET_CODE (x) == SIGN_EXTRACT
3075 || GET_CODE (x) == ZERO_EXTRACT)
3076 && GET_MODE (x) == DImode
3077 && GET_CODE (XEXP (x, 0)) == MULT
3078 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3079 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3080 {
3081 type = (GET_CODE (x) == SIGN_EXTRACT)
3082 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3083 index = XEXP (XEXP (x, 0), 0);
3084 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3085 if (INTVAL (XEXP (x, 1)) != 32 + shift
3086 || INTVAL (XEXP (x, 2)) != 0)
3087 shift = -1;
3088 }
3089 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3090 (const_int 0xffffffff<<shift)) */
3091 else if (GET_CODE (x) == AND
3092 && GET_MODE (x) == DImode
3093 && GET_CODE (XEXP (x, 0)) == MULT
3094 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3095 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3096 && CONST_INT_P (XEXP (x, 1)))
3097 {
3098 type = ADDRESS_REG_UXTW;
3099 index = XEXP (XEXP (x, 0), 0);
3100 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3101 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3102 shift = -1;
3103 }
3104 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3105 else if ((GET_CODE (x) == SIGN_EXTRACT
3106 || GET_CODE (x) == ZERO_EXTRACT)
3107 && GET_MODE (x) == DImode
3108 && GET_CODE (XEXP (x, 0)) == ASHIFT
3109 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3110 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3111 {
3112 type = (GET_CODE (x) == SIGN_EXTRACT)
3113 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3114 index = XEXP (XEXP (x, 0), 0);
3115 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3116 if (INTVAL (XEXP (x, 1)) != 32 + shift
3117 || INTVAL (XEXP (x, 2)) != 0)
3118 shift = -1;
3119 }
3120 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3121 (const_int 0xffffffff<<shift)) */
3122 else if (GET_CODE (x) == AND
3123 && GET_MODE (x) == DImode
3124 && GET_CODE (XEXP (x, 0)) == ASHIFT
3125 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3126 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3127 && CONST_INT_P (XEXP (x, 1)))
3128 {
3129 type = ADDRESS_REG_UXTW;
3130 index = XEXP (XEXP (x, 0), 0);
3131 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3132 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3133 shift = -1;
3134 }
3135 /* (mult:P (reg:P) (const_int scale)) */
3136 else if (GET_CODE (x) == MULT
3137 && GET_MODE (x) == Pmode
3138 && GET_MODE (XEXP (x, 0)) == Pmode
3139 && CONST_INT_P (XEXP (x, 1)))
3140 {
3141 type = ADDRESS_REG_REG;
3142 index = XEXP (x, 0);
3143 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3144 }
3145 /* (ashift:P (reg:P) (const_int shift)) */
3146 else if (GET_CODE (x) == ASHIFT
3147 && GET_MODE (x) == Pmode
3148 && GET_MODE (XEXP (x, 0)) == Pmode
3149 && CONST_INT_P (XEXP (x, 1)))
3150 {
3151 type = ADDRESS_REG_REG;
3152 index = XEXP (x, 0);
3153 shift = INTVAL (XEXP (x, 1));
3154 }
3155 else
3156 return false;
3157
3158 if (GET_CODE (index) == SUBREG)
3159 index = SUBREG_REG (index);
3160
3161 if ((shift == 0 ||
3162 (shift > 0 && shift <= 3
3163 && (1 << shift) == GET_MODE_SIZE (mode)))
3164 && REG_P (index)
3165 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3166 {
3167 info->type = type;
3168 info->offset = index;
3169 info->shift = shift;
3170 return true;
3171 }
3172
3173 return false;
3174}
3175
44707478 3176bool
ef4bddc2 3177aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3178{
3179 return (offset >= -64 * GET_MODE_SIZE (mode)
3180 && offset < 64 * GET_MODE_SIZE (mode)
3181 && offset % GET_MODE_SIZE (mode) == 0);
3182}
3183
3184static inline bool
ef4bddc2 3185offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
3186 HOST_WIDE_INT offset)
3187{
3188 return offset >= -256 && offset < 256;
3189}
3190
3191static inline bool
ef4bddc2 3192offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3193{
3194 return (offset >= 0
3195 && offset < 4096 * GET_MODE_SIZE (mode)
3196 && offset % GET_MODE_SIZE (mode) == 0);
3197}
3198
3199/* Return true if X is a valid address for machine mode MODE. If it is,
3200 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3201 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3202
3203static bool
3204aarch64_classify_address (struct aarch64_address_info *info,
ef4bddc2 3205 rtx x, machine_mode mode,
43e9d192
IB
3206 RTX_CODE outer_code, bool strict_p)
3207{
3208 enum rtx_code code = GET_CODE (x);
3209 rtx op0, op1;
3210 bool allow_reg_index_p =
348d4b0a
BC
3211 outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3212 || aarch64_vector_mode_supported_p (mode));
43e9d192
IB
3213 /* Don't support anything other than POST_INC or REG addressing for
3214 AdvSIMD. */
348d4b0a 3215 if (aarch64_vect_struct_mode_p (mode)
43e9d192
IB
3216 && (code != POST_INC && code != REG))
3217 return false;
3218
3219 switch (code)
3220 {
3221 case REG:
3222 case SUBREG:
3223 info->type = ADDRESS_REG_IMM;
3224 info->base = x;
3225 info->offset = const0_rtx;
3226 return aarch64_base_register_rtx_p (x, strict_p);
3227
3228 case PLUS:
3229 op0 = XEXP (x, 0);
3230 op1 = XEXP (x, 1);
15c0c5c9
JW
3231
3232 if (! strict_p
4aa81c2e 3233 && REG_P (op0)
15c0c5c9
JW
3234 && (op0 == virtual_stack_vars_rtx
3235 || op0 == frame_pointer_rtx
3236 || op0 == arg_pointer_rtx)
4aa81c2e 3237 && CONST_INT_P (op1))
15c0c5c9
JW
3238 {
3239 info->type = ADDRESS_REG_IMM;
3240 info->base = op0;
3241 info->offset = op1;
3242
3243 return true;
3244 }
3245
43e9d192
IB
3246 if (GET_MODE_SIZE (mode) != 0
3247 && CONST_INT_P (op1)
3248 && aarch64_base_register_rtx_p (op0, strict_p))
3249 {
3250 HOST_WIDE_INT offset = INTVAL (op1);
3251
3252 info->type = ADDRESS_REG_IMM;
3253 info->base = op0;
3254 info->offset = op1;
3255
3256 /* TImode and TFmode values are allowed in both pairs of X
3257 registers and individual Q registers. The available
3258 address modes are:
3259 X,X: 7-bit signed scaled offset
3260 Q: 9-bit signed offset
3261 We conservatively require an offset representable in either mode.
3262 */
3263 if (mode == TImode || mode == TFmode)
44707478 3264 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3265 && offset_9bit_signed_unscaled_p (mode, offset));
3266
3267 if (outer_code == PARALLEL)
3268 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3269 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3270 else
3271 return (offset_9bit_signed_unscaled_p (mode, offset)
3272 || offset_12bit_unsigned_scaled_p (mode, offset));
3273 }
3274
3275 if (allow_reg_index_p)
3276 {
3277 /* Look for base + (scaled/extended) index register. */
3278 if (aarch64_base_register_rtx_p (op0, strict_p)
3279 && aarch64_classify_index (info, op1, mode, strict_p))
3280 {
3281 info->base = op0;
3282 return true;
3283 }
3284 if (aarch64_base_register_rtx_p (op1, strict_p)
3285 && aarch64_classify_index (info, op0, mode, strict_p))
3286 {
3287 info->base = op1;
3288 return true;
3289 }
3290 }
3291
3292 return false;
3293
3294 case POST_INC:
3295 case POST_DEC:
3296 case PRE_INC:
3297 case PRE_DEC:
3298 info->type = ADDRESS_REG_WB;
3299 info->base = XEXP (x, 0);
3300 info->offset = NULL_RTX;
3301 return aarch64_base_register_rtx_p (info->base, strict_p);
3302
3303 case POST_MODIFY:
3304 case PRE_MODIFY:
3305 info->type = ADDRESS_REG_WB;
3306 info->base = XEXP (x, 0);
3307 if (GET_CODE (XEXP (x, 1)) == PLUS
3308 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3309 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3310 && aarch64_base_register_rtx_p (info->base, strict_p))
3311 {
3312 HOST_WIDE_INT offset;
3313 info->offset = XEXP (XEXP (x, 1), 1);
3314 offset = INTVAL (info->offset);
3315
3316 /* TImode and TFmode values are allowed in both pairs of X
3317 registers and individual Q registers. The available
3318 address modes are:
3319 X,X: 7-bit signed scaled offset
3320 Q: 9-bit signed offset
3321 We conservatively require an offset representable in either mode.
3322 */
3323 if (mode == TImode || mode == TFmode)
44707478 3324 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3325 && offset_9bit_signed_unscaled_p (mode, offset));
3326
3327 if (outer_code == PARALLEL)
3328 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3329 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3330 else
3331 return offset_9bit_signed_unscaled_p (mode, offset);
3332 }
3333 return false;
3334
3335 case CONST:
3336 case SYMBOL_REF:
3337 case LABEL_REF:
79517551
SN
3338 /* load literal: pc-relative constant pool entry. Only supported
3339 for SI mode or larger. */
43e9d192 3340 info->type = ADDRESS_SYMBOLIC;
79517551 3341 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
3342 {
3343 rtx sym, addend;
3344
3345 split_const (x, &sym, &addend);
3346 return (GET_CODE (sym) == LABEL_REF
3347 || (GET_CODE (sym) == SYMBOL_REF
3348 && CONSTANT_POOL_ADDRESS_P (sym)));
3349 }
3350 return false;
3351
3352 case LO_SUM:
3353 info->type = ADDRESS_LO_SUM;
3354 info->base = XEXP (x, 0);
3355 info->offset = XEXP (x, 1);
3356 if (allow_reg_index_p
3357 && aarch64_base_register_rtx_p (info->base, strict_p))
3358 {
3359 rtx sym, offs;
3360 split_const (info->offset, &sym, &offs);
3361 if (GET_CODE (sym) == SYMBOL_REF
3362 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3363 == SYMBOL_SMALL_ABSOLUTE))
3364 {
3365 /* The symbol and offset must be aligned to the access size. */
3366 unsigned int align;
3367 unsigned int ref_size;
3368
3369 if (CONSTANT_POOL_ADDRESS_P (sym))
3370 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3371 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3372 {
3373 tree exp = SYMBOL_REF_DECL (sym);
3374 align = TYPE_ALIGN (TREE_TYPE (exp));
3375 align = CONSTANT_ALIGNMENT (exp, align);
3376 }
3377 else if (SYMBOL_REF_DECL (sym))
3378 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
3379 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3380 && SYMBOL_REF_BLOCK (sym) != NULL)
3381 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
3382 else
3383 align = BITS_PER_UNIT;
3384
3385 ref_size = GET_MODE_SIZE (mode);
3386 if (ref_size == 0)
3387 ref_size = GET_MODE_SIZE (DImode);
3388
3389 return ((INTVAL (offs) & (ref_size - 1)) == 0
3390 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3391 }
3392 }
3393 return false;
3394
3395 default:
3396 return false;
3397 }
3398}
3399
3400bool
3401aarch64_symbolic_address_p (rtx x)
3402{
3403 rtx offset;
3404
3405 split_const (x, &x, &offset);
3406 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3407}
3408
3409/* Classify the base of symbolic expression X, given that X appears in
3410 context CONTEXT. */
da4f13a4
MS
3411
3412enum aarch64_symbol_type
3413aarch64_classify_symbolic_expression (rtx x,
3414 enum aarch64_symbol_context context)
43e9d192
IB
3415{
3416 rtx offset;
da4f13a4 3417
43e9d192
IB
3418 split_const (x, &x, &offset);
3419 return aarch64_classify_symbol (x, context);
3420}
3421
3422
3423/* Return TRUE if X is a legitimate address for accessing memory in
3424 mode MODE. */
3425static bool
ef4bddc2 3426aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
3427{
3428 struct aarch64_address_info addr;
3429
3430 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3431}
3432
3433/* Return TRUE if X is a legitimate address for accessing memory in
3434 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3435 pair operation. */
3436bool
ef4bddc2 3437aarch64_legitimate_address_p (machine_mode mode, rtx x,
aef66c94 3438 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
3439{
3440 struct aarch64_address_info addr;
3441
3442 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3443}
3444
3445/* Return TRUE if rtx X is immediate constant 0.0 */
3446bool
3520f7cc 3447aarch64_float_const_zero_rtx_p (rtx x)
43e9d192
IB
3448{
3449 REAL_VALUE_TYPE r;
3450
3451 if (GET_MODE (x) == VOIDmode)
3452 return false;
3453
3454 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3455 if (REAL_VALUE_MINUS_ZERO (r))
3456 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3457 return REAL_VALUES_EQUAL (r, dconst0);
3458}
3459
70f09188
AP
3460/* Return the fixed registers used for condition codes. */
3461
3462static bool
3463aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3464{
3465 *p1 = CC_REGNUM;
3466 *p2 = INVALID_REGNUM;
3467 return true;
3468}
3469
78607708
TV
3470/* Emit call insn with PAT and do aarch64-specific handling. */
3471
d07a3fed 3472void
78607708
TV
3473aarch64_emit_call_insn (rtx pat)
3474{
3475 rtx insn = emit_call_insn (pat);
3476
3477 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3478 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3479 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3480}
3481
ef4bddc2 3482machine_mode
43e9d192
IB
3483aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3484{
3485 /* All floating point compares return CCFP if it is an equality
3486 comparison, and CCFPE otherwise. */
3487 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3488 {
3489 switch (code)
3490 {
3491 case EQ:
3492 case NE:
3493 case UNORDERED:
3494 case ORDERED:
3495 case UNLT:
3496 case UNLE:
3497 case UNGT:
3498 case UNGE:
3499 case UNEQ:
3500 case LTGT:
3501 return CCFPmode;
3502
3503 case LT:
3504 case LE:
3505 case GT:
3506 case GE:
3507 return CCFPEmode;
3508
3509 default:
3510 gcc_unreachable ();
3511 }
3512 }
3513
3514 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3515 && y == const0_rtx
3516 && (code == EQ || code == NE || code == LT || code == GE)
b056c910
N
3517 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3518 || GET_CODE (x) == NEG))
43e9d192
IB
3519 return CC_NZmode;
3520
1c992d1e 3521 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
3522 the comparison will have to be swapped when we emit the assembly
3523 code. */
3524 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 3525 && (REG_P (y) || GET_CODE (y) == SUBREG)
43e9d192
IB
3526 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3527 || GET_CODE (x) == LSHIFTRT
1c992d1e 3528 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
3529 return CC_SWPmode;
3530
1c992d1e
RE
3531 /* Similarly for a negated operand, but we can only do this for
3532 equalities. */
3533 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 3534 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
3535 && (code == EQ || code == NE)
3536 && GET_CODE (x) == NEG)
3537 return CC_Zmode;
3538
43e9d192
IB
3539 /* A compare of a mode narrower than SI mode against zero can be done
3540 by extending the value in the comparison. */
3541 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3542 && y == const0_rtx)
3543 /* Only use sign-extension if we really need it. */
3544 return ((code == GT || code == GE || code == LE || code == LT)
3545 ? CC_SESWPmode : CC_ZESWPmode);
3546
3547 /* For everything else, return CCmode. */
3548 return CCmode;
3549}
3550
cd5660ab 3551int
43e9d192
IB
3552aarch64_get_condition_code (rtx x)
3553{
ef4bddc2 3554 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
3555 enum rtx_code comp_code = GET_CODE (x);
3556
3557 if (GET_MODE_CLASS (mode) != MODE_CC)
3558 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3559
3560 switch (mode)
3561 {
3562 case CCFPmode:
3563 case CCFPEmode:
3564 switch (comp_code)
3565 {
3566 case GE: return AARCH64_GE;
3567 case GT: return AARCH64_GT;
3568 case LE: return AARCH64_LS;
3569 case LT: return AARCH64_MI;
3570 case NE: return AARCH64_NE;
3571 case EQ: return AARCH64_EQ;
3572 case ORDERED: return AARCH64_VC;
3573 case UNORDERED: return AARCH64_VS;
3574 case UNLT: return AARCH64_LT;
3575 case UNLE: return AARCH64_LE;
3576 case UNGT: return AARCH64_HI;
3577 case UNGE: return AARCH64_PL;
cd5660ab 3578 default: return -1;
43e9d192
IB
3579 }
3580 break;
3581
3582 case CCmode:
3583 switch (comp_code)
3584 {
3585 case NE: return AARCH64_NE;
3586 case EQ: return AARCH64_EQ;
3587 case GE: return AARCH64_GE;
3588 case GT: return AARCH64_GT;
3589 case LE: return AARCH64_LE;
3590 case LT: return AARCH64_LT;
3591 case GEU: return AARCH64_CS;
3592 case GTU: return AARCH64_HI;
3593 case LEU: return AARCH64_LS;
3594 case LTU: return AARCH64_CC;
cd5660ab 3595 default: return -1;
43e9d192
IB
3596 }
3597 break;
3598
3599 case CC_SWPmode:
3600 case CC_ZESWPmode:
3601 case CC_SESWPmode:
3602 switch (comp_code)
3603 {
3604 case NE: return AARCH64_NE;
3605 case EQ: return AARCH64_EQ;
3606 case GE: return AARCH64_LE;
3607 case GT: return AARCH64_LT;
3608 case LE: return AARCH64_GE;
3609 case LT: return AARCH64_GT;
3610 case GEU: return AARCH64_LS;
3611 case GTU: return AARCH64_CC;
3612 case LEU: return AARCH64_CS;
3613 case LTU: return AARCH64_HI;
cd5660ab 3614 default: return -1;
43e9d192
IB
3615 }
3616 break;
3617
3618 case CC_NZmode:
3619 switch (comp_code)
3620 {
3621 case NE: return AARCH64_NE;
3622 case EQ: return AARCH64_EQ;
3623 case GE: return AARCH64_PL;
3624 case LT: return AARCH64_MI;
cd5660ab 3625 default: return -1;
43e9d192
IB
3626 }
3627 break;
3628
1c992d1e
RE
3629 case CC_Zmode:
3630 switch (comp_code)
3631 {
3632 case NE: return AARCH64_NE;
3633 case EQ: return AARCH64_EQ;
cd5660ab 3634 default: return -1;
1c992d1e
RE
3635 }
3636 break;
3637
43e9d192 3638 default:
cd5660ab 3639 return -1;
43e9d192
IB
3640 break;
3641 }
3642}
3643
ddeabd3e
AL
3644bool
3645aarch64_const_vec_all_same_in_range_p (rtx x,
3646 HOST_WIDE_INT minval,
3647 HOST_WIDE_INT maxval)
3648{
3649 HOST_WIDE_INT firstval;
3650 int count, i;
3651
3652 if (GET_CODE (x) != CONST_VECTOR
3653 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
3654 return false;
3655
3656 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
3657 if (firstval < minval || firstval > maxval)
3658 return false;
3659
3660 count = CONST_VECTOR_NUNITS (x);
3661 for (i = 1; i < count; i++)
3662 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
3663 return false;
3664
3665 return true;
3666}
3667
3668bool
3669aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
3670{
3671 return aarch64_const_vec_all_same_in_range_p (x, val, val);
3672}
3673
43e9d192
IB
3674static unsigned
3675bit_count (unsigned HOST_WIDE_INT value)
3676{
3677 unsigned count = 0;
3678
3679 while (value)
3680 {
3681 count++;
3682 value &= value - 1;
3683 }
3684
3685 return count;
3686}
3687
3688void
3689aarch64_print_operand (FILE *f, rtx x, char code)
3690{
3691 switch (code)
3692 {
f541a481
KT
3693 /* An integer or symbol address without a preceding # sign. */
3694 case 'c':
3695 switch (GET_CODE (x))
3696 {
3697 case CONST_INT:
3698 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3699 break;
3700
3701 case SYMBOL_REF:
3702 output_addr_const (f, x);
3703 break;
3704
3705 case CONST:
3706 if (GET_CODE (XEXP (x, 0)) == PLUS
3707 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3708 {
3709 output_addr_const (f, x);
3710 break;
3711 }
3712 /* Fall through. */
3713
3714 default:
3715 output_operand_lossage ("Unsupported operand for code '%c'", code);
3716 }
3717 break;
3718
43e9d192
IB
3719 case 'e':
3720 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3721 {
3722 int n;
3723
4aa81c2e 3724 if (!CONST_INT_P (x)
43e9d192
IB
3725 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3726 {
3727 output_operand_lossage ("invalid operand for '%%%c'", code);
3728 return;
3729 }
3730
3731 switch (n)
3732 {
3733 case 3:
3734 fputc ('b', f);
3735 break;
3736 case 4:
3737 fputc ('h', f);
3738 break;
3739 case 5:
3740 fputc ('w', f);
3741 break;
3742 default:
3743 output_operand_lossage ("invalid operand for '%%%c'", code);
3744 return;
3745 }
3746 }
3747 break;
3748
3749 case 'p':
3750 {
3751 int n;
3752
3753 /* Print N such that 2^N == X. */
4aa81c2e 3754 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
3755 {
3756 output_operand_lossage ("invalid operand for '%%%c'", code);
3757 return;
3758 }
3759
3760 asm_fprintf (f, "%d", n);
3761 }
3762 break;
3763
3764 case 'P':
3765 /* Print the number of non-zero bits in X (a const_int). */
4aa81c2e 3766 if (!CONST_INT_P (x))
43e9d192
IB
3767 {
3768 output_operand_lossage ("invalid operand for '%%%c'", code);
3769 return;
3770 }
3771
3772 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3773 break;
3774
3775 case 'H':
3776 /* Print the higher numbered register of a pair (TImode) of regs. */
4aa81c2e 3777 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
3778 {
3779 output_operand_lossage ("invalid operand for '%%%c'", code);
3780 return;
3781 }
3782
01a3a324 3783 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
3784 break;
3785
43e9d192 3786 case 'm':
cd5660ab
KT
3787 {
3788 int cond_code;
3789 /* Print a condition (eq, ne, etc). */
43e9d192 3790
cd5660ab
KT
3791 /* CONST_TRUE_RTX means always -- that's the default. */
3792 if (x == const_true_rtx)
43e9d192 3793 return;
43e9d192 3794
cd5660ab
KT
3795 if (!COMPARISON_P (x))
3796 {
3797 output_operand_lossage ("invalid operand for '%%%c'", code);
3798 return;
3799 }
3800
3801 cond_code = aarch64_get_condition_code (x);
3802 gcc_assert (cond_code >= 0);
3803 fputs (aarch64_condition_codes[cond_code], f);
3804 }
43e9d192
IB
3805 break;
3806
3807 case 'M':
cd5660ab
KT
3808 {
3809 int cond_code;
3810 /* Print the inverse of a condition (eq <-> ne, etc). */
43e9d192 3811
cd5660ab
KT
3812 /* CONST_TRUE_RTX means never -- that's the default. */
3813 if (x == const_true_rtx)
3814 {
3815 fputs ("nv", f);
3816 return;
3817 }
43e9d192 3818
cd5660ab
KT
3819 if (!COMPARISON_P (x))
3820 {
3821 output_operand_lossage ("invalid operand for '%%%c'", code);
3822 return;
3823 }
3824 cond_code = aarch64_get_condition_code (x);
3825 gcc_assert (cond_code >= 0);
3826 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3827 (cond_code)], f);
3828 }
43e9d192
IB
3829 break;
3830
3831 case 'b':
3832 case 'h':
3833 case 's':
3834 case 'd':
3835 case 'q':
3836 /* Print a scalar FP/SIMD register name. */
3837 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3838 {
3839 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3840 return;
3841 }
50ce6f88 3842 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
3843 break;
3844
3845 case 'S':
3846 case 'T':
3847 case 'U':
3848 case 'V':
3849 /* Print the first FP/SIMD register name in a list. */
3850 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3851 {
3852 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3853 return;
3854 }
50ce6f88 3855 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
3856 break;
3857
a05c0ddf 3858 case 'X':
50d38551 3859 /* Print bottom 16 bits of integer constant in hex. */
4aa81c2e 3860 if (!CONST_INT_P (x))
a05c0ddf
IB
3861 {
3862 output_operand_lossage ("invalid operand for '%%%c'", code);
3863 return;
3864 }
50d38551 3865 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
3866 break;
3867
43e9d192
IB
3868 case 'w':
3869 case 'x':
3870 /* Print a general register name or the zero register (32-bit or
3871 64-bit). */
3520f7cc
JG
3872 if (x == const0_rtx
3873 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 3874 {
50ce6f88 3875 asm_fprintf (f, "%czr", code);
43e9d192
IB
3876 break;
3877 }
3878
3879 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3880 {
50ce6f88 3881 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
3882 break;
3883 }
3884
3885 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3886 {
50ce6f88 3887 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
3888 break;
3889 }
3890
3891 /* Fall through */
3892
3893 case 0:
3894 /* Print a normal operand, if it's a general register, then we
3895 assume DImode. */
3896 if (x == NULL)
3897 {
3898 output_operand_lossage ("missing operand");
3899 return;
3900 }
3901
3902 switch (GET_CODE (x))
3903 {
3904 case REG:
01a3a324 3905 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
3906 break;
3907
3908 case MEM:
3909 aarch64_memory_reference_mode = GET_MODE (x);
3910 output_address (XEXP (x, 0));
3911 break;
3912
3913 case LABEL_REF:
3914 case SYMBOL_REF:
3915 output_addr_const (asm_out_file, x);
3916 break;
3917
3918 case CONST_INT:
3919 asm_fprintf (f, "%wd", INTVAL (x));
3920 break;
3921
3922 case CONST_VECTOR:
3520f7cc
JG
3923 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3924 {
ddeabd3e
AL
3925 gcc_assert (
3926 aarch64_const_vec_all_same_in_range_p (x,
3927 HOST_WIDE_INT_MIN,
3928 HOST_WIDE_INT_MAX));
3520f7cc
JG
3929 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3930 }
3931 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3932 {
3933 fputc ('0', f);
3934 }
3935 else
3936 gcc_unreachable ();
43e9d192
IB
3937 break;
3938
3520f7cc
JG
3939 case CONST_DOUBLE:
3940 /* CONST_DOUBLE can represent a double-width integer.
3941 In this case, the mode of x is VOIDmode. */
3942 if (GET_MODE (x) == VOIDmode)
3943 ; /* Do Nothing. */
3944 else if (aarch64_float_const_zero_rtx_p (x))
3945 {
3946 fputc ('0', f);
3947 break;
3948 }
3949 else if (aarch64_float_const_representable_p (x))
3950 {
3951#define buf_size 20
3952 char float_buf[buf_size] = {'\0'};
3953 REAL_VALUE_TYPE r;
3954 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3955 real_to_decimal_for_mode (float_buf, &r,
3956 buf_size, buf_size,
3957 1, GET_MODE (x));
3958 asm_fprintf (asm_out_file, "%s", float_buf);
3959 break;
3960#undef buf_size
3961 }
3962 output_operand_lossage ("invalid constant");
3963 return;
43e9d192
IB
3964 default:
3965 output_operand_lossage ("invalid operand");
3966 return;
3967 }
3968 break;
3969
3970 case 'A':
3971 if (GET_CODE (x) == HIGH)
3972 x = XEXP (x, 0);
3973
3974 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3975 {
3976 case SYMBOL_SMALL_GOT:
3977 asm_fprintf (asm_out_file, ":got:");
3978 break;
3979
3980 case SYMBOL_SMALL_TLSGD:
3981 asm_fprintf (asm_out_file, ":tlsgd:");
3982 break;
3983
3984 case SYMBOL_SMALL_TLSDESC:
3985 asm_fprintf (asm_out_file, ":tlsdesc:");
3986 break;
3987
3988 case SYMBOL_SMALL_GOTTPREL:
3989 asm_fprintf (asm_out_file, ":gottprel:");
3990 break;
3991
3992 case SYMBOL_SMALL_TPREL:
3993 asm_fprintf (asm_out_file, ":tprel:");
3994 break;
3995
87dd8ab0
MS
3996 case SYMBOL_TINY_GOT:
3997 gcc_unreachable ();
3998 break;
3999
43e9d192
IB
4000 default:
4001 break;
4002 }
4003 output_addr_const (asm_out_file, x);
4004 break;
4005
4006 case 'L':
4007 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4008 {
4009 case SYMBOL_SMALL_GOT:
4010 asm_fprintf (asm_out_file, ":lo12:");
4011 break;
4012
4013 case SYMBOL_SMALL_TLSGD:
4014 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4015 break;
4016
4017 case SYMBOL_SMALL_TLSDESC:
4018 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4019 break;
4020
4021 case SYMBOL_SMALL_GOTTPREL:
4022 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4023 break;
4024
4025 case SYMBOL_SMALL_TPREL:
4026 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4027 break;
4028
87dd8ab0
MS
4029 case SYMBOL_TINY_GOT:
4030 asm_fprintf (asm_out_file, ":got:");
4031 break;
4032
43e9d192
IB
4033 default:
4034 break;
4035 }
4036 output_addr_const (asm_out_file, x);
4037 break;
4038
4039 case 'G':
4040
4041 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4042 {
4043 case SYMBOL_SMALL_TPREL:
4044 asm_fprintf (asm_out_file, ":tprel_hi12:");
4045 break;
4046 default:
4047 break;
4048 }
4049 output_addr_const (asm_out_file, x);
4050 break;
4051
4052 default:
4053 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4054 return;
4055 }
4056}
4057
4058void
4059aarch64_print_operand_address (FILE *f, rtx x)
4060{
4061 struct aarch64_address_info addr;
4062
4063 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4064 MEM, true))
4065 switch (addr.type)
4066 {
4067 case ADDRESS_REG_IMM:
4068 if (addr.offset == const0_rtx)
01a3a324 4069 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 4070 else
16a3246f 4071 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
4072 INTVAL (addr.offset));
4073 return;
4074
4075 case ADDRESS_REG_REG:
4076 if (addr.shift == 0)
16a3246f 4077 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 4078 reg_names [REGNO (addr.offset)]);
43e9d192 4079 else
16a3246f 4080 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 4081 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
4082 return;
4083
4084 case ADDRESS_REG_UXTW:
4085 if (addr.shift == 0)
16a3246f 4086 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4087 REGNO (addr.offset) - R0_REGNUM);
4088 else
16a3246f 4089 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4090 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4091 return;
4092
4093 case ADDRESS_REG_SXTW:
4094 if (addr.shift == 0)
16a3246f 4095 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4096 REGNO (addr.offset) - R0_REGNUM);
4097 else
16a3246f 4098 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4099 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4100 return;
4101
4102 case ADDRESS_REG_WB:
4103 switch (GET_CODE (x))
4104 {
4105 case PRE_INC:
16a3246f 4106 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4107 GET_MODE_SIZE (aarch64_memory_reference_mode));
4108 return;
4109 case POST_INC:
16a3246f 4110 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
43e9d192
IB
4111 GET_MODE_SIZE (aarch64_memory_reference_mode));
4112 return;
4113 case PRE_DEC:
16a3246f 4114 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4115 GET_MODE_SIZE (aarch64_memory_reference_mode));
4116 return;
4117 case POST_DEC:
16a3246f 4118 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
43e9d192
IB
4119 GET_MODE_SIZE (aarch64_memory_reference_mode));
4120 return;
4121 case PRE_MODIFY:
16a3246f 4122 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4123 INTVAL (addr.offset));
4124 return;
4125 case POST_MODIFY:
16a3246f 4126 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
4127 INTVAL (addr.offset));
4128 return;
4129 default:
4130 break;
4131 }
4132 break;
4133
4134 case ADDRESS_LO_SUM:
16a3246f 4135 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
4136 output_addr_const (f, addr.offset);
4137 asm_fprintf (f, "]");
4138 return;
4139
4140 case ADDRESS_SYMBOLIC:
4141 break;
4142 }
4143
4144 output_addr_const (f, x);
4145}
4146
43e9d192
IB
4147bool
4148aarch64_label_mentioned_p (rtx x)
4149{
4150 const char *fmt;
4151 int i;
4152
4153 if (GET_CODE (x) == LABEL_REF)
4154 return true;
4155
4156 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4157 referencing instruction, but they are constant offsets, not
4158 symbols. */
4159 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4160 return false;
4161
4162 fmt = GET_RTX_FORMAT (GET_CODE (x));
4163 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4164 {
4165 if (fmt[i] == 'E')
4166 {
4167 int j;
4168
4169 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4170 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4171 return 1;
4172 }
4173 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4174 return 1;
4175 }
4176
4177 return 0;
4178}
4179
4180/* Implement REGNO_REG_CLASS. */
4181
4182enum reg_class
4183aarch64_regno_regclass (unsigned regno)
4184{
4185 if (GP_REGNUM_P (regno))
a4a182c6 4186 return GENERAL_REGS;
43e9d192
IB
4187
4188 if (regno == SP_REGNUM)
4189 return STACK_REG;
4190
4191 if (regno == FRAME_POINTER_REGNUM
4192 || regno == ARG_POINTER_REGNUM)
f24bb080 4193 return POINTER_REGS;
43e9d192
IB
4194
4195 if (FP_REGNUM_P (regno))
4196 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4197
4198 return NO_REGS;
4199}
4200
0c4ec427 4201static rtx
ef4bddc2 4202aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
4203{
4204 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4205 where mask is selected by alignment and size of the offset.
4206 We try to pick as large a range for the offset as possible to
4207 maximize the chance of a CSE. However, for aligned addresses
4208 we limit the range to 4k so that structures with different sized
4209 elements are likely to use the same base. */
4210
4211 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4212 {
4213 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4214 HOST_WIDE_INT base_offset;
4215
4216 /* Does it look like we'll need a load/store-pair operation? */
4217 if (GET_MODE_SIZE (mode) > 16
4218 || mode == TImode)
4219 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4220 & ~((128 * GET_MODE_SIZE (mode)) - 1));
4221 /* For offsets aren't a multiple of the access size, the limit is
4222 -256...255. */
4223 else if (offset & (GET_MODE_SIZE (mode) - 1))
4224 base_offset = (offset + 0x100) & ~0x1ff;
4225 else
4226 base_offset = offset & ~0xfff;
4227
4228 if (base_offset == 0)
4229 return x;
4230
4231 offset -= base_offset;
4232 rtx base_reg = gen_reg_rtx (Pmode);
4233 rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4234 NULL_RTX);
4235 emit_move_insn (base_reg, val);
4236 x = plus_constant (Pmode, base_reg, offset);
4237 }
4238
4239 return x;
4240}
4241
43e9d192
IB
4242/* Try a machine-dependent way of reloading an illegitimate address
4243 operand. If we find one, push the reload and return the new rtx. */
4244
4245rtx
4246aarch64_legitimize_reload_address (rtx *x_p,
ef4bddc2 4247 machine_mode mode,
43e9d192
IB
4248 int opnum, int type,
4249 int ind_levels ATTRIBUTE_UNUSED)
4250{
4251 rtx x = *x_p;
4252
348d4b0a
BC
4253 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4254 if (aarch64_vect_struct_mode_p (mode)
43e9d192
IB
4255 && GET_CODE (x) == PLUS
4256 && REG_P (XEXP (x, 0))
4257 && CONST_INT_P (XEXP (x, 1)))
4258 {
4259 rtx orig_rtx = x;
4260 x = copy_rtx (x);
4261 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4262 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4263 opnum, (enum reload_type) type);
4264 return x;
4265 }
4266
4267 /* We must recognize output that we have already generated ourselves. */
4268 if (GET_CODE (x) == PLUS
4269 && GET_CODE (XEXP (x, 0)) == PLUS
4270 && REG_P (XEXP (XEXP (x, 0), 0))
4271 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4272 && CONST_INT_P (XEXP (x, 1)))
4273 {
4274 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4275 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4276 opnum, (enum reload_type) type);
4277 return x;
4278 }
4279
4280 /* We wish to handle large displacements off a base register by splitting
4281 the addend across an add and the mem insn. This can cut the number of
4282 extra insns needed from 3 to 1. It is only useful for load/store of a
4283 single register with 12 bit offset field. */
4284 if (GET_CODE (x) == PLUS
4285 && REG_P (XEXP (x, 0))
4286 && CONST_INT_P (XEXP (x, 1))
4287 && HARD_REGISTER_P (XEXP (x, 0))
4288 && mode != TImode
4289 && mode != TFmode
4290 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4291 {
4292 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4293 HOST_WIDE_INT low = val & 0xfff;
4294 HOST_WIDE_INT high = val - low;
4295 HOST_WIDE_INT offs;
4296 rtx cst;
ef4bddc2 4297 machine_mode xmode = GET_MODE (x);
28514dda
YZ
4298
4299 /* In ILP32, xmode can be either DImode or SImode. */
4300 gcc_assert (xmode == DImode || xmode == SImode);
43e9d192
IB
4301
4302 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4303 BLKmode alignment. */
4304 if (GET_MODE_SIZE (mode) == 0)
4305 return NULL_RTX;
4306
4307 offs = low % GET_MODE_SIZE (mode);
4308
4309 /* Align misaligned offset by adjusting high part to compensate. */
4310 if (offs != 0)
4311 {
4312 if (aarch64_uimm12_shift (high + offs))
4313 {
4314 /* Align down. */
4315 low = low - offs;
4316 high = high + offs;
4317 }
4318 else
4319 {
4320 /* Align up. */
4321 offs = GET_MODE_SIZE (mode) - offs;
4322 low = low + offs;
4323 high = high + (low & 0x1000) - offs;
4324 low &= 0xfff;
4325 }
4326 }
4327
4328 /* Check for overflow. */
4329 if (high + low != val)
4330 return NULL_RTX;
4331
4332 cst = GEN_INT (high);
4333 if (!aarch64_uimm12_shift (high))
28514dda 4334 cst = force_const_mem (xmode, cst);
43e9d192
IB
4335
4336 /* Reload high part into base reg, leaving the low part
acf67b8e
KT
4337 in the mem instruction.
4338 Note that replacing this gen_rtx_PLUS with plus_constant is
4339 wrong in this case because we rely on the
4340 (plus (plus reg c1) c2) structure being preserved so that
4341 XEXP (*p, 0) in push_reload below uses the correct term. */
4d0e5b54
JG
4342 x = gen_rtx_PLUS (xmode,
4343 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4344 GEN_INT (low));
43e9d192
IB
4345
4346 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 4347 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
43e9d192
IB
4348 opnum, (enum reload_type) type);
4349 return x;
4350 }
4351
4352 return NULL_RTX;
4353}
4354
4355
4356static reg_class_t
4357aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4358 reg_class_t rclass,
ef4bddc2 4359 machine_mode mode,
43e9d192
IB
4360 secondary_reload_info *sri)
4361{
43e9d192
IB
4362 /* Without the TARGET_SIMD instructions we cannot move a Q register
4363 to a Q register directly. We need a scratch. */
4364 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4365 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4366 && reg_class_subset_p (rclass, FP_REGS))
4367 {
4368 if (mode == TFmode)
4369 sri->icode = CODE_FOR_aarch64_reload_movtf;
4370 else if (mode == TImode)
4371 sri->icode = CODE_FOR_aarch64_reload_movti;
4372 return NO_REGS;
4373 }
4374
4375 /* A TFmode or TImode memory access should be handled via an FP_REGS
4376 because AArch64 has richer addressing modes for LDR/STR instructions
4377 than LDP/STP instructions. */
a4a182c6 4378 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
43e9d192
IB
4379 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4380 return FP_REGS;
4381
4382 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 4383 return GENERAL_REGS;
43e9d192
IB
4384
4385 return NO_REGS;
4386}
4387
4388static bool
4389aarch64_can_eliminate (const int from, const int to)
4390{
4391 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4392 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4393
4394 if (frame_pointer_needed)
4395 {
4396 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4397 return true;
4398 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4399 return false;
4400 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4401 && !cfun->calls_alloca)
4402 return true;
4403 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4404 return true;
0b7f8166
MS
4405
4406 return false;
43e9d192 4407 }
777e6976 4408
43e9d192
IB
4409 return true;
4410}
4411
4412HOST_WIDE_INT
4413aarch64_initial_elimination_offset (unsigned from, unsigned to)
4414{
43e9d192 4415 aarch64_layout_frame ();
78c29983
MS
4416
4417 if (to == HARD_FRAME_POINTER_REGNUM)
4418 {
4419 if (from == ARG_POINTER_REGNUM)
1c960e02 4420 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
78c29983
MS
4421
4422 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
4423 return (cfun->machine->frame.hard_fp_offset
4424 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
4425 }
4426
4427 if (to == STACK_POINTER_REGNUM)
4428 {
4429 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
4430 return (cfun->machine->frame.frame_size
4431 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
4432 }
4433
1c960e02 4434 return cfun->machine->frame.frame_size;
43e9d192
IB
4435}
4436
43e9d192
IB
4437/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4438 previous frame. */
4439
4440rtx
4441aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4442{
4443 if (count != 0)
4444 return const0_rtx;
4445 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4446}
4447
4448
4449static void
4450aarch64_asm_trampoline_template (FILE *f)
4451{
28514dda
YZ
4452 if (TARGET_ILP32)
4453 {
4454 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4455 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4456 }
4457 else
4458 {
4459 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4460 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4461 }
01a3a324 4462 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 4463 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
4464 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4465 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
4466}
4467
4468static void
4469aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4470{
4471 rtx fnaddr, mem, a_tramp;
28514dda 4472 const int tramp_code_sz = 16;
43e9d192
IB
4473
4474 /* Don't need to copy the trailing D-words, we fill those in below. */
4475 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
4476 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4477 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 4478 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
4479 if (GET_MODE (fnaddr) != ptr_mode)
4480 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
4481 emit_move_insn (mem, fnaddr);
4482
28514dda 4483 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
4484 emit_move_insn (mem, chain_value);
4485
4486 /* XXX We should really define a "clear_cache" pattern and use
4487 gen_clear_cache(). */
4488 a_tramp = XEXP (m_tramp, 0);
4489 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
4490 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4491 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4492 ptr_mode);
43e9d192
IB
4493}
4494
4495static unsigned char
ef4bddc2 4496aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192
IB
4497{
4498 switch (regclass)
4499 {
fee9ba42 4500 case CALLER_SAVE_REGS:
43e9d192
IB
4501 case POINTER_REGS:
4502 case GENERAL_REGS:
4503 case ALL_REGS:
4504 case FP_REGS:
4505 case FP_LO_REGS:
4506 return
4507 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
aef66c94 4508 (GET_MODE_SIZE (mode) + 7) / 8;
43e9d192
IB
4509 case STACK_REG:
4510 return 1;
4511
4512 case NO_REGS:
4513 return 0;
4514
4515 default:
4516 break;
4517 }
4518 gcc_unreachable ();
4519}
4520
4521static reg_class_t
78d8b9f0 4522aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 4523{
51bb310d 4524 if (regclass == POINTER_REGS)
78d8b9f0
IB
4525 return GENERAL_REGS;
4526
51bb310d
MS
4527 if (regclass == STACK_REG)
4528 {
4529 if (REG_P(x)
4530 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4531 return regclass;
4532
4533 return NO_REGS;
4534 }
4535
78d8b9f0
IB
4536 /* If it's an integer immediate that MOVI can't handle, then
4537 FP_REGS is not an option, so we return NO_REGS instead. */
4538 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4539 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4540 return NO_REGS;
4541
27bd251b
IB
4542 /* Register eliminiation can result in a request for
4543 SP+constant->FP_REGS. We cannot support such operations which
4544 use SP as source and an FP_REG as destination, so reject out
4545 right now. */
4546 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4547 {
4548 rtx lhs = XEXP (x, 0);
4549
4550 /* Look through a possible SUBREG introduced by ILP32. */
4551 if (GET_CODE (lhs) == SUBREG)
4552 lhs = SUBREG_REG (lhs);
4553
4554 gcc_assert (REG_P (lhs));
4555 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4556 POINTER_REGS));
4557 return NO_REGS;
4558 }
4559
78d8b9f0 4560 return regclass;
43e9d192
IB
4561}
4562
4563void
4564aarch64_asm_output_labelref (FILE* f, const char *name)
4565{
4566 asm_fprintf (f, "%U%s", name);
4567}
4568
4569static void
4570aarch64_elf_asm_constructor (rtx symbol, int priority)
4571{
4572 if (priority == DEFAULT_INIT_PRIORITY)
4573 default_ctor_section_asm_out_constructor (symbol, priority);
4574 else
4575 {
4576 section *s;
4577 char buf[18];
4578 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4579 s = get_section (buf, SECTION_WRITE, NULL);
4580 switch_to_section (s);
4581 assemble_align (POINTER_SIZE);
28514dda 4582 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4583 }
4584}
4585
4586static void
4587aarch64_elf_asm_destructor (rtx symbol, int priority)
4588{
4589 if (priority == DEFAULT_INIT_PRIORITY)
4590 default_dtor_section_asm_out_destructor (symbol, priority);
4591 else
4592 {
4593 section *s;
4594 char buf[18];
4595 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4596 s = get_section (buf, SECTION_WRITE, NULL);
4597 switch_to_section (s);
4598 assemble_align (POINTER_SIZE);
28514dda 4599 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4600 }
4601}
4602
4603const char*
4604aarch64_output_casesi (rtx *operands)
4605{
4606 char buf[100];
4607 char label[100];
b32d5189 4608 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
4609 int index;
4610 static const char *const patterns[4][2] =
4611 {
4612 {
4613 "ldrb\t%w3, [%0,%w1,uxtw]",
4614 "add\t%3, %4, %w3, sxtb #2"
4615 },
4616 {
4617 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4618 "add\t%3, %4, %w3, sxth #2"
4619 },
4620 {
4621 "ldr\t%w3, [%0,%w1,uxtw #2]",
4622 "add\t%3, %4, %w3, sxtw #2"
4623 },
4624 /* We assume that DImode is only generated when not optimizing and
4625 that we don't really need 64-bit address offsets. That would
4626 imply an object file with 8GB of code in a single function! */
4627 {
4628 "ldr\t%w3, [%0,%w1,uxtw #2]",
4629 "add\t%3, %4, %w3, sxtw #2"
4630 }
4631 };
4632
4633 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4634
4635 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4636
4637 gcc_assert (index >= 0 && index <= 3);
4638
4639 /* Need to implement table size reduction, by chaning the code below. */
4640 output_asm_insn (patterns[index][0], operands);
4641 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4642 snprintf (buf, sizeof (buf),
4643 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4644 output_asm_insn (buf, operands);
4645 output_asm_insn (patterns[index][1], operands);
4646 output_asm_insn ("br\t%3", operands);
4647 assemble_label (asm_out_file, label);
4648 return "";
4649}
4650
4651
4652/* Return size in bits of an arithmetic operand which is shifted/scaled and
4653 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4654 operator. */
4655
4656int
4657aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4658{
4659 if (shift >= 0 && shift <= 3)
4660 {
4661 int size;
4662 for (size = 8; size <= 32; size *= 2)
4663 {
4664 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4665 if (mask == bits << shift)
4666 return size;
4667 }
4668 }
4669 return 0;
4670}
4671
4672static bool
ef4bddc2 4673aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
4674 const_rtx x ATTRIBUTE_UNUSED)
4675{
4676 /* We can't use blocks for constants when we're using a per-function
4677 constant pool. */
4678 return false;
4679}
4680
4681static section *
ef4bddc2 4682aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
4683 rtx x ATTRIBUTE_UNUSED,
4684 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4685{
4686 /* Force all constant pool entries into the current function section. */
4687 return function_section (current_function_decl);
4688}
4689
4690
4691/* Costs. */
4692
4693/* Helper function for rtx cost calculation. Strip a shift expression
4694 from X. Returns the inner operand if successful, or the original
4695 expression on failure. */
4696static rtx
4697aarch64_strip_shift (rtx x)
4698{
4699 rtx op = x;
4700
57b77d46
RE
4701 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4702 we can convert both to ROR during final output. */
43e9d192
IB
4703 if ((GET_CODE (op) == ASHIFT
4704 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
4705 || GET_CODE (op) == LSHIFTRT
4706 || GET_CODE (op) == ROTATERT
4707 || GET_CODE (op) == ROTATE)
43e9d192
IB
4708 && CONST_INT_P (XEXP (op, 1)))
4709 return XEXP (op, 0);
4710
4711 if (GET_CODE (op) == MULT
4712 && CONST_INT_P (XEXP (op, 1))
4713 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4714 return XEXP (op, 0);
4715
4716 return x;
4717}
4718
4745e701 4719/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
4720 expression from X. Returns the inner operand if successful, or the
4721 original expression on failure. We deal with a number of possible
4722 canonicalization variations here. */
4723static rtx
4745e701 4724aarch64_strip_extend (rtx x)
43e9d192
IB
4725{
4726 rtx op = x;
4727
4728 /* Zero and sign extraction of a widened value. */
4729 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4730 && XEXP (op, 2) == const0_rtx
4745e701 4731 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
4732 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4733 XEXP (op, 1)))
4734 return XEXP (XEXP (op, 0), 0);
4735
4736 /* It can also be represented (for zero-extend) as an AND with an
4737 immediate. */
4738 if (GET_CODE (op) == AND
4739 && GET_CODE (XEXP (op, 0)) == MULT
4740 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4741 && CONST_INT_P (XEXP (op, 1))
4742 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4743 INTVAL (XEXP (op, 1))) != 0)
4744 return XEXP (XEXP (op, 0), 0);
4745
4746 /* Now handle extended register, as this may also have an optional
4747 left shift by 1..4. */
4748 if (GET_CODE (op) == ASHIFT
4749 && CONST_INT_P (XEXP (op, 1))
4750 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4751 op = XEXP (op, 0);
4752
4753 if (GET_CODE (op) == ZERO_EXTEND
4754 || GET_CODE (op) == SIGN_EXTEND)
4755 op = XEXP (op, 0);
4756
4757 if (op != x)
4758 return op;
4759
4745e701
JG
4760 return x;
4761}
4762
4763/* Helper function for rtx cost calculation. Calculate the cost of
4764 a MULT, which may be part of a multiply-accumulate rtx. Return
4765 the calculated cost of the expression, recursing manually in to
4766 operands where needed. */
4767
4768static int
4769aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4770{
4771 rtx op0, op1;
4772 const struct cpu_cost_table *extra_cost
4773 = aarch64_tune_params->insn_extra_cost;
4774 int cost = 0;
4775 bool maybe_fma = (outer == PLUS || outer == MINUS);
ef4bddc2 4776 machine_mode mode = GET_MODE (x);
4745e701
JG
4777
4778 gcc_checking_assert (code == MULT);
4779
4780 op0 = XEXP (x, 0);
4781 op1 = XEXP (x, 1);
4782
4783 if (VECTOR_MODE_P (mode))
4784 mode = GET_MODE_INNER (mode);
4785
4786 /* Integer multiply/fma. */
4787 if (GET_MODE_CLASS (mode) == MODE_INT)
4788 {
4789 /* The multiply will be canonicalized as a shift, cost it as such. */
4790 if (CONST_INT_P (op1)
4791 && exact_log2 (INTVAL (op1)) > 0)
4792 {
4793 if (speed)
4794 {
4795 if (maybe_fma)
4796 /* ADD (shifted register). */
4797 cost += extra_cost->alu.arith_shift;
4798 else
4799 /* LSL (immediate). */
4800 cost += extra_cost->alu.shift;
4801 }
4802
4803 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4804
4805 return cost;
4806 }
4807
4808 /* Integer multiplies or FMAs have zero/sign extending variants. */
4809 if ((GET_CODE (op0) == ZERO_EXTEND
4810 && GET_CODE (op1) == ZERO_EXTEND)
4811 || (GET_CODE (op0) == SIGN_EXTEND
4812 && GET_CODE (op1) == SIGN_EXTEND))
4813 {
4814 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4815 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4816
4817 if (speed)
4818 {
4819 if (maybe_fma)
4820 /* MADD/SMADDL/UMADDL. */
4821 cost += extra_cost->mult[0].extend_add;
4822 else
4823 /* MUL/SMULL/UMULL. */
4824 cost += extra_cost->mult[0].extend;
4825 }
4826
4827 return cost;
4828 }
4829
4830 /* This is either an integer multiply or an FMA. In both cases
4831 we want to recurse and cost the operands. */
4832 cost += rtx_cost (op0, MULT, 0, speed)
4833 + rtx_cost (op1, MULT, 1, speed);
4834
4835 if (speed)
4836 {
4837 if (maybe_fma)
4838 /* MADD. */
4839 cost += extra_cost->mult[mode == DImode].add;
4840 else
4841 /* MUL. */
4842 cost += extra_cost->mult[mode == DImode].simple;
4843 }
4844
4845 return cost;
4846 }
4847 else
4848 {
4849 if (speed)
4850 {
3d840f7d 4851 /* Floating-point FMA/FMUL can also support negations of the
4745e701
JG
4852 operands. */
4853 if (GET_CODE (op0) == NEG)
3d840f7d 4854 op0 = XEXP (op0, 0);
4745e701 4855 if (GET_CODE (op1) == NEG)
3d840f7d 4856 op1 = XEXP (op1, 0);
4745e701
JG
4857
4858 if (maybe_fma)
4859 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4860 cost += extra_cost->fp[mode == DFmode].fma;
4861 else
3d840f7d 4862 /* FMUL/FNMUL. */
4745e701
JG
4863 cost += extra_cost->fp[mode == DFmode].mult;
4864 }
4865
4866 cost += rtx_cost (op0, MULT, 0, speed)
4867 + rtx_cost (op1, MULT, 1, speed);
4868 return cost;
4869 }
43e9d192
IB
4870}
4871
67747367
JG
4872static int
4873aarch64_address_cost (rtx x,
ef4bddc2 4874 machine_mode mode,
67747367
JG
4875 addr_space_t as ATTRIBUTE_UNUSED,
4876 bool speed)
4877{
4878 enum rtx_code c = GET_CODE (x);
4879 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4880 struct aarch64_address_info info;
4881 int cost = 0;
4882 info.shift = 0;
4883
4884 if (!aarch64_classify_address (&info, x, mode, c, false))
4885 {
4886 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4887 {
4888 /* This is a CONST or SYMBOL ref which will be split
4889 in a different way depending on the code model in use.
4890 Cost it through the generic infrastructure. */
4891 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4892 /* Divide through by the cost of one instruction to
4893 bring it to the same units as the address costs. */
4894 cost_symbol_ref /= COSTS_N_INSNS (1);
4895 /* The cost is then the cost of preparing the address,
4896 followed by an immediate (possibly 0) offset. */
4897 return cost_symbol_ref + addr_cost->imm_offset;
4898 }
4899 else
4900 {
4901 /* This is most likely a jump table from a case
4902 statement. */
4903 return addr_cost->register_offset;
4904 }
4905 }
4906
4907 switch (info.type)
4908 {
4909 case ADDRESS_LO_SUM:
4910 case ADDRESS_SYMBOLIC:
4911 case ADDRESS_REG_IMM:
4912 cost += addr_cost->imm_offset;
4913 break;
4914
4915 case ADDRESS_REG_WB:
4916 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4917 cost += addr_cost->pre_modify;
4918 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4919 cost += addr_cost->post_modify;
4920 else
4921 gcc_unreachable ();
4922
4923 break;
4924
4925 case ADDRESS_REG_REG:
4926 cost += addr_cost->register_offset;
4927 break;
4928
4929 case ADDRESS_REG_UXTW:
4930 case ADDRESS_REG_SXTW:
4931 cost += addr_cost->register_extend;
4932 break;
4933
4934 default:
4935 gcc_unreachable ();
4936 }
4937
4938
4939 if (info.shift > 0)
4940 {
4941 /* For the sake of calculating the cost of the shifted register
4942 component, we can treat same sized modes in the same way. */
4943 switch (GET_MODE_BITSIZE (mode))
4944 {
4945 case 16:
4946 cost += addr_cost->addr_scale_costs.hi;
4947 break;
4948
4949 case 32:
4950 cost += addr_cost->addr_scale_costs.si;
4951 break;
4952
4953 case 64:
4954 cost += addr_cost->addr_scale_costs.di;
4955 break;
4956
4957 /* We can't tell, or this is a 128-bit vector. */
4958 default:
4959 cost += addr_cost->addr_scale_costs.ti;
4960 break;
4961 }
4962 }
4963
4964 return cost;
4965}
4966
7cc2145f
JG
4967/* Return true if the RTX X in mode MODE is a zero or sign extract
4968 usable in an ADD or SUB (extended register) instruction. */
4969static bool
ef4bddc2 4970aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
7cc2145f
JG
4971{
4972 /* Catch add with a sign extract.
4973 This is add_<optab><mode>_multp2. */
4974 if (GET_CODE (x) == SIGN_EXTRACT
4975 || GET_CODE (x) == ZERO_EXTRACT)
4976 {
4977 rtx op0 = XEXP (x, 0);
4978 rtx op1 = XEXP (x, 1);
4979 rtx op2 = XEXP (x, 2);
4980
4981 if (GET_CODE (op0) == MULT
4982 && CONST_INT_P (op1)
4983 && op2 == const0_rtx
4984 && CONST_INT_P (XEXP (op0, 1))
4985 && aarch64_is_extend_from_extract (mode,
4986 XEXP (op0, 1),
4987 op1))
4988 {
4989 return true;
4990 }
4991 }
4992
4993 return false;
4994}
4995
61263118
KT
4996static bool
4997aarch64_frint_unspec_p (unsigned int u)
4998{
4999 switch (u)
5000 {
5001 case UNSPEC_FRINTZ:
5002 case UNSPEC_FRINTP:
5003 case UNSPEC_FRINTM:
5004 case UNSPEC_FRINTA:
5005 case UNSPEC_FRINTN:
5006 case UNSPEC_FRINTX:
5007 case UNSPEC_FRINTI:
5008 return true;
5009
5010 default:
5011 return false;
5012 }
5013}
5014
2d5ffe46
AP
5015/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5016 storing it in *COST. Result is true if the total cost of the operation
5017 has now been calculated. */
5018static bool
5019aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5020{
b9e3afe9
AP
5021 rtx inner;
5022 rtx comparator;
5023 enum rtx_code cmpcode;
5024
5025 if (COMPARISON_P (op0))
5026 {
5027 inner = XEXP (op0, 0);
5028 comparator = XEXP (op0, 1);
5029 cmpcode = GET_CODE (op0);
5030 }
5031 else
5032 {
5033 inner = op0;
5034 comparator = const0_rtx;
5035 cmpcode = NE;
5036 }
5037
2d5ffe46
AP
5038 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5039 {
5040 /* Conditional branch. */
b9e3afe9 5041 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5042 return true;
5043 else
5044 {
b9e3afe9 5045 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 5046 {
2d5ffe46
AP
5047 if (comparator == const0_rtx)
5048 {
5049 /* TBZ/TBNZ/CBZ/CBNZ. */
5050 if (GET_CODE (inner) == ZERO_EXTRACT)
5051 /* TBZ/TBNZ. */
5052 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5053 0, speed);
5054 else
5055 /* CBZ/CBNZ. */
b9e3afe9 5056 *cost += rtx_cost (inner, cmpcode, 0, speed);
2d5ffe46
AP
5057
5058 return true;
5059 }
5060 }
b9e3afe9 5061 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 5062 {
2d5ffe46
AP
5063 /* TBZ/TBNZ. */
5064 if (comparator == const0_rtx)
5065 return true;
5066 }
5067 }
5068 }
b9e3afe9 5069 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5070 {
5071 /* It's a conditional operation based on the status flags,
5072 so it must be some flavor of CSEL. */
5073
5074 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5075 if (GET_CODE (op1) == NEG
5076 || GET_CODE (op1) == NOT
5077 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5078 op1 = XEXP (op1, 0);
5079
5080 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5081 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5082 return true;
5083 }
5084
5085 /* We don't know what this is, cost all operands. */
5086 return false;
5087}
5088
43e9d192
IB
5089/* Calculate the cost of calculating X, storing it in *COST. Result
5090 is true if the total cost of the operation has now been calculated. */
5091static bool
5092aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
5093 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5094{
a8eecd00 5095 rtx op0, op1, op2;
73250c4c 5096 const struct cpu_cost_table *extra_cost
43e9d192 5097 = aarch64_tune_params->insn_extra_cost;
ef4bddc2 5098 machine_mode mode = GET_MODE (x);
43e9d192 5099
7fc5ef02
JG
5100 /* By default, assume that everything has equivalent cost to the
5101 cheapest instruction. Any additional costs are applied as a delta
5102 above this default. */
5103 *cost = COSTS_N_INSNS (1);
5104
5105 /* TODO: The cost infrastructure currently does not handle
5106 vector operations. Assume that all vector operations
5107 are equally expensive. */
5108 if (VECTOR_MODE_P (mode))
5109 {
5110 if (speed)
5111 *cost += extra_cost->vect.alu;
5112 return true;
5113 }
5114
43e9d192
IB
5115 switch (code)
5116 {
5117 case SET:
ba123b0d
JG
5118 /* The cost depends entirely on the operands to SET. */
5119 *cost = 0;
43e9d192
IB
5120 op0 = SET_DEST (x);
5121 op1 = SET_SRC (x);
5122
5123 switch (GET_CODE (op0))
5124 {
5125 case MEM:
5126 if (speed)
2961177e
JG
5127 {
5128 rtx address = XEXP (op0, 0);
5129 if (GET_MODE_CLASS (mode) == MODE_INT)
5130 *cost += extra_cost->ldst.store;
5131 else if (mode == SFmode)
5132 *cost += extra_cost->ldst.storef;
5133 else if (mode == DFmode)
5134 *cost += extra_cost->ldst.stored;
5135
5136 *cost +=
5137 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5138 0, speed));
5139 }
43e9d192 5140
ba123b0d 5141 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
5142 return true;
5143
5144 case SUBREG:
5145 if (! REG_P (SUBREG_REG (op0)))
5146 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
ba123b0d 5147
43e9d192
IB
5148 /* Fall through. */
5149 case REG:
ba123b0d
JG
5150 /* const0_rtx is in general free, but we will use an
5151 instruction to set a register to 0. */
5152 if (REG_P (op1) || op1 == const0_rtx)
5153 {
5154 /* The cost is 1 per register copied. */
5155 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5156 / UNITS_PER_WORD;
5157 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5158 }
5159 else
5160 /* Cost is just the cost of the RHS of the set. */
5161 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
5162 return true;
5163
ba123b0d 5164 case ZERO_EXTRACT:
43e9d192 5165 case SIGN_EXTRACT:
ba123b0d
JG
5166 /* Bit-field insertion. Strip any redundant widening of
5167 the RHS to meet the width of the target. */
43e9d192
IB
5168 if (GET_CODE (op1) == SUBREG)
5169 op1 = SUBREG_REG (op1);
5170 if ((GET_CODE (op1) == ZERO_EXTEND
5171 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 5172 && CONST_INT_P (XEXP (op0, 1))
43e9d192
IB
5173 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5174 >= INTVAL (XEXP (op0, 1))))
5175 op1 = XEXP (op1, 0);
ba123b0d
JG
5176
5177 if (CONST_INT_P (op1))
5178 {
5179 /* MOV immediate is assumed to always be cheap. */
5180 *cost = COSTS_N_INSNS (1);
5181 }
5182 else
5183 {
5184 /* BFM. */
5185 if (speed)
5186 *cost += extra_cost->alu.bfi;
5187 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5188 }
5189
43e9d192
IB
5190 return true;
5191
5192 default:
ba123b0d
JG
5193 /* We can't make sense of this, assume default cost. */
5194 *cost = COSTS_N_INSNS (1);
61263118 5195 return false;
43e9d192
IB
5196 }
5197 return false;
5198
9dfc162c
JG
5199 case CONST_INT:
5200 /* If an instruction can incorporate a constant within the
5201 instruction, the instruction's expression avoids calling
5202 rtx_cost() on the constant. If rtx_cost() is called on a
5203 constant, then it is usually because the constant must be
5204 moved into a register by one or more instructions.
5205
5206 The exception is constant 0, which can be expressed
5207 as XZR/WZR and is therefore free. The exception to this is
5208 if we have (set (reg) (const0_rtx)) in which case we must cost
5209 the move. However, we can catch that when we cost the SET, so
5210 we don't need to consider that here. */
5211 if (x == const0_rtx)
5212 *cost = 0;
5213 else
5214 {
5215 /* To an approximation, building any other constant is
5216 proportionally expensive to the number of instructions
5217 required to build that constant. This is true whether we
5218 are compiling for SPEED or otherwise. */
5219 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
5220 INTVAL (x),
5221 false));
5222 }
5223 return true;
5224
5225 case CONST_DOUBLE:
5226 if (speed)
5227 {
5228 /* mov[df,sf]_aarch64. */
5229 if (aarch64_float_const_representable_p (x))
5230 /* FMOV (scalar immediate). */
5231 *cost += extra_cost->fp[mode == DFmode].fpconst;
5232 else if (!aarch64_float_const_zero_rtx_p (x))
5233 {
5234 /* This will be a load from memory. */
5235 if (mode == DFmode)
5236 *cost += extra_cost->ldst.loadd;
5237 else
5238 *cost += extra_cost->ldst.loadf;
5239 }
5240 else
5241 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5242 or MOV v0.s[0], wzr - neither of which are modeled by the
5243 cost tables. Just use the default cost. */
5244 {
5245 }
5246 }
5247
5248 return true;
5249
43e9d192
IB
5250 case MEM:
5251 if (speed)
2961177e
JG
5252 {
5253 /* For loads we want the base cost of a load, plus an
5254 approximation for the additional cost of the addressing
5255 mode. */
5256 rtx address = XEXP (x, 0);
5257 if (GET_MODE_CLASS (mode) == MODE_INT)
5258 *cost += extra_cost->ldst.load;
5259 else if (mode == SFmode)
5260 *cost += extra_cost->ldst.loadf;
5261 else if (mode == DFmode)
5262 *cost += extra_cost->ldst.loadd;
5263
5264 *cost +=
5265 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5266 0, speed));
5267 }
43e9d192
IB
5268
5269 return true;
5270
5271 case NEG:
4745e701
JG
5272 op0 = XEXP (x, 0);
5273
5274 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5275 {
5276 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5277 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5278 {
5279 /* CSETM. */
5280 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5281 return true;
5282 }
5283
5284 /* Cost this as SUB wzr, X. */
5285 op0 = CONST0_RTX (GET_MODE (x));
5286 op1 = XEXP (x, 0);
5287 goto cost_minus;
5288 }
5289
5290 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5291 {
5292 /* Support (neg(fma...)) as a single instruction only if
5293 sign of zeros is unimportant. This matches the decision
5294 making in aarch64.md. */
5295 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5296 {
5297 /* FNMADD. */
5298 *cost = rtx_cost (op0, NEG, 0, speed);
5299 return true;
5300 }
5301 if (speed)
5302 /* FNEG. */
5303 *cost += extra_cost->fp[mode == DFmode].neg;
5304 return false;
5305 }
5306
5307 return false;
43e9d192 5308
781aeb73
KT
5309 case CLRSB:
5310 case CLZ:
5311 if (speed)
5312 *cost += extra_cost->alu.clz;
5313
5314 return false;
5315
43e9d192
IB
5316 case COMPARE:
5317 op0 = XEXP (x, 0);
5318 op1 = XEXP (x, 1);
5319
5320 if (op1 == const0_rtx
5321 && GET_CODE (op0) == AND)
5322 {
5323 x = op0;
5324 goto cost_logic;
5325 }
5326
a8eecd00
JG
5327 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5328 {
5329 /* TODO: A write to the CC flags possibly costs extra, this
5330 needs encoding in the cost tables. */
5331
5332 /* CC_ZESWPmode supports zero extend for free. */
5333 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5334 op0 = XEXP (op0, 0);
5335
5336 /* ANDS. */
5337 if (GET_CODE (op0) == AND)
5338 {
5339 x = op0;
5340 goto cost_logic;
5341 }
5342
5343 if (GET_CODE (op0) == PLUS)
5344 {
5345 /* ADDS (and CMN alias). */
5346 x = op0;
5347 goto cost_plus;
5348 }
5349
5350 if (GET_CODE (op0) == MINUS)
5351 {
5352 /* SUBS. */
5353 x = op0;
5354 goto cost_minus;
5355 }
5356
5357 if (GET_CODE (op1) == NEG)
5358 {
5359 /* CMN. */
5360 if (speed)
5361 *cost += extra_cost->alu.arith;
5362
5363 *cost += rtx_cost (op0, COMPARE, 0, speed);
5364 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5365 return true;
5366 }
5367
5368 /* CMP.
5369
5370 Compare can freely swap the order of operands, and
5371 canonicalization puts the more complex operation first.
5372 But the integer MINUS logic expects the shift/extend
5373 operation in op1. */
5374 if (! (REG_P (op0)
5375 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5376 {
5377 op0 = XEXP (x, 1);
5378 op1 = XEXP (x, 0);
5379 }
5380 goto cost_minus;
5381 }
5382
5383 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5384 {
5385 /* FCMP. */
5386 if (speed)
5387 *cost += extra_cost->fp[mode == DFmode].compare;
5388
5389 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5390 {
5391 /* FCMP supports constant 0.0 for no extra cost. */
5392 return true;
5393 }
5394 return false;
5395 }
5396
5397 return false;
43e9d192
IB
5398
5399 case MINUS:
4745e701
JG
5400 {
5401 op0 = XEXP (x, 0);
5402 op1 = XEXP (x, 1);
5403
5404cost_minus:
5405 /* Detect valid immediates. */
5406 if ((GET_MODE_CLASS (mode) == MODE_INT
5407 || (GET_MODE_CLASS (mode) == MODE_CC
5408 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5409 && CONST_INT_P (op1)
5410 && aarch64_uimm12_shift (INTVAL (op1)))
5411 {
5412 *cost += rtx_cost (op0, MINUS, 0, speed);
43e9d192 5413
4745e701
JG
5414 if (speed)
5415 /* SUB(S) (immediate). */
5416 *cost += extra_cost->alu.arith;
5417 return true;
5418
5419 }
5420
7cc2145f
JG
5421 /* Look for SUB (extended register). */
5422 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5423 {
5424 if (speed)
5425 *cost += extra_cost->alu.arith_shift;
5426
5427 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5428 (enum rtx_code) GET_CODE (op1),
5429 0, speed);
5430 return true;
5431 }
5432
4745e701
JG
5433 rtx new_op1 = aarch64_strip_extend (op1);
5434
5435 /* Cost this as an FMA-alike operation. */
5436 if ((GET_CODE (new_op1) == MULT
5437 || GET_CODE (new_op1) == ASHIFT)
5438 && code != COMPARE)
5439 {
5440 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5441 (enum rtx_code) code,
5442 speed);
43e9d192 5443 *cost += rtx_cost (op0, MINUS, 0, speed);
4745e701
JG
5444 return true;
5445 }
43e9d192 5446
4745e701 5447 *cost += rtx_cost (new_op1, MINUS, 1, speed);
43e9d192 5448
4745e701
JG
5449 if (speed)
5450 {
5451 if (GET_MODE_CLASS (mode) == MODE_INT)
5452 /* SUB(S). */
5453 *cost += extra_cost->alu.arith;
5454 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5455 /* FSUB. */
5456 *cost += extra_cost->fp[mode == DFmode].addsub;
5457 }
5458 return true;
5459 }
43e9d192
IB
5460
5461 case PLUS:
4745e701
JG
5462 {
5463 rtx new_op0;
43e9d192 5464
4745e701
JG
5465 op0 = XEXP (x, 0);
5466 op1 = XEXP (x, 1);
43e9d192 5467
a8eecd00 5468cost_plus:
4745e701
JG
5469 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5470 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5471 {
5472 /* CSINC. */
5473 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5474 *cost += rtx_cost (op1, PLUS, 1, speed);
5475 return true;
5476 }
43e9d192 5477
4745e701
JG
5478 if (GET_MODE_CLASS (mode) == MODE_INT
5479 && CONST_INT_P (op1)
5480 && aarch64_uimm12_shift (INTVAL (op1)))
5481 {
5482 *cost += rtx_cost (op0, PLUS, 0, speed);
43e9d192 5483
4745e701
JG
5484 if (speed)
5485 /* ADD (immediate). */
5486 *cost += extra_cost->alu.arith;
5487 return true;
5488 }
5489
7cc2145f
JG
5490 /* Look for ADD (extended register). */
5491 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5492 {
5493 if (speed)
5494 *cost += extra_cost->alu.arith_shift;
5495
5496 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5497 (enum rtx_code) GET_CODE (op0),
5498 0, speed);
5499 return true;
5500 }
5501
4745e701
JG
5502 /* Strip any extend, leave shifts behind as we will
5503 cost them through mult_cost. */
5504 new_op0 = aarch64_strip_extend (op0);
5505
5506 if (GET_CODE (new_op0) == MULT
5507 || GET_CODE (new_op0) == ASHIFT)
5508 {
5509 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5510 speed);
5511 *cost += rtx_cost (op1, PLUS, 1, speed);
5512 return true;
5513 }
5514
5515 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5516 + rtx_cost (op1, PLUS, 1, speed));
5517
5518 if (speed)
5519 {
5520 if (GET_MODE_CLASS (mode) == MODE_INT)
5521 /* ADD. */
5522 *cost += extra_cost->alu.arith;
5523 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5524 /* FADD. */
5525 *cost += extra_cost->fp[mode == DFmode].addsub;
5526 }
5527 return true;
5528 }
43e9d192 5529
18b42b2a
KT
5530 case BSWAP:
5531 *cost = COSTS_N_INSNS (1);
5532
5533 if (speed)
5534 *cost += extra_cost->alu.rev;
5535
5536 return false;
5537
43e9d192 5538 case IOR:
f7d5cf8d
KT
5539 if (aarch_rev16_p (x))
5540 {
5541 *cost = COSTS_N_INSNS (1);
5542
5543 if (speed)
5544 *cost += extra_cost->alu.rev;
5545
5546 return true;
5547 }
5548 /* Fall through. */
43e9d192
IB
5549 case XOR:
5550 case AND:
5551 cost_logic:
5552 op0 = XEXP (x, 0);
5553 op1 = XEXP (x, 1);
5554
268c3b47
JG
5555 if (code == AND
5556 && GET_CODE (op0) == MULT
5557 && CONST_INT_P (XEXP (op0, 1))
5558 && CONST_INT_P (op1)
5559 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5560 INTVAL (op1)) != 0)
5561 {
5562 /* This is a UBFM/SBFM. */
5563 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5564 if (speed)
5565 *cost += extra_cost->alu.bfx;
5566 return true;
5567 }
5568
43e9d192
IB
5569 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5570 {
268c3b47
JG
5571 /* We possibly get the immediate for free, this is not
5572 modelled. */
43e9d192
IB
5573 if (CONST_INT_P (op1)
5574 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5575 {
268c3b47
JG
5576 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5577
5578 if (speed)
5579 *cost += extra_cost->alu.logical;
5580
5581 return true;
43e9d192
IB
5582 }
5583 else
5584 {
268c3b47
JG
5585 rtx new_op0 = op0;
5586
5587 /* Handle ORN, EON, or BIC. */
43e9d192
IB
5588 if (GET_CODE (op0) == NOT)
5589 op0 = XEXP (op0, 0);
268c3b47
JG
5590
5591 new_op0 = aarch64_strip_shift (op0);
5592
5593 /* If we had a shift on op0 then this is a logical-shift-
5594 by-register/immediate operation. Otherwise, this is just
5595 a logical operation. */
5596 if (speed)
5597 {
5598 if (new_op0 != op0)
5599 {
5600 /* Shift by immediate. */
5601 if (CONST_INT_P (XEXP (op0, 1)))
5602 *cost += extra_cost->alu.log_shift;
5603 else
5604 *cost += extra_cost->alu.log_shift_reg;
5605 }
5606 else
5607 *cost += extra_cost->alu.logical;
5608 }
5609
5610 /* In both cases we want to cost both operands. */
5611 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5612 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5613
5614 return true;
43e9d192 5615 }
43e9d192
IB
5616 }
5617 return false;
5618
268c3b47
JG
5619 case NOT:
5620 /* MVN. */
5621 if (speed)
5622 *cost += extra_cost->alu.logical;
5623
5624 /* The logical instruction could have the shifted register form,
5625 but the cost is the same if the shift is processed as a separate
5626 instruction, so we don't bother with it here. */
5627 return false;
5628
43e9d192 5629 case ZERO_EXTEND:
b1685e62
JG
5630
5631 op0 = XEXP (x, 0);
5632 /* If a value is written in SI mode, then zero extended to DI
5633 mode, the operation will in general be free as a write to
5634 a 'w' register implicitly zeroes the upper bits of an 'x'
5635 register. However, if this is
5636
5637 (set (reg) (zero_extend (reg)))
5638
5639 we must cost the explicit register move. */
5640 if (mode == DImode
5641 && GET_MODE (op0) == SImode
5642 && outer == SET)
5643 {
5644 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5645
5646 if (!op_cost && speed)
5647 /* MOV. */
5648 *cost += extra_cost->alu.extend;
5649 else
5650 /* Free, the cost is that of the SI mode operation. */
5651 *cost = op_cost;
5652
5653 return true;
5654 }
5655 else if (MEM_P (XEXP (x, 0)))
43e9d192 5656 {
b1685e62
JG
5657 /* All loads can zero extend to any size for free. */
5658 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
43e9d192
IB
5659 return true;
5660 }
b1685e62
JG
5661
5662 /* UXTB/UXTH. */
5663 if (speed)
5664 *cost += extra_cost->alu.extend;
5665
43e9d192
IB
5666 return false;
5667
5668 case SIGN_EXTEND:
b1685e62 5669 if (MEM_P (XEXP (x, 0)))
43e9d192 5670 {
b1685e62
JG
5671 /* LDRSH. */
5672 if (speed)
5673 {
5674 rtx address = XEXP (XEXP (x, 0), 0);
5675 *cost += extra_cost->ldst.load_sign_extend;
5676
5677 *cost +=
5678 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5679 0, speed));
5680 }
43e9d192
IB
5681 return true;
5682 }
b1685e62
JG
5683
5684 if (speed)
5685 *cost += extra_cost->alu.extend;
43e9d192
IB
5686 return false;
5687
ba0cfa17
JG
5688 case ASHIFT:
5689 op0 = XEXP (x, 0);
5690 op1 = XEXP (x, 1);
5691
5692 if (CONST_INT_P (op1))
5693 {
5694 /* LSL (immediate), UBMF, UBFIZ and friends. These are all
5695 aliases. */
5696 if (speed)
5697 *cost += extra_cost->alu.shift;
5698
5699 /* We can incorporate zero/sign extend for free. */
5700 if (GET_CODE (op0) == ZERO_EXTEND
5701 || GET_CODE (op0) == SIGN_EXTEND)
5702 op0 = XEXP (op0, 0);
5703
5704 *cost += rtx_cost (op0, ASHIFT, 0, speed);
5705 return true;
5706 }
5707 else
5708 {
5709 /* LSLV. */
5710 if (speed)
5711 *cost += extra_cost->alu.shift_reg;
5712
5713 return false; /* All arguments need to be in registers. */
5714 }
5715
43e9d192 5716 case ROTATE:
43e9d192
IB
5717 case ROTATERT:
5718 case LSHIFTRT:
43e9d192 5719 case ASHIFTRT:
ba0cfa17
JG
5720 op0 = XEXP (x, 0);
5721 op1 = XEXP (x, 1);
43e9d192 5722
ba0cfa17
JG
5723 if (CONST_INT_P (op1))
5724 {
5725 /* ASR (immediate) and friends. */
5726 if (speed)
5727 *cost += extra_cost->alu.shift;
43e9d192 5728
ba0cfa17
JG
5729 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5730 return true;
5731 }
5732 else
5733 {
5734
5735 /* ASR (register) and friends. */
5736 if (speed)
5737 *cost += extra_cost->alu.shift_reg;
5738
5739 return false; /* All arguments need to be in registers. */
5740 }
43e9d192 5741
909734be
JG
5742 case SYMBOL_REF:
5743
5744 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5745 {
5746 /* LDR. */
5747 if (speed)
5748 *cost += extra_cost->ldst.load;
5749 }
5750 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5751 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5752 {
5753 /* ADRP, followed by ADD. */
5754 *cost += COSTS_N_INSNS (1);
5755 if (speed)
5756 *cost += 2 * extra_cost->alu.arith;
5757 }
5758 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5759 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5760 {
5761 /* ADR. */
5762 if (speed)
5763 *cost += extra_cost->alu.arith;
5764 }
5765
5766 if (flag_pic)
5767 {
5768 /* One extra load instruction, after accessing the GOT. */
5769 *cost += COSTS_N_INSNS (1);
5770 if (speed)
5771 *cost += extra_cost->ldst.load;
5772 }
43e9d192
IB
5773 return true;
5774
909734be 5775 case HIGH:
43e9d192 5776 case LO_SUM:
909734be
JG
5777 /* ADRP/ADD (immediate). */
5778 if (speed)
5779 *cost += extra_cost->alu.arith;
43e9d192
IB
5780 return true;
5781
5782 case ZERO_EXTRACT:
5783 case SIGN_EXTRACT:
7cc2145f
JG
5784 /* UBFX/SBFX. */
5785 if (speed)
5786 *cost += extra_cost->alu.bfx;
5787
5788 /* We can trust that the immediates used will be correct (there
5789 are no by-register forms), so we need only cost op0. */
5790 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
43e9d192
IB
5791 return true;
5792
5793 case MULT:
4745e701
JG
5794 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5795 /* aarch64_rtx_mult_cost always handles recursion to its
5796 operands. */
5797 return true;
43e9d192
IB
5798
5799 case MOD:
5800 case UMOD:
43e9d192
IB
5801 if (speed)
5802 {
5803 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c
KT
5804 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5805 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
43e9d192 5806 else if (GET_MODE (x) == DFmode)
73250c4c
KT
5807 *cost += (extra_cost->fp[1].mult
5808 + extra_cost->fp[1].div);
43e9d192 5809 else if (GET_MODE (x) == SFmode)
73250c4c
KT
5810 *cost += (extra_cost->fp[0].mult
5811 + extra_cost->fp[0].div);
43e9d192
IB
5812 }
5813 return false; /* All arguments need to be in registers. */
5814
5815 case DIV:
5816 case UDIV:
4105fe38 5817 case SQRT:
43e9d192
IB
5818 if (speed)
5819 {
4105fe38
JG
5820 if (GET_MODE_CLASS (mode) == MODE_INT)
5821 /* There is no integer SQRT, so only DIV and UDIV can get
5822 here. */
5823 *cost += extra_cost->mult[mode == DImode].idiv;
5824 else
5825 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
5826 }
5827 return false; /* All arguments need to be in registers. */
5828
a8eecd00 5829 case IF_THEN_ELSE:
2d5ffe46
AP
5830 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
5831 XEXP (x, 2), cost, speed);
a8eecd00
JG
5832
5833 case EQ:
5834 case NE:
5835 case GT:
5836 case GTU:
5837 case LT:
5838 case LTU:
5839 case GE:
5840 case GEU:
5841 case LE:
5842 case LEU:
5843
5844 return false; /* All arguments must be in registers. */
5845
b292109f
JG
5846 case FMA:
5847 op0 = XEXP (x, 0);
5848 op1 = XEXP (x, 1);
5849 op2 = XEXP (x, 2);
5850
5851 if (speed)
5852 *cost += extra_cost->fp[mode == DFmode].fma;
5853
5854 /* FMSUB, FNMADD, and FNMSUB are free. */
5855 if (GET_CODE (op0) == NEG)
5856 op0 = XEXP (op0, 0);
5857
5858 if (GET_CODE (op2) == NEG)
5859 op2 = XEXP (op2, 0);
5860
5861 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5862 and the by-element operand as operand 0. */
5863 if (GET_CODE (op1) == NEG)
5864 op1 = XEXP (op1, 0);
5865
5866 /* Catch vector-by-element operations. The by-element operand can
5867 either be (vec_duplicate (vec_select (x))) or just
5868 (vec_select (x)), depending on whether we are multiplying by
5869 a vector or a scalar.
5870
5871 Canonicalization is not very good in these cases, FMA4 will put the
5872 by-element operand as operand 0, FNMA4 will have it as operand 1. */
5873 if (GET_CODE (op0) == VEC_DUPLICATE)
5874 op0 = XEXP (op0, 0);
5875 else if (GET_CODE (op1) == VEC_DUPLICATE)
5876 op1 = XEXP (op1, 0);
5877
5878 if (GET_CODE (op0) == VEC_SELECT)
5879 op0 = XEXP (op0, 0);
5880 else if (GET_CODE (op1) == VEC_SELECT)
5881 op1 = XEXP (op1, 0);
5882
5883 /* If the remaining parameters are not registers,
5884 get the cost to put them into registers. */
5885 *cost += rtx_cost (op0, FMA, 0, speed);
5886 *cost += rtx_cost (op1, FMA, 1, speed);
5887 *cost += rtx_cost (op2, FMA, 2, speed);
5888 return true;
5889
5890 case FLOAT_EXTEND:
5891 if (speed)
5892 *cost += extra_cost->fp[mode == DFmode].widen;
5893 return false;
5894
5895 case FLOAT_TRUNCATE:
5896 if (speed)
5897 *cost += extra_cost->fp[mode == DFmode].narrow;
5898 return false;
5899
61263118
KT
5900 case FIX:
5901 case UNSIGNED_FIX:
5902 x = XEXP (x, 0);
5903 /* Strip the rounding part. They will all be implemented
5904 by the fcvt* family of instructions anyway. */
5905 if (GET_CODE (x) == UNSPEC)
5906 {
5907 unsigned int uns_code = XINT (x, 1);
5908
5909 if (uns_code == UNSPEC_FRINTA
5910 || uns_code == UNSPEC_FRINTM
5911 || uns_code == UNSPEC_FRINTN
5912 || uns_code == UNSPEC_FRINTP
5913 || uns_code == UNSPEC_FRINTZ)
5914 x = XVECEXP (x, 0, 0);
5915 }
5916
5917 if (speed)
5918 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
5919
5920 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
5921 return true;
5922
b292109f
JG
5923 case ABS:
5924 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5925 {
5926 /* FABS and FNEG are analogous. */
5927 if (speed)
5928 *cost += extra_cost->fp[mode == DFmode].neg;
5929 }
5930 else
5931 {
5932 /* Integer ABS will either be split to
5933 two arithmetic instructions, or will be an ABS
5934 (scalar), which we don't model. */
5935 *cost = COSTS_N_INSNS (2);
5936 if (speed)
5937 *cost += 2 * extra_cost->alu.arith;
5938 }
5939 return false;
5940
5941 case SMAX:
5942 case SMIN:
5943 if (speed)
5944 {
5945 /* FMAXNM/FMINNM/FMAX/FMIN.
5946 TODO: This may not be accurate for all implementations, but
5947 we do not model this in the cost tables. */
5948 *cost += extra_cost->fp[mode == DFmode].addsub;
5949 }
5950 return false;
5951
61263118
KT
5952 case UNSPEC:
5953 /* The floating point round to integer frint* instructions. */
5954 if (aarch64_frint_unspec_p (XINT (x, 1)))
5955 {
5956 if (speed)
5957 *cost += extra_cost->fp[mode == DFmode].roundint;
5958
5959 return false;
5960 }
781aeb73
KT
5961
5962 if (XINT (x, 1) == UNSPEC_RBIT)
5963 {
5964 if (speed)
5965 *cost += extra_cost->alu.rev;
5966
5967 return false;
5968 }
61263118
KT
5969 break;
5970
fb620c4a
JG
5971 case TRUNCATE:
5972
5973 /* Decompose <su>muldi3_highpart. */
5974 if (/* (truncate:DI */
5975 mode == DImode
5976 /* (lshiftrt:TI */
5977 && GET_MODE (XEXP (x, 0)) == TImode
5978 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5979 /* (mult:TI */
5980 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5981 /* (ANY_EXTEND:TI (reg:DI))
5982 (ANY_EXTEND:TI (reg:DI))) */
5983 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5984 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5985 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5986 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5987 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5988 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5989 /* (const_int 64) */
5990 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5991 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5992 {
5993 /* UMULH/SMULH. */
5994 if (speed)
5995 *cost += extra_cost->mult[mode == DImode].extend;
5996 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5997 MULT, 0, speed);
5998 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5999 MULT, 1, speed);
6000 return true;
6001 }
6002
6003 /* Fall through. */
43e9d192 6004 default:
61263118 6005 break;
43e9d192 6006 }
61263118
KT
6007
6008 if (dump_file && (dump_flags & TDF_DETAILS))
6009 fprintf (dump_file,
6010 "\nFailed to cost RTX. Assuming default cost.\n");
6011
6012 return true;
43e9d192
IB
6013}
6014
0ee859b5
JG
6015/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
6016 calculated for X. This cost is stored in *COST. Returns true
6017 if the total cost of X was calculated. */
6018static bool
6019aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
6020 int param, int *cost, bool speed)
6021{
6022 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
6023
6024 if (dump_file && (dump_flags & TDF_DETAILS))
6025 {
6026 print_rtl_single (dump_file, x);
6027 fprintf (dump_file, "\n%s cost: %d (%s)\n",
6028 speed ? "Hot" : "Cold",
6029 *cost, result ? "final" : "partial");
6030 }
6031
6032 return result;
6033}
6034
43e9d192 6035static int
ef4bddc2 6036aarch64_register_move_cost (machine_mode mode,
8a3a7e67 6037 reg_class_t from_i, reg_class_t to_i)
43e9d192 6038{
8a3a7e67
RH
6039 enum reg_class from = (enum reg_class) from_i;
6040 enum reg_class to = (enum reg_class) to_i;
43e9d192
IB
6041 const struct cpu_regmove_cost *regmove_cost
6042 = aarch64_tune_params->regmove_cost;
6043
3be07662
WD
6044 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
6045 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6046 to = GENERAL_REGS;
6047
6048 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6049 from = GENERAL_REGS;
6050
6ee70f81
AP
6051 /* Moving between GPR and stack cost is the same as GP2GP. */
6052 if ((from == GENERAL_REGS && to == STACK_REG)
6053 || (to == GENERAL_REGS && from == STACK_REG))
6054 return regmove_cost->GP2GP;
6055
6056 /* To/From the stack register, we move via the gprs. */
6057 if (to == STACK_REG || from == STACK_REG)
6058 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6059 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
6060
8919453c
WD
6061 if (GET_MODE_SIZE (mode) == 16)
6062 {
6063 /* 128-bit operations on general registers require 2 instructions. */
6064 if (from == GENERAL_REGS && to == GENERAL_REGS)
6065 return regmove_cost->GP2GP * 2;
6066 else if (from == GENERAL_REGS)
6067 return regmove_cost->GP2FP * 2;
6068 else if (to == GENERAL_REGS)
6069 return regmove_cost->FP2GP * 2;
6070
6071 /* When AdvSIMD instructions are disabled it is not possible to move
6072 a 128-bit value directly between Q registers. This is handled in
6073 secondary reload. A general register is used as a scratch to move
6074 the upper DI value and the lower DI value is moved directly,
6075 hence the cost is the sum of three moves. */
6076 if (! TARGET_SIMD)
6077 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6078
6079 return regmove_cost->FP2FP;
6080 }
6081
43e9d192
IB
6082 if (from == GENERAL_REGS && to == GENERAL_REGS)
6083 return regmove_cost->GP2GP;
6084 else if (from == GENERAL_REGS)
6085 return regmove_cost->GP2FP;
6086 else if (to == GENERAL_REGS)
6087 return regmove_cost->FP2GP;
6088
43e9d192
IB
6089 return regmove_cost->FP2FP;
6090}
6091
6092static int
ef4bddc2 6093aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
6094 reg_class_t rclass ATTRIBUTE_UNUSED,
6095 bool in ATTRIBUTE_UNUSED)
6096{
6097 return aarch64_tune_params->memmov_cost;
6098}
6099
d126a4ae
AP
6100/* Return the number of instructions that can be issued per cycle. */
6101static int
6102aarch64_sched_issue_rate (void)
6103{
6104 return aarch64_tune_params->issue_rate;
6105}
6106
8990e73a
TB
6107/* Vectorizer cost model target hooks. */
6108
6109/* Implement targetm.vectorize.builtin_vectorization_cost. */
6110static int
6111aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6112 tree vectype,
6113 int misalign ATTRIBUTE_UNUSED)
6114{
6115 unsigned elements;
6116
6117 switch (type_of_cost)
6118 {
6119 case scalar_stmt:
6120 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6121
6122 case scalar_load:
6123 return aarch64_tune_params->vec_costs->scalar_load_cost;
6124
6125 case scalar_store:
6126 return aarch64_tune_params->vec_costs->scalar_store_cost;
6127
6128 case vector_stmt:
6129 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6130
6131 case vector_load:
6132 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6133
6134 case vector_store:
6135 return aarch64_tune_params->vec_costs->vec_store_cost;
6136
6137 case vec_to_scalar:
6138 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6139
6140 case scalar_to_vec:
6141 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6142
6143 case unaligned_load:
6144 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6145
6146 case unaligned_store:
6147 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6148
6149 case cond_branch_taken:
6150 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6151
6152 case cond_branch_not_taken:
6153 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6154
6155 case vec_perm:
6156 case vec_promote_demote:
6157 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6158
6159 case vec_construct:
6160 elements = TYPE_VECTOR_SUBPARTS (vectype);
6161 return elements / 2 + 1;
6162
6163 default:
6164 gcc_unreachable ();
6165 }
6166}
6167
6168/* Implement targetm.vectorize.add_stmt_cost. */
6169static unsigned
6170aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6171 struct _stmt_vec_info *stmt_info, int misalign,
6172 enum vect_cost_model_location where)
6173{
6174 unsigned *cost = (unsigned *) data;
6175 unsigned retval = 0;
6176
6177 if (flag_vect_cost_model)
6178 {
6179 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6180 int stmt_cost =
6181 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6182
6183 /* Statements in an inner loop relative to the loop being
6184 vectorized are weighted more heavily. The value here is
6185 a function (linear for now) of the loop nest level. */
6186 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6187 {
6188 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6189 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6190 unsigned nest_level = loop_depth (loop);
6191
6192 count *= nest_level;
6193 }
6194
6195 retval = (unsigned) (count * stmt_cost);
6196 cost[where] += retval;
6197 }
6198
6199 return retval;
6200}
6201
43e9d192
IB
6202static void initialize_aarch64_code_model (void);
6203
6204/* Parse the architecture extension string. */
6205
6206static void
6207aarch64_parse_extension (char *str)
6208{
6209 /* The extension string is parsed left to right. */
6210 const struct aarch64_option_extension *opt = NULL;
6211
6212 /* Flag to say whether we are adding or removing an extension. */
6213 int adding_ext = -1;
6214
6215 while (str != NULL && *str != 0)
6216 {
6217 char *ext;
6218 size_t len;
6219
6220 str++;
6221 ext = strchr (str, '+');
6222
6223 if (ext != NULL)
6224 len = ext - str;
6225 else
6226 len = strlen (str);
6227
6228 if (len >= 2 && strncmp (str, "no", 2) == 0)
6229 {
6230 adding_ext = 0;
6231 len -= 2;
6232 str += 2;
6233 }
6234 else if (len > 0)
6235 adding_ext = 1;
6236
6237 if (len == 0)
6238 {
6239 error ("missing feature modifier after %qs", "+no");
6240 return;
6241 }
6242
6243 /* Scan over the extensions table trying to find an exact match. */
6244 for (opt = all_extensions; opt->name != NULL; opt++)
6245 {
6246 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6247 {
6248 /* Add or remove the extension. */
6249 if (adding_ext)
6250 aarch64_isa_flags |= opt->flags_on;
6251 else
6252 aarch64_isa_flags &= ~(opt->flags_off);
6253 break;
6254 }
6255 }
6256
6257 if (opt->name == NULL)
6258 {
6259 /* Extension not found in list. */
6260 error ("unknown feature modifier %qs", str);
6261 return;
6262 }
6263
6264 str = ext;
6265 };
6266
6267 return;
6268}
6269
6270/* Parse the ARCH string. */
6271
6272static void
6273aarch64_parse_arch (void)
6274{
6275 char *ext;
6276 const struct processor *arch;
6277 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6278 size_t len;
6279
6280 strcpy (str, aarch64_arch_string);
6281
6282 ext = strchr (str, '+');
6283
6284 if (ext != NULL)
6285 len = ext - str;
6286 else
6287 len = strlen (str);
6288
6289 if (len == 0)
6290 {
6291 error ("missing arch name in -march=%qs", str);
6292 return;
6293 }
6294
6295 /* Loop through the list of supported ARCHs to find a match. */
6296 for (arch = all_architectures; arch->name != NULL; arch++)
6297 {
6298 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6299 {
6300 selected_arch = arch;
6301 aarch64_isa_flags = selected_arch->flags;
ffee7aa9
JG
6302
6303 if (!selected_cpu)
6304 selected_cpu = &all_cores[selected_arch->core];
43e9d192
IB
6305
6306 if (ext != NULL)
6307 {
6308 /* ARCH string contains at least one extension. */
6309 aarch64_parse_extension (ext);
6310 }
6311
ffee7aa9
JG
6312 if (strcmp (selected_arch->arch, selected_cpu->arch))
6313 {
6314 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6315 selected_cpu->name, selected_arch->name);
6316 }
6317
43e9d192
IB
6318 return;
6319 }
6320 }
6321
6322 /* ARCH name not found in list. */
6323 error ("unknown value %qs for -march", str);
6324 return;
6325}
6326
6327/* Parse the CPU string. */
6328
6329static void
6330aarch64_parse_cpu (void)
6331{
6332 char *ext;
6333 const struct processor *cpu;
6334 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6335 size_t len;
6336
6337 strcpy (str, aarch64_cpu_string);
6338
6339 ext = strchr (str, '+');
6340
6341 if (ext != NULL)
6342 len = ext - str;
6343 else
6344 len = strlen (str);
6345
6346 if (len == 0)
6347 {
6348 error ("missing cpu name in -mcpu=%qs", str);
6349 return;
6350 }
6351
6352 /* Loop through the list of supported CPUs to find a match. */
6353 for (cpu = all_cores; cpu->name != NULL; cpu++)
6354 {
6355 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6356 {
6357 selected_cpu = cpu;
192ed1dd 6358 selected_tune = cpu;
43e9d192
IB
6359 aarch64_isa_flags = selected_cpu->flags;
6360
6361 if (ext != NULL)
6362 {
6363 /* CPU string contains at least one extension. */
6364 aarch64_parse_extension (ext);
6365 }
6366
6367 return;
6368 }
6369 }
6370
6371 /* CPU name not found in list. */
6372 error ("unknown value %qs for -mcpu", str);
6373 return;
6374}
6375
6376/* Parse the TUNE string. */
6377
6378static void
6379aarch64_parse_tune (void)
6380{
6381 const struct processor *cpu;
6382 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6383 strcpy (str, aarch64_tune_string);
6384
6385 /* Loop through the list of supported CPUs to find a match. */
6386 for (cpu = all_cores; cpu->name != NULL; cpu++)
6387 {
6388 if (strcmp (cpu->name, str) == 0)
6389 {
6390 selected_tune = cpu;
6391 return;
6392 }
6393 }
6394
6395 /* CPU name not found in list. */
6396 error ("unknown value %qs for -mtune", str);
6397 return;
6398}
6399
6400
6401/* Implement TARGET_OPTION_OVERRIDE. */
6402
6403static void
6404aarch64_override_options (void)
6405{
ffee7aa9
JG
6406 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6407 If either of -march or -mtune is given, they override their
6408 respective component of -mcpu.
43e9d192 6409
ffee7aa9
JG
6410 So, first parse AARCH64_CPU_STRING, then the others, be careful
6411 with -march as, if -mcpu is not present on the command line, march
6412 must set a sensible default CPU. */
6413 if (aarch64_cpu_string)
43e9d192 6414 {
ffee7aa9 6415 aarch64_parse_cpu ();
43e9d192
IB
6416 }
6417
ffee7aa9 6418 if (aarch64_arch_string)
43e9d192 6419 {
ffee7aa9 6420 aarch64_parse_arch ();
43e9d192
IB
6421 }
6422
6423 if (aarch64_tune_string)
6424 {
6425 aarch64_parse_tune ();
6426 }
6427
63892fa2
KV
6428#ifndef HAVE_AS_MABI_OPTION
6429 /* The compiler may have been configured with 2.23.* binutils, which does
6430 not have support for ILP32. */
6431 if (TARGET_ILP32)
6432 error ("Assembler does not support -mabi=ilp32");
6433#endif
6434
43e9d192
IB
6435 initialize_aarch64_code_model ();
6436
6437 aarch64_build_bitmask_table ();
6438
6439 /* This target defaults to strict volatile bitfields. */
6440 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6441 flag_strict_volatile_bitfields = 1;
6442
6443 /* If the user did not specify a processor, choose the default
6444 one for them. This will be the CPU set during configuration using
a3cd0246 6445 --with-cpu, otherwise it is "generic". */
43e9d192
IB
6446 if (!selected_cpu)
6447 {
6448 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6449 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6450 }
6451
6452 gcc_assert (selected_cpu);
6453
6454 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
6455 if (!selected_tune)
6456 selected_tune = &all_cores[selected_cpu->core];
6457
6458 aarch64_tune_flags = selected_tune->flags;
6459 aarch64_tune = selected_tune->core;
6460 aarch64_tune_params = selected_tune->tune;
6461
5e396da6
KT
6462 if (aarch64_fix_a53_err835769 == 2)
6463 {
6464#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
6465 aarch64_fix_a53_err835769 = 1;
6466#else
6467 aarch64_fix_a53_err835769 = 0;
6468#endif
6469 }
6470
43e9d192
IB
6471 aarch64_override_options_after_change ();
6472}
6473
6474/* Implement targetm.override_options_after_change. */
6475
6476static void
6477aarch64_override_options_after_change (void)
6478{
0b7f8166
MS
6479 if (flag_omit_frame_pointer)
6480 flag_omit_leaf_frame_pointer = false;
6481 else if (flag_omit_leaf_frame_pointer)
6482 flag_omit_frame_pointer = true;
43e9d192
IB
6483}
6484
6485static struct machine_function *
6486aarch64_init_machine_status (void)
6487{
6488 struct machine_function *machine;
766090c2 6489 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
6490 return machine;
6491}
6492
6493void
6494aarch64_init_expanders (void)
6495{
6496 init_machine_status = aarch64_init_machine_status;
6497}
6498
6499/* A checking mechanism for the implementation of the various code models. */
6500static void
6501initialize_aarch64_code_model (void)
6502{
6503 if (flag_pic)
6504 {
6505 switch (aarch64_cmodel_var)
6506 {
6507 case AARCH64_CMODEL_TINY:
6508 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6509 break;
6510 case AARCH64_CMODEL_SMALL:
6511 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6512 break;
6513 case AARCH64_CMODEL_LARGE:
6514 sorry ("code model %qs with -f%s", "large",
6515 flag_pic > 1 ? "PIC" : "pic");
6516 default:
6517 gcc_unreachable ();
6518 }
6519 }
6520 else
6521 aarch64_cmodel = aarch64_cmodel_var;
6522}
6523
6524/* Return true if SYMBOL_REF X binds locally. */
6525
6526static bool
6527aarch64_symbol_binds_local_p (const_rtx x)
6528{
6529 return (SYMBOL_REF_DECL (x)
6530 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6531 : SYMBOL_REF_LOCAL_P (x));
6532}
6533
6534/* Return true if SYMBOL_REF X is thread local */
6535static bool
6536aarch64_tls_symbol_p (rtx x)
6537{
6538 if (! TARGET_HAVE_TLS)
6539 return false;
6540
6541 if (GET_CODE (x) != SYMBOL_REF)
6542 return false;
6543
6544 return SYMBOL_REF_TLS_MODEL (x) != 0;
6545}
6546
6547/* Classify a TLS symbol into one of the TLS kinds. */
6548enum aarch64_symbol_type
6549aarch64_classify_tls_symbol (rtx x)
6550{
6551 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6552
6553 switch (tls_kind)
6554 {
6555 case TLS_MODEL_GLOBAL_DYNAMIC:
6556 case TLS_MODEL_LOCAL_DYNAMIC:
6557 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6558
6559 case TLS_MODEL_INITIAL_EXEC:
6560 return SYMBOL_SMALL_GOTTPREL;
6561
6562 case TLS_MODEL_LOCAL_EXEC:
6563 return SYMBOL_SMALL_TPREL;
6564
6565 case TLS_MODEL_EMULATED:
6566 case TLS_MODEL_NONE:
6567 return SYMBOL_FORCE_TO_MEM;
6568
6569 default:
6570 gcc_unreachable ();
6571 }
6572}
6573
6574/* Return the method that should be used to access SYMBOL_REF or
6575 LABEL_REF X in context CONTEXT. */
17f4d4bf 6576
43e9d192
IB
6577enum aarch64_symbol_type
6578aarch64_classify_symbol (rtx x,
6579 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6580{
6581 if (GET_CODE (x) == LABEL_REF)
6582 {
6583 switch (aarch64_cmodel)
6584 {
6585 case AARCH64_CMODEL_LARGE:
6586 return SYMBOL_FORCE_TO_MEM;
6587
6588 case AARCH64_CMODEL_TINY_PIC:
6589 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
6590 return SYMBOL_TINY_ABSOLUTE;
6591
43e9d192
IB
6592 case AARCH64_CMODEL_SMALL_PIC:
6593 case AARCH64_CMODEL_SMALL:
6594 return SYMBOL_SMALL_ABSOLUTE;
6595
6596 default:
6597 gcc_unreachable ();
6598 }
6599 }
6600
17f4d4bf 6601 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 6602 {
4a985a37
MS
6603 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6604 return SYMBOL_FORCE_TO_MEM;
43e9d192
IB
6605
6606 if (aarch64_tls_symbol_p (x))
6607 return aarch64_classify_tls_symbol (x);
6608
17f4d4bf
CSS
6609 switch (aarch64_cmodel)
6610 {
6611 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
6612 if (SYMBOL_REF_WEAK (x))
6613 return SYMBOL_FORCE_TO_MEM;
6614 return SYMBOL_TINY_ABSOLUTE;
6615
17f4d4bf
CSS
6616 case AARCH64_CMODEL_SMALL:
6617 if (SYMBOL_REF_WEAK (x))
6618 return SYMBOL_FORCE_TO_MEM;
6619 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 6620
17f4d4bf 6621 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 6622 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 6623 return SYMBOL_TINY_GOT;
38e6c9a6
MS
6624 return SYMBOL_TINY_ABSOLUTE;
6625
17f4d4bf
CSS
6626 case AARCH64_CMODEL_SMALL_PIC:
6627 if (!aarch64_symbol_binds_local_p (x))
6628 return SYMBOL_SMALL_GOT;
6629 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 6630
17f4d4bf
CSS
6631 default:
6632 gcc_unreachable ();
6633 }
43e9d192 6634 }
17f4d4bf 6635
43e9d192
IB
6636 /* By default push everything into the constant pool. */
6637 return SYMBOL_FORCE_TO_MEM;
6638}
6639
43e9d192
IB
6640bool
6641aarch64_constant_address_p (rtx x)
6642{
6643 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6644}
6645
6646bool
6647aarch64_legitimate_pic_operand_p (rtx x)
6648{
6649 if (GET_CODE (x) == SYMBOL_REF
6650 || (GET_CODE (x) == CONST
6651 && GET_CODE (XEXP (x, 0)) == PLUS
6652 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6653 return false;
6654
6655 return true;
6656}
6657
3520f7cc
JG
6658/* Return true if X holds either a quarter-precision or
6659 floating-point +0.0 constant. */
6660static bool
ef4bddc2 6661aarch64_valid_floating_const (machine_mode mode, rtx x)
3520f7cc
JG
6662{
6663 if (!CONST_DOUBLE_P (x))
6664 return false;
6665
6666 /* TODO: We could handle moving 0.0 to a TFmode register,
6667 but first we would like to refactor the movtf_aarch64
6668 to be more amicable to split moves properly and
6669 correctly gate on TARGET_SIMD. For now - reject all
6670 constants which are not to SFmode or DFmode registers. */
6671 if (!(mode == SFmode || mode == DFmode))
6672 return false;
6673
6674 if (aarch64_float_const_zero_rtx_p (x))
6675 return true;
6676 return aarch64_float_const_representable_p (x);
6677}
6678
43e9d192 6679static bool
ef4bddc2 6680aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192
IB
6681{
6682 /* Do not allow vector struct mode constants. We could support
6683 0 and -1 easily, but they need support in aarch64-simd.md. */
6684 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6685 return false;
6686
6687 /* This could probably go away because
6688 we now decompose CONST_INTs according to expand_mov_immediate. */
6689 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 6690 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
6691 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6692 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
6693
6694 if (GET_CODE (x) == HIGH
6695 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6696 return true;
6697
6698 return aarch64_constant_address_p (x);
6699}
6700
a5bc806c 6701rtx
43e9d192
IB
6702aarch64_load_tp (rtx target)
6703{
6704 if (!target
6705 || GET_MODE (target) != Pmode
6706 || !register_operand (target, Pmode))
6707 target = gen_reg_rtx (Pmode);
6708
6709 /* Can return in any reg. */
6710 emit_insn (gen_aarch64_load_tp_hard (target));
6711 return target;
6712}
6713
43e9d192
IB
6714/* On AAPCS systems, this is the "struct __va_list". */
6715static GTY(()) tree va_list_type;
6716
6717/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6718 Return the type to use as __builtin_va_list.
6719
6720 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6721
6722 struct __va_list
6723 {
6724 void *__stack;
6725 void *__gr_top;
6726 void *__vr_top;
6727 int __gr_offs;
6728 int __vr_offs;
6729 }; */
6730
6731static tree
6732aarch64_build_builtin_va_list (void)
6733{
6734 tree va_list_name;
6735 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6736
6737 /* Create the type. */
6738 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6739 /* Give it the required name. */
6740 va_list_name = build_decl (BUILTINS_LOCATION,
6741 TYPE_DECL,
6742 get_identifier ("__va_list"),
6743 va_list_type);
6744 DECL_ARTIFICIAL (va_list_name) = 1;
6745 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 6746 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
6747
6748 /* Create the fields. */
6749 f_stack = build_decl (BUILTINS_LOCATION,
6750 FIELD_DECL, get_identifier ("__stack"),
6751 ptr_type_node);
6752 f_grtop = build_decl (BUILTINS_LOCATION,
6753 FIELD_DECL, get_identifier ("__gr_top"),
6754 ptr_type_node);
6755 f_vrtop = build_decl (BUILTINS_LOCATION,
6756 FIELD_DECL, get_identifier ("__vr_top"),
6757 ptr_type_node);
6758 f_groff = build_decl (BUILTINS_LOCATION,
6759 FIELD_DECL, get_identifier ("__gr_offs"),
6760 integer_type_node);
6761 f_vroff = build_decl (BUILTINS_LOCATION,
6762 FIELD_DECL, get_identifier ("__vr_offs"),
6763 integer_type_node);
6764
6765 DECL_ARTIFICIAL (f_stack) = 1;
6766 DECL_ARTIFICIAL (f_grtop) = 1;
6767 DECL_ARTIFICIAL (f_vrtop) = 1;
6768 DECL_ARTIFICIAL (f_groff) = 1;
6769 DECL_ARTIFICIAL (f_vroff) = 1;
6770
6771 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6772 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6773 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6774 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6775 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6776
6777 TYPE_FIELDS (va_list_type) = f_stack;
6778 DECL_CHAIN (f_stack) = f_grtop;
6779 DECL_CHAIN (f_grtop) = f_vrtop;
6780 DECL_CHAIN (f_vrtop) = f_groff;
6781 DECL_CHAIN (f_groff) = f_vroff;
6782
6783 /* Compute its layout. */
6784 layout_type (va_list_type);
6785
6786 return va_list_type;
6787}
6788
6789/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6790static void
6791aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6792{
6793 const CUMULATIVE_ARGS *cum;
6794 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6795 tree stack, grtop, vrtop, groff, vroff;
6796 tree t;
6797 int gr_save_area_size;
6798 int vr_save_area_size;
6799 int vr_offset;
6800
6801 cum = &crtl->args.info;
6802 gr_save_area_size
6803 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6804 vr_save_area_size
6805 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6806
6807 if (TARGET_GENERAL_REGS_ONLY)
6808 {
6809 if (cum->aapcs_nvrn > 0)
6810 sorry ("%qs and floating point or vector arguments",
6811 "-mgeneral-regs-only");
6812 vr_save_area_size = 0;
6813 }
6814
6815 f_stack = TYPE_FIELDS (va_list_type_node);
6816 f_grtop = DECL_CHAIN (f_stack);
6817 f_vrtop = DECL_CHAIN (f_grtop);
6818 f_groff = DECL_CHAIN (f_vrtop);
6819 f_vroff = DECL_CHAIN (f_groff);
6820
6821 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6822 NULL_TREE);
6823 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6824 NULL_TREE);
6825 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6826 NULL_TREE);
6827 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6828 NULL_TREE);
6829 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6830 NULL_TREE);
6831
6832 /* Emit code to initialize STACK, which points to the next varargs stack
6833 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6834 by named arguments. STACK is 8-byte aligned. */
6835 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6836 if (cum->aapcs_stack_size > 0)
6837 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6838 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6839 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6840
6841 /* Emit code to initialize GRTOP, the top of the GR save area.
6842 virtual_incoming_args_rtx should have been 16 byte aligned. */
6843 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6844 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6845 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6846
6847 /* Emit code to initialize VRTOP, the top of the VR save area.
6848 This address is gr_save_area_bytes below GRTOP, rounded
6849 down to the next 16-byte boundary. */
6850 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6851 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6852 STACK_BOUNDARY / BITS_PER_UNIT);
6853
6854 if (vr_offset)
6855 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6856 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6857 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6858
6859 /* Emit code to initialize GROFF, the offset from GRTOP of the
6860 next GPR argument. */
6861 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6862 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6863 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6864
6865 /* Likewise emit code to initialize VROFF, the offset from FTOP
6866 of the next VR argument. */
6867 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6868 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6869 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6870}
6871
6872/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6873
6874static tree
6875aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6876 gimple_seq *post_p ATTRIBUTE_UNUSED)
6877{
6878 tree addr;
6879 bool indirect_p;
6880 bool is_ha; /* is HFA or HVA. */
6881 bool dw_align; /* double-word align. */
ef4bddc2 6882 machine_mode ag_mode = VOIDmode;
43e9d192 6883 int nregs;
ef4bddc2 6884 machine_mode mode;
43e9d192
IB
6885
6886 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6887 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6888 HOST_WIDE_INT size, rsize, adjust, align;
6889 tree t, u, cond1, cond2;
6890
6891 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6892 if (indirect_p)
6893 type = build_pointer_type (type);
6894
6895 mode = TYPE_MODE (type);
6896
6897 f_stack = TYPE_FIELDS (va_list_type_node);
6898 f_grtop = DECL_CHAIN (f_stack);
6899 f_vrtop = DECL_CHAIN (f_grtop);
6900 f_groff = DECL_CHAIN (f_vrtop);
6901 f_vroff = DECL_CHAIN (f_groff);
6902
6903 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6904 f_stack, NULL_TREE);
6905 size = int_size_in_bytes (type);
6906 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6907
6908 dw_align = false;
6909 adjust = 0;
6910 if (aarch64_vfp_is_call_or_return_candidate (mode,
6911 type,
6912 &ag_mode,
6913 &nregs,
6914 &is_ha))
6915 {
6916 /* TYPE passed in fp/simd registers. */
6917 if (TARGET_GENERAL_REGS_ONLY)
6918 sorry ("%qs and floating point or vector arguments",
6919 "-mgeneral-regs-only");
6920
6921 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6922 unshare_expr (valist), f_vrtop, NULL_TREE);
6923 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6924 unshare_expr (valist), f_vroff, NULL_TREE);
6925
6926 rsize = nregs * UNITS_PER_VREG;
6927
6928 if (is_ha)
6929 {
6930 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6931 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6932 }
6933 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6934 && size < UNITS_PER_VREG)
6935 {
6936 adjust = UNITS_PER_VREG - size;
6937 }
6938 }
6939 else
6940 {
6941 /* TYPE passed in general registers. */
6942 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6943 unshare_expr (valist), f_grtop, NULL_TREE);
6944 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6945 unshare_expr (valist), f_groff, NULL_TREE);
6946 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6947 nregs = rsize / UNITS_PER_WORD;
6948
6949 if (align > 8)
6950 dw_align = true;
6951
6952 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6953 && size < UNITS_PER_WORD)
6954 {
6955 adjust = UNITS_PER_WORD - size;
6956 }
6957 }
6958
6959 /* Get a local temporary for the field value. */
6960 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6961
6962 /* Emit code to branch if off >= 0. */
6963 t = build2 (GE_EXPR, boolean_type_node, off,
6964 build_int_cst (TREE_TYPE (off), 0));
6965 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6966
6967 if (dw_align)
6968 {
6969 /* Emit: offs = (offs + 15) & -16. */
6970 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6971 build_int_cst (TREE_TYPE (off), 15));
6972 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6973 build_int_cst (TREE_TYPE (off), -16));
6974 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6975 }
6976 else
6977 roundup = NULL;
6978
6979 /* Update ap.__[g|v]r_offs */
6980 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6981 build_int_cst (TREE_TYPE (off), rsize));
6982 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6983
6984 /* String up. */
6985 if (roundup)
6986 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6987
6988 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6989 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6990 build_int_cst (TREE_TYPE (f_off), 0));
6991 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6992
6993 /* String up: make sure the assignment happens before the use. */
6994 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6995 COND_EXPR_ELSE (cond1) = t;
6996
6997 /* Prepare the trees handling the argument that is passed on the stack;
6998 the top level node will store in ON_STACK. */
6999 arg = get_initialized_tmp_var (stack, pre_p, NULL);
7000 if (align > 8)
7001 {
7002 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
7003 t = fold_convert (intDI_type_node, arg);
7004 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7005 build_int_cst (TREE_TYPE (t), 15));
7006 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7007 build_int_cst (TREE_TYPE (t), -16));
7008 t = fold_convert (TREE_TYPE (arg), t);
7009 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
7010 }
7011 else
7012 roundup = NULL;
7013 /* Advance ap.__stack */
7014 t = fold_convert (intDI_type_node, arg);
7015 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7016 build_int_cst (TREE_TYPE (t), size + 7));
7017 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7018 build_int_cst (TREE_TYPE (t), -8));
7019 t = fold_convert (TREE_TYPE (arg), t);
7020 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
7021 /* String up roundup and advance. */
7022 if (roundup)
7023 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7024 /* String up with arg */
7025 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
7026 /* Big-endianness related address adjustment. */
7027 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7028 && size < UNITS_PER_WORD)
7029 {
7030 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
7031 size_int (UNITS_PER_WORD - size));
7032 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
7033 }
7034
7035 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
7036 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
7037
7038 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
7039 t = off;
7040 if (adjust)
7041 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
7042 build_int_cst (TREE_TYPE (off), adjust));
7043
7044 t = fold_convert (sizetype, t);
7045 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
7046
7047 if (is_ha)
7048 {
7049 /* type ha; // treat as "struct {ftype field[n];}"
7050 ... [computing offs]
7051 for (i = 0; i <nregs; ++i, offs += 16)
7052 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
7053 return ha; */
7054 int i;
7055 tree tmp_ha, field_t, field_ptr_t;
7056
7057 /* Declare a local variable. */
7058 tmp_ha = create_tmp_var_raw (type, "ha");
7059 gimple_add_tmp_var (tmp_ha);
7060
7061 /* Establish the base type. */
7062 switch (ag_mode)
7063 {
7064 case SFmode:
7065 field_t = float_type_node;
7066 field_ptr_t = float_ptr_type_node;
7067 break;
7068 case DFmode:
7069 field_t = double_type_node;
7070 field_ptr_t = double_ptr_type_node;
7071 break;
7072 case TFmode:
7073 field_t = long_double_type_node;
7074 field_ptr_t = long_double_ptr_type_node;
7075 break;
7076/* The half precision and quad precision are not fully supported yet. Enable
7077 the following code after the support is complete. Need to find the correct
7078 type node for __fp16 *. */
7079#if 0
7080 case HFmode:
7081 field_t = float_type_node;
7082 field_ptr_t = float_ptr_type_node;
7083 break;
7084#endif
7085 case V2SImode:
7086 case V4SImode:
7087 {
7088 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
7089 field_t = build_vector_type_for_mode (innertype, ag_mode);
7090 field_ptr_t = build_pointer_type (field_t);
7091 }
7092 break;
7093 default:
7094 gcc_assert (0);
7095 }
7096
7097 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
7098 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
7099 addr = t;
7100 t = fold_convert (field_ptr_t, addr);
7101 t = build2 (MODIFY_EXPR, field_t,
7102 build1 (INDIRECT_REF, field_t, tmp_ha),
7103 build1 (INDIRECT_REF, field_t, t));
7104
7105 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
7106 for (i = 1; i < nregs; ++i)
7107 {
7108 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
7109 u = fold_convert (field_ptr_t, addr);
7110 u = build2 (MODIFY_EXPR, field_t,
7111 build2 (MEM_REF, field_t, tmp_ha,
7112 build_int_cst (field_ptr_t,
7113 (i *
7114 int_size_in_bytes (field_t)))),
7115 build1 (INDIRECT_REF, field_t, u));
7116 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7117 }
7118
7119 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7120 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7121 }
7122
7123 COND_EXPR_ELSE (cond2) = t;
7124 addr = fold_convert (build_pointer_type (type), cond1);
7125 addr = build_va_arg_indirect_ref (addr);
7126
7127 if (indirect_p)
7128 addr = build_va_arg_indirect_ref (addr);
7129
7130 return addr;
7131}
7132
7133/* Implement TARGET_SETUP_INCOMING_VARARGS. */
7134
7135static void
ef4bddc2 7136aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
7137 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7138 int no_rtl)
7139{
7140 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7141 CUMULATIVE_ARGS local_cum;
7142 int gr_saved, vr_saved;
7143
7144 /* The caller has advanced CUM up to, but not beyond, the last named
7145 argument. Advance a local copy of CUM past the last "real" named
7146 argument, to find out how many registers are left over. */
7147 local_cum = *cum;
7148 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
7149
7150 /* Found out how many registers we need to save. */
7151 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7152 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7153
7154 if (TARGET_GENERAL_REGS_ONLY)
7155 {
7156 if (local_cum.aapcs_nvrn > 0)
7157 sorry ("%qs and floating point or vector arguments",
7158 "-mgeneral-regs-only");
7159 vr_saved = 0;
7160 }
7161
7162 if (!no_rtl)
7163 {
7164 if (gr_saved > 0)
7165 {
7166 rtx ptr, mem;
7167
7168 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7169 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7170 - gr_saved * UNITS_PER_WORD);
7171 mem = gen_frame_mem (BLKmode, ptr);
7172 set_mem_alias_set (mem, get_varargs_alias_set ());
7173
7174 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7175 mem, gr_saved);
7176 }
7177 if (vr_saved > 0)
7178 {
7179 /* We can't use move_block_from_reg, because it will use
7180 the wrong mode, storing D regs only. */
ef4bddc2 7181 machine_mode mode = TImode;
43e9d192
IB
7182 int off, i;
7183
7184 /* Set OFF to the offset from virtual_incoming_args_rtx of
7185 the first vector register. The VR save area lies below
7186 the GR one, and is aligned to 16 bytes. */
7187 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7188 STACK_BOUNDARY / BITS_PER_UNIT);
7189 off -= vr_saved * UNITS_PER_VREG;
7190
7191 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7192 {
7193 rtx ptr, mem;
7194
7195 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7196 mem = gen_frame_mem (mode, ptr);
7197 set_mem_alias_set (mem, get_varargs_alias_set ());
7198 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7199 off += UNITS_PER_VREG;
7200 }
7201 }
7202 }
7203
7204 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7205 any complication of having crtl->args.pretend_args_size changed. */
8799637a 7206 cfun->machine->frame.saved_varargs_size
43e9d192
IB
7207 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7208 STACK_BOUNDARY / BITS_PER_UNIT)
7209 + vr_saved * UNITS_PER_VREG);
7210}
7211
7212static void
7213aarch64_conditional_register_usage (void)
7214{
7215 int i;
7216 if (!TARGET_FLOAT)
7217 {
7218 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7219 {
7220 fixed_regs[i] = 1;
7221 call_used_regs[i] = 1;
7222 }
7223 }
7224}
7225
7226/* Walk down the type tree of TYPE counting consecutive base elements.
7227 If *MODEP is VOIDmode, then set it to the first valid floating point
7228 type. If a non-floating point type is found, or if a floating point
7229 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7230 otherwise return the count in the sub-tree. */
7231static int
ef4bddc2 7232aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 7233{
ef4bddc2 7234 machine_mode mode;
43e9d192
IB
7235 HOST_WIDE_INT size;
7236
7237 switch (TREE_CODE (type))
7238 {
7239 case REAL_TYPE:
7240 mode = TYPE_MODE (type);
7241 if (mode != DFmode && mode != SFmode && mode != TFmode)
7242 return -1;
7243
7244 if (*modep == VOIDmode)
7245 *modep = mode;
7246
7247 if (*modep == mode)
7248 return 1;
7249
7250 break;
7251
7252 case COMPLEX_TYPE:
7253 mode = TYPE_MODE (TREE_TYPE (type));
7254 if (mode != DFmode && mode != SFmode && mode != TFmode)
7255 return -1;
7256
7257 if (*modep == VOIDmode)
7258 *modep = mode;
7259
7260 if (*modep == mode)
7261 return 2;
7262
7263 break;
7264
7265 case VECTOR_TYPE:
7266 /* Use V2SImode and V4SImode as representatives of all 64-bit
7267 and 128-bit vector types. */
7268 size = int_size_in_bytes (type);
7269 switch (size)
7270 {
7271 case 8:
7272 mode = V2SImode;
7273 break;
7274 case 16:
7275 mode = V4SImode;
7276 break;
7277 default:
7278 return -1;
7279 }
7280
7281 if (*modep == VOIDmode)
7282 *modep = mode;
7283
7284 /* Vector modes are considered to be opaque: two vectors are
7285 equivalent for the purposes of being homogeneous aggregates
7286 if they are the same size. */
7287 if (*modep == mode)
7288 return 1;
7289
7290 break;
7291
7292 case ARRAY_TYPE:
7293 {
7294 int count;
7295 tree index = TYPE_DOMAIN (type);
7296
807e902e
KZ
7297 /* Can't handle incomplete types nor sizes that are not
7298 fixed. */
7299 if (!COMPLETE_TYPE_P (type)
7300 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7301 return -1;
7302
7303 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7304 if (count == -1
7305 || !index
7306 || !TYPE_MAX_VALUE (index)
cc269bb6 7307 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 7308 || !TYPE_MIN_VALUE (index)
cc269bb6 7309 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
7310 || count < 0)
7311 return -1;
7312
ae7e9ddd
RS
7313 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7314 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
7315
7316 /* There must be no padding. */
807e902e 7317 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7318 return -1;
7319
7320 return count;
7321 }
7322
7323 case RECORD_TYPE:
7324 {
7325 int count = 0;
7326 int sub_count;
7327 tree field;
7328
807e902e
KZ
7329 /* Can't handle incomplete types nor sizes that are not
7330 fixed. */
7331 if (!COMPLETE_TYPE_P (type)
7332 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7333 return -1;
7334
7335 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7336 {
7337 if (TREE_CODE (field) != FIELD_DECL)
7338 continue;
7339
7340 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7341 if (sub_count < 0)
7342 return -1;
7343 count += sub_count;
7344 }
7345
7346 /* There must be no padding. */
807e902e 7347 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7348 return -1;
7349
7350 return count;
7351 }
7352
7353 case UNION_TYPE:
7354 case QUAL_UNION_TYPE:
7355 {
7356 /* These aren't very interesting except in a degenerate case. */
7357 int count = 0;
7358 int sub_count;
7359 tree field;
7360
807e902e
KZ
7361 /* Can't handle incomplete types nor sizes that are not
7362 fixed. */
7363 if (!COMPLETE_TYPE_P (type)
7364 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7365 return -1;
7366
7367 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7368 {
7369 if (TREE_CODE (field) != FIELD_DECL)
7370 continue;
7371
7372 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7373 if (sub_count < 0)
7374 return -1;
7375 count = count > sub_count ? count : sub_count;
7376 }
7377
7378 /* There must be no padding. */
807e902e 7379 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7380 return -1;
7381
7382 return count;
7383 }
7384
7385 default:
7386 break;
7387 }
7388
7389 return -1;
7390}
7391
38e8f663
YR
7392/* Return true if we use LRA instead of reload pass. */
7393static bool
7394aarch64_lra_p (void)
7395{
7396 return aarch64_lra_flag;
7397}
7398
43e9d192
IB
7399/* Return TRUE if the type, as described by TYPE and MODE, is a composite
7400 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7401 array types. The C99 floating-point complex types are also considered
7402 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7403 types, which are GCC extensions and out of the scope of AAPCS64, are
7404 treated as composite types here as well.
7405
7406 Note that MODE itself is not sufficient in determining whether a type
7407 is such a composite type or not. This is because
7408 stor-layout.c:compute_record_mode may have already changed the MODE
7409 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7410 structure with only one field may have its MODE set to the mode of the
7411 field. Also an integer mode whose size matches the size of the
7412 RECORD_TYPE type may be used to substitute the original mode
7413 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7414 solely relied on. */
7415
7416static bool
7417aarch64_composite_type_p (const_tree type,
ef4bddc2 7418 machine_mode mode)
43e9d192
IB
7419{
7420 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7421 return true;
7422
7423 if (mode == BLKmode
7424 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7425 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7426 return true;
7427
7428 return false;
7429}
7430
7431/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7432 type as described in AAPCS64 \S 4.1.2.
7433
7434 See the comment above aarch64_composite_type_p for the notes on MODE. */
7435
7436static bool
7437aarch64_short_vector_p (const_tree type,
ef4bddc2 7438 machine_mode mode)
43e9d192
IB
7439{
7440 HOST_WIDE_INT size = -1;
7441
7442 if (type && TREE_CODE (type) == VECTOR_TYPE)
7443 size = int_size_in_bytes (type);
7444 else if (!aarch64_composite_type_p (type, mode)
7445 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7446 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7447 size = GET_MODE_SIZE (mode);
7448
7449 return (size == 8 || size == 16) ? true : false;
7450}
7451
7452/* Return TRUE if an argument, whose type is described by TYPE and MODE,
7453 shall be passed or returned in simd/fp register(s) (providing these
7454 parameter passing registers are available).
7455
7456 Upon successful return, *COUNT returns the number of needed registers,
7457 *BASE_MODE returns the mode of the individual register and when IS_HAF
7458 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7459 floating-point aggregate or a homogeneous short-vector aggregate. */
7460
7461static bool
ef4bddc2 7462aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 7463 const_tree type,
ef4bddc2 7464 machine_mode *base_mode,
43e9d192
IB
7465 int *count,
7466 bool *is_ha)
7467{
ef4bddc2 7468 machine_mode new_mode = VOIDmode;
43e9d192
IB
7469 bool composite_p = aarch64_composite_type_p (type, mode);
7470
7471 if (is_ha != NULL) *is_ha = false;
7472
7473 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7474 || aarch64_short_vector_p (type, mode))
7475 {
7476 *count = 1;
7477 new_mode = mode;
7478 }
7479 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7480 {
7481 if (is_ha != NULL) *is_ha = true;
7482 *count = 2;
7483 new_mode = GET_MODE_INNER (mode);
7484 }
7485 else if (type && composite_p)
7486 {
7487 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7488
7489 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7490 {
7491 if (is_ha != NULL) *is_ha = true;
7492 *count = ag_count;
7493 }
7494 else
7495 return false;
7496 }
7497 else
7498 return false;
7499
7500 *base_mode = new_mode;
7501 return true;
7502}
7503
7504/* Implement TARGET_STRUCT_VALUE_RTX. */
7505
7506static rtx
7507aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7508 int incoming ATTRIBUTE_UNUSED)
7509{
7510 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7511}
7512
7513/* Implements target hook vector_mode_supported_p. */
7514static bool
ef4bddc2 7515aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192
IB
7516{
7517 if (TARGET_SIMD
7518 && (mode == V4SImode || mode == V8HImode
7519 || mode == V16QImode || mode == V2DImode
7520 || mode == V2SImode || mode == V4HImode
7521 || mode == V8QImode || mode == V2SFmode
ad7d90cc
AL
7522 || mode == V4SFmode || mode == V2DFmode
7523 || mode == V1DFmode))
43e9d192
IB
7524 return true;
7525
7526 return false;
7527}
7528
b7342d25
IB
7529/* Return appropriate SIMD container
7530 for MODE within a vector of WIDTH bits. */
ef4bddc2
RS
7531static machine_mode
7532aarch64_simd_container_mode (machine_mode mode, unsigned width)
43e9d192 7533{
b7342d25 7534 gcc_assert (width == 64 || width == 128);
43e9d192 7535 if (TARGET_SIMD)
b7342d25
IB
7536 {
7537 if (width == 128)
7538 switch (mode)
7539 {
7540 case DFmode:
7541 return V2DFmode;
7542 case SFmode:
7543 return V4SFmode;
7544 case SImode:
7545 return V4SImode;
7546 case HImode:
7547 return V8HImode;
7548 case QImode:
7549 return V16QImode;
7550 case DImode:
7551 return V2DImode;
7552 default:
7553 break;
7554 }
7555 else
7556 switch (mode)
7557 {
7558 case SFmode:
7559 return V2SFmode;
7560 case SImode:
7561 return V2SImode;
7562 case HImode:
7563 return V4HImode;
7564 case QImode:
7565 return V8QImode;
7566 default:
7567 break;
7568 }
7569 }
43e9d192
IB
7570 return word_mode;
7571}
7572
b7342d25 7573/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2
RS
7574static machine_mode
7575aarch64_preferred_simd_mode (machine_mode mode)
b7342d25
IB
7576{
7577 return aarch64_simd_container_mode (mode, 128);
7578}
7579
3b357264
JG
7580/* Return the bitmask of possible vector sizes for the vectorizer
7581 to iterate over. */
7582static unsigned int
7583aarch64_autovectorize_vector_sizes (void)
7584{
7585 return (16 | 8);
7586}
7587
c6fc9e43
YZ
7588/* A table to help perform AArch64-specific name mangling for AdvSIMD
7589 vector types in order to conform to the AAPCS64 (see "Procedure
7590 Call Standard for the ARM 64-bit Architecture", Appendix A). To
7591 qualify for emission with the mangled names defined in that document,
7592 a vector type must not only be of the correct mode but also be
7593 composed of AdvSIMD vector element types (e.g.
7594 _builtin_aarch64_simd_qi); these types are registered by
7595 aarch64_init_simd_builtins (). In other words, vector types defined
7596 in other ways e.g. via vector_size attribute will get default
7597 mangled names. */
7598typedef struct
7599{
ef4bddc2 7600 machine_mode mode;
c6fc9e43
YZ
7601 const char *element_type_name;
7602 const char *mangled_name;
7603} aarch64_simd_mangle_map_entry;
7604
7605static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7606 /* 64-bit containerized types. */
7607 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
7608 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
7609 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
7610 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
7611 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
7612 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
7613 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
096c59be
AL
7614 { DImode, "__builtin_aarch64_simd_di", "11__Int64x1_t" },
7615 { DImode, "__builtin_aarch64_simd_udi", "12__Uint64x1_t" },
c6a29a09 7616 { V1DFmode, "__builtin_aarch64_simd_df", "13__Float64x1_t" },
c6fc9e43
YZ
7617 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
7618 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7619 /* 128-bit containerized types. */
7620 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
7621 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
7622 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
7623 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
7624 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
7625 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
7626 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
7627 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
7628 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
7629 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
7630 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7631 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7baa225d 7632 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
c6fc9e43
YZ
7633 { VOIDmode, NULL, NULL }
7634};
7635
ac2b960f
YZ
7636/* Implement TARGET_MANGLE_TYPE. */
7637
6f549691 7638static const char *
ac2b960f
YZ
7639aarch64_mangle_type (const_tree type)
7640{
7641 /* The AArch64 ABI documents say that "__va_list" has to be
7642 managled as if it is in the "std" namespace. */
7643 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7644 return "St9__va_list";
7645
c6fc9e43
YZ
7646 /* Check the mode of the vector type, and the name of the vector
7647 element type, against the table. */
7648 if (TREE_CODE (type) == VECTOR_TYPE)
7649 {
7650 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7651
7652 while (pos->mode != VOIDmode)
7653 {
7654 tree elt_type = TREE_TYPE (type);
7655
7656 if (pos->mode == TYPE_MODE (type)
7657 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7658 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7659 pos->element_type_name))
7660 return pos->mangled_name;
7661
7662 pos++;
7663 }
7664 }
7665
ac2b960f
YZ
7666 /* Use the default mangling. */
7667 return NULL;
7668}
7669
8baff86e
KT
7670
7671/* Return true if the rtx_insn contains a MEM RTX somewhere
7672 in it. */
75cf1494
KT
7673
7674static bool
8baff86e 7675has_memory_op (rtx_insn *mem_insn)
75cf1494 7676{
8baff86e
KT
7677 subrtx_iterator::array_type array;
7678 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
7679 if (MEM_P (*iter))
7680 return true;
7681
7682 return false;
75cf1494
KT
7683}
7684
7685/* Find the first rtx_insn before insn that will generate an assembly
7686 instruction. */
7687
7688static rtx_insn *
7689aarch64_prev_real_insn (rtx_insn *insn)
7690{
7691 if (!insn)
7692 return NULL;
7693
7694 do
7695 {
7696 insn = prev_real_insn (insn);
7697 }
7698 while (insn && recog_memoized (insn) < 0);
7699
7700 return insn;
7701}
7702
7703static bool
7704is_madd_op (enum attr_type t1)
7705{
7706 unsigned int i;
7707 /* A number of these may be AArch32 only. */
7708 enum attr_type mlatypes[] = {
7709 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
7710 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
7711 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
7712 };
7713
7714 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
7715 {
7716 if (t1 == mlatypes[i])
7717 return true;
7718 }
7719
7720 return false;
7721}
7722
7723/* Check if there is a register dependency between a load and the insn
7724 for which we hold recog_data. */
7725
7726static bool
7727dep_between_memop_and_curr (rtx memop)
7728{
7729 rtx load_reg;
7730 int opno;
7731
8baff86e 7732 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
7733
7734 if (!REG_P (SET_DEST (memop)))
7735 return false;
7736
7737 load_reg = SET_DEST (memop);
8baff86e 7738 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
7739 {
7740 rtx operand = recog_data.operand[opno];
7741 if (REG_P (operand)
7742 && reg_overlap_mentioned_p (load_reg, operand))
7743 return true;
7744
7745 }
7746 return false;
7747}
7748
8baff86e
KT
7749
7750/* When working around the Cortex-A53 erratum 835769,
7751 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
7752 instruction and has a preceding memory instruction such that a NOP
7753 should be inserted between them. */
7754
75cf1494
KT
7755bool
7756aarch64_madd_needs_nop (rtx_insn* insn)
7757{
7758 enum attr_type attr_type;
7759 rtx_insn *prev;
7760 rtx body;
7761
7762 if (!aarch64_fix_a53_err835769)
7763 return false;
7764
7765 if (recog_memoized (insn) < 0)
7766 return false;
7767
7768 attr_type = get_attr_type (insn);
7769 if (!is_madd_op (attr_type))
7770 return false;
7771
7772 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
7773 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
7774 Restore recog state to INSN to avoid state corruption. */
7775 extract_constrain_insn_cached (insn);
7776
8baff86e 7777 if (!prev || !has_memory_op (prev))
75cf1494
KT
7778 return false;
7779
7780 body = single_set (prev);
7781
7782 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
7783 it and the DImode madd, emit a NOP between them. If body is NULL then we
7784 have a complex memory operation, probably a load/store pair.
7785 Be conservative for now and emit a NOP. */
7786 if (GET_MODE (recog_data.operand[0]) == DImode
7787 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
7788 return true;
7789
7790 return false;
7791
7792}
7793
8baff86e
KT
7794
7795/* Implement FINAL_PRESCAN_INSN. */
7796
75cf1494
KT
7797void
7798aarch64_final_prescan_insn (rtx_insn *insn)
7799{
7800 if (aarch64_madd_needs_nop (insn))
7801 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
7802}
7803
7804
43e9d192 7805/* Return the equivalent letter for size. */
81c2dfb9 7806static char
43e9d192
IB
7807sizetochar (int size)
7808{
7809 switch (size)
7810 {
7811 case 64: return 'd';
7812 case 32: return 's';
7813 case 16: return 'h';
7814 case 8 : return 'b';
7815 default: gcc_unreachable ();
7816 }
7817}
7818
3520f7cc
JG
7819/* Return true iff x is a uniform vector of floating-point
7820 constants, and the constant can be represented in
7821 quarter-precision form. Note, as aarch64_float_const_representable
7822 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
7823static bool
7824aarch64_vect_float_const_representable_p (rtx x)
7825{
7826 int i = 0;
7827 REAL_VALUE_TYPE r0, ri;
7828 rtx x0, xi;
7829
7830 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7831 return false;
7832
7833 x0 = CONST_VECTOR_ELT (x, 0);
7834 if (!CONST_DOUBLE_P (x0))
7835 return false;
7836
7837 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7838
7839 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7840 {
7841 xi = CONST_VECTOR_ELT (x, i);
7842 if (!CONST_DOUBLE_P (xi))
7843 return false;
7844
7845 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7846 if (!REAL_VALUES_EQUAL (r0, ri))
7847 return false;
7848 }
7849
7850 return aarch64_float_const_representable_p (x0);
7851}
7852
d8edd899 7853/* Return true for valid and false for invalid. */
3ea63f60 7854bool
ef4bddc2 7855aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
48063b9d 7856 struct simd_immediate_info *info)
43e9d192
IB
7857{
7858#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7859 matches = 1; \
7860 for (i = 0; i < idx; i += (STRIDE)) \
7861 if (!(TEST)) \
7862 matches = 0; \
7863 if (matches) \
7864 { \
7865 immtype = (CLASS); \
7866 elsize = (ELSIZE); \
43e9d192
IB
7867 eshift = (SHIFT); \
7868 emvn = (NEG); \
7869 break; \
7870 }
7871
7872 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7873 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7874 unsigned char bytes[16];
43e9d192
IB
7875 int immtype = -1, matches;
7876 unsigned int invmask = inverse ? 0xff : 0;
7877 int eshift, emvn;
7878
43e9d192 7879 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 7880 {
81c2dfb9
IB
7881 if (! (aarch64_simd_imm_zero_p (op, mode)
7882 || aarch64_vect_float_const_representable_p (op)))
d8edd899 7883 return false;
3520f7cc 7884
48063b9d
IB
7885 if (info)
7886 {
7887 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 7888 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
7889 info->mvn = false;
7890 info->shift = 0;
7891 }
3520f7cc 7892
d8edd899 7893 return true;
3520f7cc 7894 }
43e9d192
IB
7895
7896 /* Splat vector constant out into a byte vector. */
7897 for (i = 0; i < n_elts; i++)
7898 {
4b1e108c
AL
7899 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7900 it must be laid out in the vector register in reverse order. */
7901 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192
IB
7902 unsigned HOST_WIDE_INT elpart;
7903 unsigned int part, parts;
7904
4aa81c2e 7905 if (CONST_INT_P (el))
43e9d192
IB
7906 {
7907 elpart = INTVAL (el);
7908 parts = 1;
7909 }
7910 else if (GET_CODE (el) == CONST_DOUBLE)
7911 {
7912 elpart = CONST_DOUBLE_LOW (el);
7913 parts = 2;
7914 }
7915 else
7916 gcc_unreachable ();
7917
7918 for (part = 0; part < parts; part++)
7919 {
7920 unsigned int byte;
7921 for (byte = 0; byte < innersize; byte++)
7922 {
7923 bytes[idx++] = (elpart & 0xff) ^ invmask;
7924 elpart >>= BITS_PER_UNIT;
7925 }
7926 if (GET_CODE (el) == CONST_DOUBLE)
7927 elpart = CONST_DOUBLE_HIGH (el);
7928 }
7929 }
7930
7931 /* Sanity check. */
7932 gcc_assert (idx == GET_MODE_SIZE (mode));
7933
7934 do
7935 {
7936 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7937 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7938
7939 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7940 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7941
7942 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7943 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7944
7945 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7946 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7947
7948 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7949
7950 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7951
7952 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7953 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7954
7955 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7956 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7957
7958 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7959 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7960
7961 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7962 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7963
7964 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7965
7966 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7967
7968 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 7969 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
7970
7971 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 7972 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
7973
7974 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 7975 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
7976
7977 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 7978 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
7979
7980 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7981
7982 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7983 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7984 }
7985 while (0);
7986
e4f0f84d 7987 if (immtype == -1)
d8edd899 7988 return false;
43e9d192 7989
48063b9d 7990 if (info)
43e9d192 7991 {
48063b9d 7992 info->element_width = elsize;
48063b9d
IB
7993 info->mvn = emvn != 0;
7994 info->shift = eshift;
7995
43e9d192
IB
7996 unsigned HOST_WIDE_INT imm = 0;
7997
e4f0f84d
TB
7998 if (immtype >= 12 && immtype <= 15)
7999 info->msl = true;
8000
43e9d192
IB
8001 /* Un-invert bytes of recognized vector, if necessary. */
8002 if (invmask != 0)
8003 for (i = 0; i < idx; i++)
8004 bytes[i] ^= invmask;
8005
8006 if (immtype == 17)
8007 {
8008 /* FIXME: Broken on 32-bit H_W_I hosts. */
8009 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8010
8011 for (i = 0; i < 8; i++)
8012 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8013 << (i * BITS_PER_UNIT);
8014
43e9d192 8015
48063b9d
IB
8016 info->value = GEN_INT (imm);
8017 }
8018 else
8019 {
8020 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8021 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
8022
8023 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
8024 generic constants. */
8025 if (info->mvn)
43e9d192 8026 imm = ~imm;
48063b9d
IB
8027 imm = (imm >> info->shift) & 0xff;
8028 info->value = GEN_INT (imm);
8029 }
43e9d192
IB
8030 }
8031
48063b9d 8032 return true;
43e9d192
IB
8033#undef CHECK
8034}
8035
43e9d192
IB
8036/* Check of immediate shift constants are within range. */
8037bool
ef4bddc2 8038aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
8039{
8040 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
8041 if (left)
ddeabd3e 8042 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 8043 else
ddeabd3e 8044 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
8045}
8046
3520f7cc
JG
8047/* Return true if X is a uniform vector where all elements
8048 are either the floating-point constant 0.0 or the
8049 integer constant 0. */
43e9d192 8050bool
ef4bddc2 8051aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
43e9d192 8052{
3520f7cc 8053 return x == CONST0_RTX (mode);
43e9d192
IB
8054}
8055
8056bool
ef4bddc2 8057aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
43e9d192
IB
8058{
8059 HOST_WIDE_INT imm = INTVAL (x);
8060 int i;
8061
8062 for (i = 0; i < 8; i++)
8063 {
8064 unsigned int byte = imm & 0xff;
8065 if (byte != 0xff && byte != 0)
8066 return false;
8067 imm >>= 8;
8068 }
8069
8070 return true;
8071}
8072
83f8c414
CSS
8073bool
8074aarch64_mov_operand_p (rtx x,
a5350ddc 8075 enum aarch64_symbol_context context,
ef4bddc2 8076 machine_mode mode)
83f8c414 8077{
83f8c414
CSS
8078 if (GET_CODE (x) == HIGH
8079 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
8080 return true;
8081
8082 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
8083 return true;
8084
8085 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
8086 return true;
8087
a5350ddc
CSS
8088 return aarch64_classify_symbolic_expression (x, context)
8089 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
8090}
8091
43e9d192
IB
8092/* Return a const_int vector of VAL. */
8093rtx
ef4bddc2 8094aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
43e9d192
IB
8095{
8096 int nunits = GET_MODE_NUNITS (mode);
8097 rtvec v = rtvec_alloc (nunits);
8098 int i;
8099
8100 for (i=0; i < nunits; i++)
8101 RTVEC_ELT (v, i) = GEN_INT (val);
8102
8103 return gen_rtx_CONST_VECTOR (mode, v);
8104}
8105
051d0e2f
SN
8106/* Check OP is a legal scalar immediate for the MOVI instruction. */
8107
8108bool
ef4bddc2 8109aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
051d0e2f 8110{
ef4bddc2 8111 machine_mode vmode;
051d0e2f
SN
8112
8113 gcc_assert (!VECTOR_MODE_P (mode));
8114 vmode = aarch64_preferred_simd_mode (mode);
8115 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 8116 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
8117}
8118
988fa693
JG
8119/* Construct and return a PARALLEL RTX vector with elements numbering the
8120 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8121 the vector - from the perspective of the architecture. This does not
8122 line up with GCC's perspective on lane numbers, so we end up with
8123 different masks depending on our target endian-ness. The diagram
8124 below may help. We must draw the distinction when building masks
8125 which select one half of the vector. An instruction selecting
8126 architectural low-lanes for a big-endian target, must be described using
8127 a mask selecting GCC high-lanes.
8128
8129 Big-Endian Little-Endian
8130
8131GCC 0 1 2 3 3 2 1 0
8132 | x | x | x | x | | x | x | x | x |
8133Architecture 3 2 1 0 3 2 1 0
8134
8135Low Mask: { 2, 3 } { 0, 1 }
8136High Mask: { 0, 1 } { 2, 3 }
8137*/
8138
43e9d192 8139rtx
ef4bddc2 8140aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
43e9d192
IB
8141{
8142 int nunits = GET_MODE_NUNITS (mode);
8143 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
8144 int high_base = nunits / 2;
8145 int low_base = 0;
8146 int base;
43e9d192
IB
8147 rtx t1;
8148 int i;
8149
988fa693
JG
8150 if (BYTES_BIG_ENDIAN)
8151 base = high ? low_base : high_base;
8152 else
8153 base = high ? high_base : low_base;
8154
8155 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
8156 RTVEC_ELT (v, i) = GEN_INT (base + i);
8157
8158 t1 = gen_rtx_PARALLEL (mode, v);
8159 return t1;
8160}
8161
988fa693
JG
8162/* Check OP for validity as a PARALLEL RTX vector with elements
8163 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8164 from the perspective of the architecture. See the diagram above
8165 aarch64_simd_vect_par_cnst_half for more details. */
8166
8167bool
ef4bddc2 8168aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
8169 bool high)
8170{
8171 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
8172 HOST_WIDE_INT count_op = XVECLEN (op, 0);
8173 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
8174 int i = 0;
8175
8176 if (!VECTOR_MODE_P (mode))
8177 return false;
8178
8179 if (count_op != count_ideal)
8180 return false;
8181
8182 for (i = 0; i < count_ideal; i++)
8183 {
8184 rtx elt_op = XVECEXP (op, 0, i);
8185 rtx elt_ideal = XVECEXP (ideal, 0, i);
8186
4aa81c2e 8187 if (!CONST_INT_P (elt_op)
988fa693
JG
8188 || INTVAL (elt_ideal) != INTVAL (elt_op))
8189 return false;
8190 }
8191 return true;
8192}
8193
43e9d192
IB
8194/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
8195 HIGH (exclusive). */
8196void
8197aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8198{
8199 HOST_WIDE_INT lane;
4aa81c2e 8200 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
8201 lane = INTVAL (operand);
8202
8203 if (lane < low || lane >= high)
8204 error ("lane out of range");
8205}
8206
43e9d192
IB
8207/* Emit code to place a AdvSIMD pair result in memory locations (with equal
8208 registers). */
8209void
ef4bddc2 8210aarch64_simd_emit_pair_result_insn (machine_mode mode,
43e9d192
IB
8211 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
8212 rtx op1)
8213{
8214 rtx mem = gen_rtx_MEM (mode, destaddr);
8215 rtx tmp1 = gen_reg_rtx (mode);
8216 rtx tmp2 = gen_reg_rtx (mode);
8217
8218 emit_insn (intfn (tmp1, op1, tmp2));
8219
8220 emit_move_insn (mem, tmp1);
8221 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
8222 emit_move_insn (mem, tmp2);
8223}
8224
8225/* Return TRUE if OP is a valid vector addressing mode. */
8226bool
8227aarch64_simd_mem_operand_p (rtx op)
8228{
8229 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 8230 || REG_P (XEXP (op, 0)));
43e9d192
IB
8231}
8232
8233/* Set up OPERANDS for a register copy from SRC to DEST, taking care
8234 not to early-clobber SRC registers in the process.
8235
8236 We assume that the operands described by SRC and DEST represent a
8237 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
8238 number of components into which the copy has been decomposed. */
8239void
8240aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
8241 rtx *src, unsigned int count)
8242{
8243 unsigned int i;
8244
8245 if (!reg_overlap_mentioned_p (operands[0], operands[1])
8246 || REGNO (operands[0]) < REGNO (operands[1]))
8247 {
8248 for (i = 0; i < count; i++)
8249 {
8250 operands[2 * i] = dest[i];
8251 operands[2 * i + 1] = src[i];
8252 }
8253 }
8254 else
8255 {
8256 for (i = 0; i < count; i++)
8257 {
8258 operands[2 * i] = dest[count - i - 1];
8259 operands[2 * i + 1] = src[count - i - 1];
8260 }
8261 }
8262}
8263
8264/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8265 one of VSTRUCT modes: OI, CI or XI. */
8266int
647d790d 8267aarch64_simd_attr_length_move (rtx_insn *insn)
43e9d192 8268{
ef4bddc2 8269 machine_mode mode;
43e9d192
IB
8270
8271 extract_insn_cached (insn);
8272
8273 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8274 {
8275 mode = GET_MODE (recog_data.operand[0]);
8276 switch (mode)
8277 {
8278 case OImode:
8279 return 8;
8280 case CImode:
8281 return 12;
8282 case XImode:
8283 return 16;
8284 default:
8285 gcc_unreachable ();
8286 }
8287 }
8288 return 4;
8289}
8290
db0253a4
TB
8291/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8292 alignment of a vector to 128 bits. */
8293static HOST_WIDE_INT
8294aarch64_simd_vector_alignment (const_tree type)
8295{
9439e9a1 8296 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
8297 return MIN (align, 128);
8298}
8299
8300/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8301static bool
8302aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8303{
8304 if (is_packed)
8305 return false;
8306
8307 /* We guarantee alignment for vectors up to 128-bits. */
8308 if (tree_int_cst_compare (TYPE_SIZE (type),
8309 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8310 return false;
8311
8312 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8313 return true;
8314}
8315
4369c11e
TB
8316/* If VALS is a vector constant that can be loaded into a register
8317 using DUP, generate instructions to do so and return an RTX to
8318 assign to the register. Otherwise return NULL_RTX. */
8319static rtx
8320aarch64_simd_dup_constant (rtx vals)
8321{
ef4bddc2
RS
8322 machine_mode mode = GET_MODE (vals);
8323 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e
TB
8324 int n_elts = GET_MODE_NUNITS (mode);
8325 bool all_same = true;
8326 rtx x;
8327 int i;
8328
8329 if (GET_CODE (vals) != CONST_VECTOR)
8330 return NULL_RTX;
8331
8332 for (i = 1; i < n_elts; ++i)
8333 {
8334 x = CONST_VECTOR_ELT (vals, i);
8335 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8336 all_same = false;
8337 }
8338
8339 if (!all_same)
8340 return NULL_RTX;
8341
8342 /* We can load this constant by using DUP and a constant in a
8343 single ARM register. This will be cheaper than a vector
8344 load. */
8345 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8346 return gen_rtx_VEC_DUPLICATE (mode, x);
8347}
8348
8349
8350/* Generate code to load VALS, which is a PARALLEL containing only
8351 constants (for vec_init) or CONST_VECTOR, efficiently into a
8352 register. Returns an RTX to copy into the register, or NULL_RTX
8353 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 8354static rtx
4369c11e
TB
8355aarch64_simd_make_constant (rtx vals)
8356{
ef4bddc2 8357 machine_mode mode = GET_MODE (vals);
4369c11e
TB
8358 rtx const_dup;
8359 rtx const_vec = NULL_RTX;
8360 int n_elts = GET_MODE_NUNITS (mode);
8361 int n_const = 0;
8362 int i;
8363
8364 if (GET_CODE (vals) == CONST_VECTOR)
8365 const_vec = vals;
8366 else if (GET_CODE (vals) == PARALLEL)
8367 {
8368 /* A CONST_VECTOR must contain only CONST_INTs and
8369 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8370 Only store valid constants in a CONST_VECTOR. */
8371 for (i = 0; i < n_elts; ++i)
8372 {
8373 rtx x = XVECEXP (vals, 0, i);
8374 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8375 n_const++;
8376 }
8377 if (n_const == n_elts)
8378 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8379 }
8380 else
8381 gcc_unreachable ();
8382
8383 if (const_vec != NULL_RTX
48063b9d 8384 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
8385 /* Load using MOVI/MVNI. */
8386 return const_vec;
8387 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8388 /* Loaded using DUP. */
8389 return const_dup;
8390 else if (const_vec != NULL_RTX)
8391 /* Load from constant pool. We can not take advantage of single-cycle
8392 LD1 because we need a PC-relative addressing mode. */
8393 return const_vec;
8394 else
8395 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8396 We can not construct an initializer. */
8397 return NULL_RTX;
8398}
8399
8400void
8401aarch64_expand_vector_init (rtx target, rtx vals)
8402{
ef4bddc2
RS
8403 machine_mode mode = GET_MODE (target);
8404 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e
TB
8405 int n_elts = GET_MODE_NUNITS (mode);
8406 int n_var = 0, one_var = -1;
8407 bool all_same = true;
8408 rtx x, mem;
8409 int i;
8410
8411 x = XVECEXP (vals, 0, 0);
8412 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8413 n_var = 1, one_var = 0;
8414
8415 for (i = 1; i < n_elts; ++i)
8416 {
8417 x = XVECEXP (vals, 0, i);
8418 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8419 ++n_var, one_var = i;
8420
8421 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8422 all_same = false;
8423 }
8424
8425 if (n_var == 0)
8426 {
8427 rtx constant = aarch64_simd_make_constant (vals);
8428 if (constant != NULL_RTX)
8429 {
8430 emit_move_insn (target, constant);
8431 return;
8432 }
8433 }
8434
8435 /* Splat a single non-constant element if we can. */
8436 if (all_same)
8437 {
8438 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8439 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8440 return;
8441 }
8442
8443 /* One field is non-constant. Load constant then overwrite varying
8444 field. This is more efficient than using the stack. */
8445 if (n_var == 1)
8446 {
8447 rtx copy = copy_rtx (vals);
8448 rtx index = GEN_INT (one_var);
8449 enum insn_code icode;
8450
8451 /* Load constant part of vector, substitute neighboring value for
8452 varying element. */
8453 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8454 aarch64_expand_vector_init (target, copy);
8455
8456 /* Insert variable. */
8457 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8458 icode = optab_handler (vec_set_optab, mode);
8459 gcc_assert (icode != CODE_FOR_nothing);
8460 emit_insn (GEN_FCN (icode) (target, x, index));
8461 return;
8462 }
8463
8464 /* Construct the vector in memory one field at a time
8465 and load the whole vector. */
8466 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8467 for (i = 0; i < n_elts; i++)
8468 emit_move_insn (adjust_address_nv (mem, inner_mode,
8469 i * GET_MODE_SIZE (inner_mode)),
8470 XVECEXP (vals, 0, i));
8471 emit_move_insn (target, mem);
8472
8473}
8474
43e9d192 8475static unsigned HOST_WIDE_INT
ef4bddc2 8476aarch64_shift_truncation_mask (machine_mode mode)
43e9d192
IB
8477{
8478 return
8479 (aarch64_vector_mode_supported_p (mode)
8480 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8481}
8482
8483#ifndef TLS_SECTION_ASM_FLAG
8484#define TLS_SECTION_ASM_FLAG 'T'
8485#endif
8486
8487void
8488aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8489 tree decl ATTRIBUTE_UNUSED)
8490{
8491 char flagchars[10], *f = flagchars;
8492
8493 /* If we have already declared this section, we can use an
8494 abbreviated form to switch back to it -- unless this section is
8495 part of a COMDAT groups, in which case GAS requires the full
8496 declaration every time. */
8497 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8498 && (flags & SECTION_DECLARED))
8499 {
8500 fprintf (asm_out_file, "\t.section\t%s\n", name);
8501 return;
8502 }
8503
8504 if (!(flags & SECTION_DEBUG))
8505 *f++ = 'a';
8506 if (flags & SECTION_WRITE)
8507 *f++ = 'w';
8508 if (flags & SECTION_CODE)
8509 *f++ = 'x';
8510 if (flags & SECTION_SMALL)
8511 *f++ = 's';
8512 if (flags & SECTION_MERGE)
8513 *f++ = 'M';
8514 if (flags & SECTION_STRINGS)
8515 *f++ = 'S';
8516 if (flags & SECTION_TLS)
8517 *f++ = TLS_SECTION_ASM_FLAG;
8518 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8519 *f++ = 'G';
8520 *f = '\0';
8521
8522 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8523
8524 if (!(flags & SECTION_NOTYPE))
8525 {
8526 const char *type;
8527 const char *format;
8528
8529 if (flags & SECTION_BSS)
8530 type = "nobits";
8531 else
8532 type = "progbits";
8533
8534#ifdef TYPE_OPERAND_FMT
8535 format = "," TYPE_OPERAND_FMT;
8536#else
8537 format = ",@%s";
8538#endif
8539
8540 fprintf (asm_out_file, format, type);
8541
8542 if (flags & SECTION_ENTSIZE)
8543 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8544 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8545 {
8546 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8547 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8548 else
8549 fprintf (asm_out_file, ",%s,comdat",
8550 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8551 }
8552 }
8553
8554 putc ('\n', asm_out_file);
8555}
8556
8557/* Select a format to encode pointers in exception handling data. */
8558int
8559aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8560{
8561 int type;
8562 switch (aarch64_cmodel)
8563 {
8564 case AARCH64_CMODEL_TINY:
8565 case AARCH64_CMODEL_TINY_PIC:
8566 case AARCH64_CMODEL_SMALL:
8567 case AARCH64_CMODEL_SMALL_PIC:
8568 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8569 for everything. */
8570 type = DW_EH_PE_sdata4;
8571 break;
8572 default:
8573 /* No assumptions here. 8-byte relocs required. */
8574 type = DW_EH_PE_sdata8;
8575 break;
8576 }
8577 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8578}
8579
0462169c
SN
8580/* Emit load exclusive. */
8581
8582static void
ef4bddc2 8583aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
8584 rtx mem, rtx model_rtx)
8585{
8586 rtx (*gen) (rtx, rtx, rtx);
8587
8588 switch (mode)
8589 {
8590 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8591 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8592 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8593 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8594 default:
8595 gcc_unreachable ();
8596 }
8597
8598 emit_insn (gen (rval, mem, model_rtx));
8599}
8600
8601/* Emit store exclusive. */
8602
8603static void
ef4bddc2 8604aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
8605 rtx rval, rtx mem, rtx model_rtx)
8606{
8607 rtx (*gen) (rtx, rtx, rtx, rtx);
8608
8609 switch (mode)
8610 {
8611 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8612 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8613 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8614 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8615 default:
8616 gcc_unreachable ();
8617 }
8618
8619 emit_insn (gen (bval, rval, mem, model_rtx));
8620}
8621
8622/* Mark the previous jump instruction as unlikely. */
8623
8624static void
8625aarch64_emit_unlikely_jump (rtx insn)
8626{
e5af9ddd 8627 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c
SN
8628
8629 insn = emit_jump_insn (insn);
e5af9ddd 8630 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
0462169c
SN
8631}
8632
8633/* Expand a compare and swap pattern. */
8634
8635void
8636aarch64_expand_compare_and_swap (rtx operands[])
8637{
8638 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 8639 machine_mode mode, cmp_mode;
0462169c
SN
8640 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8641
8642 bval = operands[0];
8643 rval = operands[1];
8644 mem = operands[2];
8645 oldval = operands[3];
8646 newval = operands[4];
8647 is_weak = operands[5];
8648 mod_s = operands[6];
8649 mod_f = operands[7];
8650 mode = GET_MODE (mem);
8651 cmp_mode = mode;
8652
8653 /* Normally the succ memory model must be stronger than fail, but in the
8654 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8655 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8656
8657 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8658 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8659 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8660
8661 switch (mode)
8662 {
8663 case QImode:
8664 case HImode:
8665 /* For short modes, we're going to perform the comparison in SImode,
8666 so do the zero-extension now. */
8667 cmp_mode = SImode;
8668 rval = gen_reg_rtx (SImode);
8669 oldval = convert_modes (SImode, mode, oldval, true);
8670 /* Fall through. */
8671
8672 case SImode:
8673 case DImode:
8674 /* Force the value into a register if needed. */
8675 if (!aarch64_plus_operand (oldval, mode))
8676 oldval = force_reg (cmp_mode, oldval);
8677 break;
8678
8679 default:
8680 gcc_unreachable ();
8681 }
8682
8683 switch (mode)
8684 {
8685 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8686 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8687 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8688 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8689 default:
8690 gcc_unreachable ();
8691 }
8692
8693 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8694
8695 if (mode == QImode || mode == HImode)
8696 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8697
8698 x = gen_rtx_REG (CCmode, CC_REGNUM);
8699 x = gen_rtx_EQ (SImode, x, const0_rtx);
8700 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8701}
8702
8703/* Split a compare and swap pattern. */
8704
8705void
8706aarch64_split_compare_and_swap (rtx operands[])
8707{
8708 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 8709 machine_mode mode;
0462169c 8710 bool is_weak;
5d8a22a5
DM
8711 rtx_code_label *label1, *label2;
8712 rtx x, cond;
0462169c
SN
8713
8714 rval = operands[0];
8715 mem = operands[1];
8716 oldval = operands[2];
8717 newval = operands[3];
8718 is_weak = (operands[4] != const0_rtx);
0462169c
SN
8719 scratch = operands[7];
8720 mode = GET_MODE (mem);
8721
5d8a22a5 8722 label1 = NULL;
0462169c
SN
8723 if (!is_weak)
8724 {
8725 label1 = gen_label_rtx ();
8726 emit_label (label1);
8727 }
8728 label2 = gen_label_rtx ();
8729
8730 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8731
8732 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8733 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8734 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8735 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8736 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8737
8738 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8739
8740 if (!is_weak)
8741 {
8742 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8743 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8744 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8745 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8746 }
8747 else
8748 {
8749 cond = gen_rtx_REG (CCmode, CC_REGNUM);
8750 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8751 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8752 }
8753
8754 emit_label (label2);
8755}
8756
8757/* Split an atomic operation. */
8758
8759void
8760aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8761 rtx value, rtx model_rtx, rtx cond)
8762{
ef4bddc2
RS
8763 machine_mode mode = GET_MODE (mem);
8764 machine_mode wmode = (mode == DImode ? DImode : SImode);
5d8a22a5
DM
8765 rtx_code_label *label;
8766 rtx x;
0462169c
SN
8767
8768 label = gen_label_rtx ();
8769 emit_label (label);
8770
8771 if (new_out)
8772 new_out = gen_lowpart (wmode, new_out);
8773 if (old_out)
8774 old_out = gen_lowpart (wmode, old_out);
8775 else
8776 old_out = new_out;
8777 value = simplify_gen_subreg (wmode, value, mode, 0);
8778
8779 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8780
8781 switch (code)
8782 {
8783 case SET:
8784 new_out = value;
8785 break;
8786
8787 case NOT:
8788 x = gen_rtx_AND (wmode, old_out, value);
8789 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8790 x = gen_rtx_NOT (wmode, new_out);
8791 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8792 break;
8793
8794 case MINUS:
8795 if (CONST_INT_P (value))
8796 {
8797 value = GEN_INT (-INTVAL (value));
8798 code = PLUS;
8799 }
8800 /* Fall through. */
8801
8802 default:
8803 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8804 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8805 break;
8806 }
8807
8808 aarch64_emit_store_exclusive (mode, cond, mem,
8809 gen_lowpart (mode, new_out), model_rtx);
8810
8811 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8812 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8813 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8814 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8815}
8816
95ca411e
YZ
8817static void
8818aarch64_print_extension (void)
8819{
8820 const struct aarch64_option_extension *opt = NULL;
8821
8822 for (opt = all_extensions; opt->name != NULL; opt++)
8823 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8824 asm_fprintf (asm_out_file, "+%s", opt->name);
8825
8826 asm_fprintf (asm_out_file, "\n");
8827}
8828
43e9d192
IB
8829static void
8830aarch64_start_file (void)
8831{
8832 if (selected_arch)
95ca411e
YZ
8833 {
8834 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8835 aarch64_print_extension ();
8836 }
43e9d192 8837 else if (selected_cpu)
95ca411e 8838 {
682287fb
JG
8839 const char *truncated_name
8840 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8841 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
95ca411e
YZ
8842 aarch64_print_extension ();
8843 }
43e9d192
IB
8844 default_file_start();
8845}
8846
8847/* Target hook for c_mode_for_suffix. */
ef4bddc2 8848static machine_mode
43e9d192
IB
8849aarch64_c_mode_for_suffix (char suffix)
8850{
8851 if (suffix == 'q')
8852 return TFmode;
8853
8854 return VOIDmode;
8855}
8856
3520f7cc
JG
8857/* We can only represent floating point constants which will fit in
8858 "quarter-precision" values. These values are characterised by
8859 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
8860 by:
8861
8862 (-1)^s * (n/16) * 2^r
8863
8864 Where:
8865 's' is the sign bit.
8866 'n' is an integer in the range 16 <= n <= 31.
8867 'r' is an integer in the range -3 <= r <= 4. */
8868
8869/* Return true iff X can be represented by a quarter-precision
8870 floating point immediate operand X. Note, we cannot represent 0.0. */
8871bool
8872aarch64_float_const_representable_p (rtx x)
8873{
8874 /* This represents our current view of how many bits
8875 make up the mantissa. */
8876 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 8877 int exponent;
3520f7cc 8878 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 8879 REAL_VALUE_TYPE r, m;
807e902e 8880 bool fail;
3520f7cc
JG
8881
8882 if (!CONST_DOUBLE_P (x))
8883 return false;
8884
94bfa2da
TV
8885 if (GET_MODE (x) == VOIDmode)
8886 return false;
8887
3520f7cc
JG
8888 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8889
8890 /* We cannot represent infinities, NaNs or +/-zero. We won't
8891 know if we have +zero until we analyse the mantissa, but we
8892 can reject the other invalid values. */
8893 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8894 || REAL_VALUE_MINUS_ZERO (r))
8895 return false;
8896
ba96cdfb 8897 /* Extract exponent. */
3520f7cc
JG
8898 r = real_value_abs (&r);
8899 exponent = REAL_EXP (&r);
8900
8901 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8902 highest (sign) bit, with a fixed binary point at bit point_pos.
8903 m1 holds the low part of the mantissa, m2 the high part.
8904 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8905 bits for the mantissa, this can fail (low bits will be lost). */
8906 real_ldexp (&m, &r, point_pos - exponent);
807e902e 8907 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
8908
8909 /* If the low part of the mantissa has bits set we cannot represent
8910 the value. */
807e902e 8911 if (w.elt (0) != 0)
3520f7cc
JG
8912 return false;
8913 /* We have rejected the lower HOST_WIDE_INT, so update our
8914 understanding of how many bits lie in the mantissa and
8915 look only at the high HOST_WIDE_INT. */
807e902e 8916 mantissa = w.elt (1);
3520f7cc
JG
8917 point_pos -= HOST_BITS_PER_WIDE_INT;
8918
8919 /* We can only represent values with a mantissa of the form 1.xxxx. */
8920 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8921 if ((mantissa & mask) != 0)
8922 return false;
8923
8924 /* Having filtered unrepresentable values, we may now remove all
8925 but the highest 5 bits. */
8926 mantissa >>= point_pos - 5;
8927
8928 /* We cannot represent the value 0.0, so reject it. This is handled
8929 elsewhere. */
8930 if (mantissa == 0)
8931 return false;
8932
8933 /* Then, as bit 4 is always set, we can mask it off, leaving
8934 the mantissa in the range [0, 15]. */
8935 mantissa &= ~(1 << 4);
8936 gcc_assert (mantissa <= 15);
8937
8938 /* GCC internally does not use IEEE754-like encoding (where normalized
8939 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8940 Our mantissa values are shifted 4 places to the left relative to
8941 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8942 by 5 places to correct for GCC's representation. */
8943 exponent = 5 - exponent;
8944
8945 return (exponent >= 0 && exponent <= 7);
8946}
8947
8948char*
81c2dfb9 8949aarch64_output_simd_mov_immediate (rtx const_vector,
ef4bddc2 8950 machine_mode mode,
3520f7cc
JG
8951 unsigned width)
8952{
3ea63f60 8953 bool is_valid;
3520f7cc 8954 static char templ[40];
3520f7cc 8955 const char *mnemonic;
e4f0f84d 8956 const char *shift_op;
3520f7cc 8957 unsigned int lane_count = 0;
81c2dfb9 8958 char element_char;
3520f7cc 8959
e4f0f84d 8960 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
8961
8962 /* This will return true to show const_vector is legal for use as either
8963 a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8964 also update INFO to show how the immediate should be generated. */
81c2dfb9 8965 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
8966 gcc_assert (is_valid);
8967
81c2dfb9 8968 element_char = sizetochar (info.element_width);
48063b9d
IB
8969 lane_count = width / info.element_width;
8970
3520f7cc
JG
8971 mode = GET_MODE_INNER (mode);
8972 if (mode == SFmode || mode == DFmode)
8973 {
48063b9d
IB
8974 gcc_assert (info.shift == 0 && ! info.mvn);
8975 if (aarch64_float_const_zero_rtx_p (info.value))
8976 info.value = GEN_INT (0);
8977 else
8978 {
8979#define buf_size 20
8980 REAL_VALUE_TYPE r;
8981 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8982 char float_buf[buf_size] = {'\0'};
8983 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8984#undef buf_size
8985
8986 if (lane_count == 1)
8987 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8988 else
8989 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 8990 lane_count, element_char, float_buf);
48063b9d
IB
8991 return templ;
8992 }
3520f7cc 8993 }
3520f7cc 8994
48063b9d 8995 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 8996 shift_op = info.msl ? "msl" : "lsl";
3520f7cc
JG
8997
8998 if (lane_count == 1)
48063b9d
IB
8999 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
9000 mnemonic, UINTVAL (info.value));
9001 else if (info.shift)
9002 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
9003 ", %s %d", mnemonic, lane_count, element_char,
9004 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 9005 else
48063b9d 9006 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 9007 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
9008 return templ;
9009}
9010
b7342d25
IB
9011char*
9012aarch64_output_scalar_simd_mov_immediate (rtx immediate,
ef4bddc2 9013 machine_mode mode)
b7342d25 9014{
ef4bddc2 9015 machine_mode vmode;
b7342d25
IB
9016
9017 gcc_assert (!VECTOR_MODE_P (mode));
9018 vmode = aarch64_simd_container_mode (mode, 64);
9019 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
9020 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
9021}
9022
88b08073
JG
9023/* Split operands into moves from op[1] + op[2] into op[0]. */
9024
9025void
9026aarch64_split_combinev16qi (rtx operands[3])
9027{
9028 unsigned int dest = REGNO (operands[0]);
9029 unsigned int src1 = REGNO (operands[1]);
9030 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 9031 machine_mode halfmode = GET_MODE (operands[1]);
88b08073
JG
9032 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
9033 rtx destlo, desthi;
9034
9035 gcc_assert (halfmode == V16QImode);
9036
9037 if (src1 == dest && src2 == dest + halfregs)
9038 {
9039 /* No-op move. Can't split to nothing; emit something. */
9040 emit_note (NOTE_INSN_DELETED);
9041 return;
9042 }
9043
9044 /* Preserve register attributes for variable tracking. */
9045 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
9046 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
9047 GET_MODE_SIZE (halfmode));
9048
9049 /* Special case of reversed high/low parts. */
9050 if (reg_overlap_mentioned_p (operands[2], destlo)
9051 && reg_overlap_mentioned_p (operands[1], desthi))
9052 {
9053 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9054 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
9055 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9056 }
9057 else if (!reg_overlap_mentioned_p (operands[2], destlo))
9058 {
9059 /* Try to avoid unnecessary moves if part of the result
9060 is in the right place already. */
9061 if (src1 != dest)
9062 emit_move_insn (destlo, operands[1]);
9063 if (src2 != dest + halfregs)
9064 emit_move_insn (desthi, operands[2]);
9065 }
9066 else
9067 {
9068 if (src2 != dest + halfregs)
9069 emit_move_insn (desthi, operands[2]);
9070 if (src1 != dest)
9071 emit_move_insn (destlo, operands[1]);
9072 }
9073}
9074
9075/* vec_perm support. */
9076
9077#define MAX_VECT_LEN 16
9078
9079struct expand_vec_perm_d
9080{
9081 rtx target, op0, op1;
9082 unsigned char perm[MAX_VECT_LEN];
ef4bddc2 9083 machine_mode vmode;
88b08073
JG
9084 unsigned char nelt;
9085 bool one_vector_p;
9086 bool testing_p;
9087};
9088
9089/* Generate a variable permutation. */
9090
9091static void
9092aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
9093{
ef4bddc2 9094 machine_mode vmode = GET_MODE (target);
88b08073
JG
9095 bool one_vector_p = rtx_equal_p (op0, op1);
9096
9097 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
9098 gcc_checking_assert (GET_MODE (op0) == vmode);
9099 gcc_checking_assert (GET_MODE (op1) == vmode);
9100 gcc_checking_assert (GET_MODE (sel) == vmode);
9101 gcc_checking_assert (TARGET_SIMD);
9102
9103 if (one_vector_p)
9104 {
9105 if (vmode == V8QImode)
9106 {
9107 /* Expand the argument to a V16QI mode by duplicating it. */
9108 rtx pair = gen_reg_rtx (V16QImode);
9109 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
9110 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9111 }
9112 else
9113 {
9114 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
9115 }
9116 }
9117 else
9118 {
9119 rtx pair;
9120
9121 if (vmode == V8QImode)
9122 {
9123 pair = gen_reg_rtx (V16QImode);
9124 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
9125 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9126 }
9127 else
9128 {
9129 pair = gen_reg_rtx (OImode);
9130 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
9131 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
9132 }
9133 }
9134}
9135
9136void
9137aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
9138{
ef4bddc2 9139 machine_mode vmode = GET_MODE (target);
c9d1a16a 9140 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 9141 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 9142 rtx mask;
88b08073
JG
9143
9144 /* The TBL instruction does not use a modulo index, so we must take care
9145 of that ourselves. */
f7c4e5b8
AL
9146 mask = aarch64_simd_gen_const_vector_dup (vmode,
9147 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
9148 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
9149
f7c4e5b8
AL
9150 /* For big-endian, we also need to reverse the index within the vector
9151 (but not which vector). */
9152 if (BYTES_BIG_ENDIAN)
9153 {
9154 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
9155 if (!one_vector_p)
9156 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
9157 sel = expand_simple_binop (vmode, XOR, sel, mask,
9158 NULL, 0, OPTAB_LIB_WIDEN);
9159 }
88b08073
JG
9160 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
9161}
9162
cc4d934f
JG
9163/* Recognize patterns suitable for the TRN instructions. */
9164static bool
9165aarch64_evpc_trn (struct expand_vec_perm_d *d)
9166{
9167 unsigned int i, odd, mask, nelt = d->nelt;
9168 rtx out, in0, in1, x;
9169 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 9170 machine_mode vmode = d->vmode;
cc4d934f
JG
9171
9172 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9173 return false;
9174
9175 /* Note that these are little-endian tests.
9176 We correct for big-endian later. */
9177 if (d->perm[0] == 0)
9178 odd = 0;
9179 else if (d->perm[0] == 1)
9180 odd = 1;
9181 else
9182 return false;
9183 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9184
9185 for (i = 0; i < nelt; i += 2)
9186 {
9187 if (d->perm[i] != i + odd)
9188 return false;
9189 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
9190 return false;
9191 }
9192
9193 /* Success! */
9194 if (d->testing_p)
9195 return true;
9196
9197 in0 = d->op0;
9198 in1 = d->op1;
9199 if (BYTES_BIG_ENDIAN)
9200 {
9201 x = in0, in0 = in1, in1 = x;
9202 odd = !odd;
9203 }
9204 out = d->target;
9205
9206 if (odd)
9207 {
9208 switch (vmode)
9209 {
9210 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
9211 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
9212 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
9213 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
9214 case V4SImode: gen = gen_aarch64_trn2v4si; break;
9215 case V2SImode: gen = gen_aarch64_trn2v2si; break;
9216 case V2DImode: gen = gen_aarch64_trn2v2di; break;
9217 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
9218 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
9219 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
9220 default:
9221 return false;
9222 }
9223 }
9224 else
9225 {
9226 switch (vmode)
9227 {
9228 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
9229 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
9230 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
9231 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
9232 case V4SImode: gen = gen_aarch64_trn1v4si; break;
9233 case V2SImode: gen = gen_aarch64_trn1v2si; break;
9234 case V2DImode: gen = gen_aarch64_trn1v2di; break;
9235 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
9236 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
9237 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
9238 default:
9239 return false;
9240 }
9241 }
9242
9243 emit_insn (gen (out, in0, in1));
9244 return true;
9245}
9246
9247/* Recognize patterns suitable for the UZP instructions. */
9248static bool
9249aarch64_evpc_uzp (struct expand_vec_perm_d *d)
9250{
9251 unsigned int i, odd, mask, nelt = d->nelt;
9252 rtx out, in0, in1, x;
9253 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 9254 machine_mode vmode = d->vmode;
cc4d934f
JG
9255
9256 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9257 return false;
9258
9259 /* Note that these are little-endian tests.
9260 We correct for big-endian later. */
9261 if (d->perm[0] == 0)
9262 odd = 0;
9263 else if (d->perm[0] == 1)
9264 odd = 1;
9265 else
9266 return false;
9267 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9268
9269 for (i = 0; i < nelt; i++)
9270 {
9271 unsigned elt = (i * 2 + odd) & mask;
9272 if (d->perm[i] != elt)
9273 return false;
9274 }
9275
9276 /* Success! */
9277 if (d->testing_p)
9278 return true;
9279
9280 in0 = d->op0;
9281 in1 = d->op1;
9282 if (BYTES_BIG_ENDIAN)
9283 {
9284 x = in0, in0 = in1, in1 = x;
9285 odd = !odd;
9286 }
9287 out = d->target;
9288
9289 if (odd)
9290 {
9291 switch (vmode)
9292 {
9293 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9294 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9295 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9296 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9297 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9298 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9299 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9300 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9301 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9302 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9303 default:
9304 return false;
9305 }
9306 }
9307 else
9308 {
9309 switch (vmode)
9310 {
9311 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9312 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9313 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9314 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9315 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9316 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9317 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9318 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9319 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9320 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9321 default:
9322 return false;
9323 }
9324 }
9325
9326 emit_insn (gen (out, in0, in1));
9327 return true;
9328}
9329
9330/* Recognize patterns suitable for the ZIP instructions. */
9331static bool
9332aarch64_evpc_zip (struct expand_vec_perm_d *d)
9333{
9334 unsigned int i, high, mask, nelt = d->nelt;
9335 rtx out, in0, in1, x;
9336 rtx (*gen) (rtx, rtx, rtx);
ef4bddc2 9337 machine_mode vmode = d->vmode;
cc4d934f
JG
9338
9339 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9340 return false;
9341
9342 /* Note that these are little-endian tests.
9343 We correct for big-endian later. */
9344 high = nelt / 2;
9345 if (d->perm[0] == high)
9346 /* Do Nothing. */
9347 ;
9348 else if (d->perm[0] == 0)
9349 high = 0;
9350 else
9351 return false;
9352 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9353
9354 for (i = 0; i < nelt / 2; i++)
9355 {
9356 unsigned elt = (i + high) & mask;
9357 if (d->perm[i * 2] != elt)
9358 return false;
9359 elt = (elt + nelt) & mask;
9360 if (d->perm[i * 2 + 1] != elt)
9361 return false;
9362 }
9363
9364 /* Success! */
9365 if (d->testing_p)
9366 return true;
9367
9368 in0 = d->op0;
9369 in1 = d->op1;
9370 if (BYTES_BIG_ENDIAN)
9371 {
9372 x = in0, in0 = in1, in1 = x;
9373 high = !high;
9374 }
9375 out = d->target;
9376
9377 if (high)
9378 {
9379 switch (vmode)
9380 {
9381 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9382 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9383 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9384 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9385 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9386 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9387 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9388 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9389 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9390 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9391 default:
9392 return false;
9393 }
9394 }
9395 else
9396 {
9397 switch (vmode)
9398 {
9399 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9400 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9401 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9402 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9403 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9404 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9405 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9406 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9407 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9408 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9409 default:
9410 return false;
9411 }
9412 }
9413
9414 emit_insn (gen (out, in0, in1));
9415 return true;
9416}
9417
ae0533da
AL
9418/* Recognize patterns for the EXT insn. */
9419
9420static bool
9421aarch64_evpc_ext (struct expand_vec_perm_d *d)
9422{
9423 unsigned int i, nelt = d->nelt;
9424 rtx (*gen) (rtx, rtx, rtx, rtx);
9425 rtx offset;
9426
9427 unsigned int location = d->perm[0]; /* Always < nelt. */
9428
9429 /* Check if the extracted indices are increasing by one. */
9430 for (i = 1; i < nelt; i++)
9431 {
9432 unsigned int required = location + i;
9433 if (d->one_vector_p)
9434 {
9435 /* We'll pass the same vector in twice, so allow indices to wrap. */
9436 required &= (nelt - 1);
9437 }
9438 if (d->perm[i] != required)
9439 return false;
9440 }
9441
ae0533da
AL
9442 switch (d->vmode)
9443 {
9444 case V16QImode: gen = gen_aarch64_extv16qi; break;
9445 case V8QImode: gen = gen_aarch64_extv8qi; break;
9446 case V4HImode: gen = gen_aarch64_extv4hi; break;
9447 case V8HImode: gen = gen_aarch64_extv8hi; break;
9448 case V2SImode: gen = gen_aarch64_extv2si; break;
9449 case V4SImode: gen = gen_aarch64_extv4si; break;
9450 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9451 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9452 case V2DImode: gen = gen_aarch64_extv2di; break;
9453 case V2DFmode: gen = gen_aarch64_extv2df; break;
9454 default:
9455 return false;
9456 }
9457
9458 /* Success! */
9459 if (d->testing_p)
9460 return true;
9461
b31e65bb
AL
9462 /* The case where (location == 0) is a no-op for both big- and little-endian,
9463 and is removed by the mid-end at optimization levels -O1 and higher. */
9464
9465 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
9466 {
9467 /* After setup, we want the high elements of the first vector (stored
9468 at the LSB end of the register), and the low elements of the second
9469 vector (stored at the MSB end of the register). So swap. */
9470 rtx temp = d->op0;
9471 d->op0 = d->op1;
9472 d->op1 = temp;
9473 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9474 location = nelt - location;
9475 }
9476
9477 offset = GEN_INT (location);
9478 emit_insn (gen (d->target, d->op0, d->op1, offset));
9479 return true;
9480}
9481
923fcec3
AL
9482/* Recognize patterns for the REV insns. */
9483
9484static bool
9485aarch64_evpc_rev (struct expand_vec_perm_d *d)
9486{
9487 unsigned int i, j, diff, nelt = d->nelt;
9488 rtx (*gen) (rtx, rtx);
9489
9490 if (!d->one_vector_p)
9491 return false;
9492
9493 diff = d->perm[0];
9494 switch (diff)
9495 {
9496 case 7:
9497 switch (d->vmode)
9498 {
9499 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9500 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9501 default:
9502 return false;
9503 }
9504 break;
9505 case 3:
9506 switch (d->vmode)
9507 {
9508 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9509 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9510 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9511 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9512 default:
9513 return false;
9514 }
9515 break;
9516 case 1:
9517 switch (d->vmode)
9518 {
9519 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9520 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9521 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9522 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9523 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9524 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9525 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9526 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9527 default:
9528 return false;
9529 }
9530 break;
9531 default:
9532 return false;
9533 }
9534
9535 for (i = 0; i < nelt ; i += diff + 1)
9536 for (j = 0; j <= diff; j += 1)
9537 {
9538 /* This is guaranteed to be true as the value of diff
9539 is 7, 3, 1 and we should have enough elements in the
9540 queue to generate this. Getting a vector mask with a
9541 value of diff other than these values implies that
9542 something is wrong by the time we get here. */
9543 gcc_assert (i + j < nelt);
9544 if (d->perm[i + j] != i + diff - j)
9545 return false;
9546 }
9547
9548 /* Success! */
9549 if (d->testing_p)
9550 return true;
9551
9552 emit_insn (gen (d->target, d->op0));
9553 return true;
9554}
9555
91bd4114
JG
9556static bool
9557aarch64_evpc_dup (struct expand_vec_perm_d *d)
9558{
9559 rtx (*gen) (rtx, rtx, rtx);
9560 rtx out = d->target;
9561 rtx in0;
ef4bddc2 9562 machine_mode vmode = d->vmode;
91bd4114
JG
9563 unsigned int i, elt, nelt = d->nelt;
9564 rtx lane;
9565
91bd4114
JG
9566 elt = d->perm[0];
9567 for (i = 1; i < nelt; i++)
9568 {
9569 if (elt != d->perm[i])
9570 return false;
9571 }
9572
9573 /* The generic preparation in aarch64_expand_vec_perm_const_1
9574 swaps the operand order and the permute indices if it finds
9575 d->perm[0] to be in the second operand. Thus, we can always
9576 use d->op0 and need not do any extra arithmetic to get the
9577 correct lane number. */
9578 in0 = d->op0;
f901401e 9579 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114
JG
9580
9581 switch (vmode)
9582 {
9583 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9584 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9585 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9586 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9587 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9588 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9589 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9590 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9591 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9592 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9593 default:
9594 return false;
9595 }
9596
9597 emit_insn (gen (out, in0, lane));
9598 return true;
9599}
9600
88b08073
JG
9601static bool
9602aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9603{
9604 rtx rperm[MAX_VECT_LEN], sel;
ef4bddc2 9605 machine_mode vmode = d->vmode;
88b08073
JG
9606 unsigned int i, nelt = d->nelt;
9607
88b08073
JG
9608 if (d->testing_p)
9609 return true;
9610
9611 /* Generic code will try constant permutation twice. Once with the
9612 original mode and again with the elements lowered to QImode.
9613 So wait and don't do the selector expansion ourselves. */
9614 if (vmode != V8QImode && vmode != V16QImode)
9615 return false;
9616
9617 for (i = 0; i < nelt; ++i)
bbcc9c00
TB
9618 {
9619 int nunits = GET_MODE_NUNITS (vmode);
9620
9621 /* If big-endian and two vectors we end up with a weird mixed-endian
9622 mode on NEON. Reverse the index within each word but not the word
9623 itself. */
9624 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9625 : d->perm[i]);
9626 }
88b08073
JG
9627 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9628 sel = force_reg (vmode, sel);
9629
9630 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9631 return true;
9632}
9633
9634static bool
9635aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9636{
9637 /* The pattern matching functions above are written to look for a small
9638 number to begin the sequence (0, 1, N/2). If we begin with an index
9639 from the second operand, we can swap the operands. */
9640 if (d->perm[0] >= d->nelt)
9641 {
9642 unsigned i, nelt = d->nelt;
9643 rtx x;
9644
0696116a 9645 gcc_assert (nelt == (nelt & -nelt));
88b08073 9646 for (i = 0; i < nelt; ++i)
0696116a 9647 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
88b08073
JG
9648
9649 x = d->op0;
9650 d->op0 = d->op1;
9651 d->op1 = x;
9652 }
9653
9654 if (TARGET_SIMD)
cc4d934f 9655 {
923fcec3
AL
9656 if (aarch64_evpc_rev (d))
9657 return true;
9658 else if (aarch64_evpc_ext (d))
ae0533da 9659 return true;
f901401e
AL
9660 else if (aarch64_evpc_dup (d))
9661 return true;
ae0533da 9662 else if (aarch64_evpc_zip (d))
cc4d934f
JG
9663 return true;
9664 else if (aarch64_evpc_uzp (d))
9665 return true;
9666 else if (aarch64_evpc_trn (d))
9667 return true;
9668 return aarch64_evpc_tbl (d);
9669 }
88b08073
JG
9670 return false;
9671}
9672
9673/* Expand a vec_perm_const pattern. */
9674
9675bool
9676aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9677{
9678 struct expand_vec_perm_d d;
9679 int i, nelt, which;
9680
9681 d.target = target;
9682 d.op0 = op0;
9683 d.op1 = op1;
9684
9685 d.vmode = GET_MODE (target);
9686 gcc_assert (VECTOR_MODE_P (d.vmode));
9687 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9688 d.testing_p = false;
9689
9690 for (i = which = 0; i < nelt; ++i)
9691 {
9692 rtx e = XVECEXP (sel, 0, i);
9693 int ei = INTVAL (e) & (2 * nelt - 1);
9694 which |= (ei < nelt ? 1 : 2);
9695 d.perm[i] = ei;
9696 }
9697
9698 switch (which)
9699 {
9700 default:
9701 gcc_unreachable ();
9702
9703 case 3:
9704 d.one_vector_p = false;
9705 if (!rtx_equal_p (op0, op1))
9706 break;
9707
9708 /* The elements of PERM do not suggest that only the first operand
9709 is used, but both operands are identical. Allow easier matching
9710 of the permutation by folding the permutation into the single
9711 input vector. */
9712 /* Fall Through. */
9713 case 2:
9714 for (i = 0; i < nelt; ++i)
9715 d.perm[i] &= nelt - 1;
9716 d.op0 = op1;
9717 d.one_vector_p = true;
9718 break;
9719
9720 case 1:
9721 d.op1 = op0;
9722 d.one_vector_p = true;
9723 break;
9724 }
9725
9726 return aarch64_expand_vec_perm_const_1 (&d);
9727}
9728
9729static bool
ef4bddc2 9730aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
88b08073
JG
9731 const unsigned char *sel)
9732{
9733 struct expand_vec_perm_d d;
9734 unsigned int i, nelt, which;
9735 bool ret;
9736
9737 d.vmode = vmode;
9738 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9739 d.testing_p = true;
9740 memcpy (d.perm, sel, nelt);
9741
9742 /* Calculate whether all elements are in one vector. */
9743 for (i = which = 0; i < nelt; ++i)
9744 {
9745 unsigned char e = d.perm[i];
9746 gcc_assert (e < 2 * nelt);
9747 which |= (e < nelt ? 1 : 2);
9748 }
9749
9750 /* If all elements are from the second vector, reindex as if from the
9751 first vector. */
9752 if (which == 2)
9753 for (i = 0; i < nelt; ++i)
9754 d.perm[i] -= nelt;
9755
9756 /* Check whether the mask can be applied to a single vector. */
9757 d.one_vector_p = (which != 3);
9758
9759 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9760 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9761 if (!d.one_vector_p)
9762 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9763
9764 start_sequence ();
9765 ret = aarch64_expand_vec_perm_const_1 (&d);
9766 end_sequence ();
9767
9768 return ret;
9769}
9770
69675d50
TB
9771/* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9772bool
ef4bddc2
RS
9773aarch64_cannot_change_mode_class (machine_mode from,
9774 machine_mode to,
69675d50
TB
9775 enum reg_class rclass)
9776{
9777 /* Full-reg subregs are allowed on general regs or any class if they are
9778 the same size. */
9779 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9780 || !reg_classes_intersect_p (FP_REGS, rclass))
9781 return false;
9782
9783 /* Limited combinations of subregs are safe on FPREGs. Particularly,
9784 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9785 2. Scalar to Scalar for integer modes or same size float modes.
97e1ad78
JG
9786 3. Vector to Vector modes.
9787 4. On little-endian only, Vector-Structure to Vector modes. */
69675d50
TB
9788 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9789 {
9790 if (aarch64_vector_mode_supported_p (from)
9791 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9792 return false;
9793
9794 if (GET_MODE_NUNITS (from) == 1
9795 && GET_MODE_NUNITS (to) == 1
9796 && (GET_MODE_CLASS (from) == MODE_INT
9797 || from == to))
9798 return false;
9799
9800 if (aarch64_vector_mode_supported_p (from)
9801 && aarch64_vector_mode_supported_p (to))
9802 return false;
97e1ad78
JG
9803
9804 /* Within an vector structure straddling multiple vector registers
9805 we are in a mixed-endian representation. As such, we can't
9806 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9807 switch between vectors and vector structures cheaply. */
9808 if (!BYTES_BIG_ENDIAN)
9809 if ((aarch64_vector_mode_supported_p (from)
9810 && aarch64_vect_struct_mode_p (to))
9811 || (aarch64_vector_mode_supported_p (to)
9812 && aarch64_vect_struct_mode_p (from)))
9813 return false;
69675d50
TB
9814 }
9815
9816 return true;
9817}
9818
97e1ad78
JG
9819/* Implement MODES_TIEABLE_P. */
9820
9821bool
ef4bddc2 9822aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
9823{
9824 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9825 return true;
9826
9827 /* We specifically want to allow elements of "structure" modes to
9828 be tieable to the structure. This more general condition allows
9829 other rarer situations too. */
9830 if (TARGET_SIMD
9831 && aarch64_vector_mode_p (mode1)
9832 && aarch64_vector_mode_p (mode2))
9833 return true;
9834
9835 return false;
9836}
9837
e2c75eea
JG
9838/* Return a new RTX holding the result of moving POINTER forward by
9839 AMOUNT bytes. */
9840
9841static rtx
9842aarch64_move_pointer (rtx pointer, int amount)
9843{
9844 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
9845
9846 return adjust_automodify_address (pointer, GET_MODE (pointer),
9847 next, amount);
9848}
9849
9850/* Return a new RTX holding the result of moving POINTER forward by the
9851 size of the mode it points to. */
9852
9853static rtx
9854aarch64_progress_pointer (rtx pointer)
9855{
9856 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
9857
9858 return aarch64_move_pointer (pointer, amount);
9859}
9860
9861/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
9862 MODE bytes. */
9863
9864static void
9865aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 9866 machine_mode mode)
e2c75eea
JG
9867{
9868 rtx reg = gen_reg_rtx (mode);
9869
9870 /* "Cast" the pointers to the correct mode. */
9871 *src = adjust_address (*src, mode, 0);
9872 *dst = adjust_address (*dst, mode, 0);
9873 /* Emit the memcpy. */
9874 emit_move_insn (reg, *src);
9875 emit_move_insn (*dst, reg);
9876 /* Move the pointers forward. */
9877 *src = aarch64_progress_pointer (*src);
9878 *dst = aarch64_progress_pointer (*dst);
9879}
9880
9881/* Expand movmem, as if from a __builtin_memcpy. Return true if
9882 we succeed, otherwise return false. */
9883
9884bool
9885aarch64_expand_movmem (rtx *operands)
9886{
9887 unsigned int n;
9888 rtx dst = operands[0];
9889 rtx src = operands[1];
9890 rtx base;
9891 bool speed_p = !optimize_function_for_size_p (cfun);
9892
9893 /* When optimizing for size, give a better estimate of the length of a
9894 memcpy call, but use the default otherwise. */
9895 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
9896
9897 /* We can't do anything smart if the amount to copy is not constant. */
9898 if (!CONST_INT_P (operands[2]))
9899 return false;
9900
9901 n = UINTVAL (operands[2]);
9902
9903 /* Try to keep the number of instructions low. For cases below 16 bytes we
9904 need to make at most two moves. For cases above 16 bytes it will be one
9905 move for each 16 byte chunk, then at most two additional moves. */
9906 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
9907 return false;
9908
9909 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9910 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
9911
9912 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
9913 src = adjust_automodify_address (src, VOIDmode, base, 0);
9914
9915 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
9916 1-byte chunk. */
9917 if (n < 4)
9918 {
9919 if (n >= 2)
9920 {
9921 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9922 n -= 2;
9923 }
9924
9925 if (n == 1)
9926 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9927
9928 return true;
9929 }
9930
9931 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
9932 4-byte chunk, partially overlapping with the previously copied chunk. */
9933 if (n < 8)
9934 {
9935 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9936 n -= 4;
9937 if (n > 0)
9938 {
9939 int move = n - 4;
9940
9941 src = aarch64_move_pointer (src, move);
9942 dst = aarch64_move_pointer (dst, move);
9943 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9944 }
9945 return true;
9946 }
9947
9948 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
9949 them, then (if applicable) an 8-byte chunk. */
9950 while (n >= 8)
9951 {
9952 if (n / 16)
9953 {
9954 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
9955 n -= 16;
9956 }
9957 else
9958 {
9959 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9960 n -= 8;
9961 }
9962 }
9963
9964 /* Finish the final bytes of the copy. We can always do this in one
9965 instruction. We either copy the exact amount we need, or partially
9966 overlap with the previous chunk we copied and copy 8-bytes. */
9967 if (n == 0)
9968 return true;
9969 else if (n == 1)
9970 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9971 else if (n == 2)
9972 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9973 else if (n == 4)
9974 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9975 else
9976 {
9977 if (n == 3)
9978 {
9979 src = aarch64_move_pointer (src, -1);
9980 dst = aarch64_move_pointer (dst, -1);
9981 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9982 }
9983 else
9984 {
9985 int move = n - 8;
9986
9987 src = aarch64_move_pointer (src, move);
9988 dst = aarch64_move_pointer (dst, move);
9989 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9990 }
9991 }
9992
9993 return true;
9994}
9995
a3125fc2
CL
9996/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
9997
9998static unsigned HOST_WIDE_INT
9999aarch64_asan_shadow_offset (void)
10000{
10001 return (HOST_WIDE_INT_1 << 36);
10002}
10003
43e9d192
IB
10004#undef TARGET_ADDRESS_COST
10005#define TARGET_ADDRESS_COST aarch64_address_cost
10006
10007/* This hook will determines whether unnamed bitfields affect the alignment
10008 of the containing structure. The hook returns true if the structure
10009 should inherit the alignment requirements of an unnamed bitfield's
10010 type. */
10011#undef TARGET_ALIGN_ANON_BITFIELD
10012#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
10013
10014#undef TARGET_ASM_ALIGNED_DI_OP
10015#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
10016
10017#undef TARGET_ASM_ALIGNED_HI_OP
10018#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
10019
10020#undef TARGET_ASM_ALIGNED_SI_OP
10021#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
10022
10023#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10024#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
10025 hook_bool_const_tree_hwi_hwi_const_tree_true
10026
10027#undef TARGET_ASM_FILE_START
10028#define TARGET_ASM_FILE_START aarch64_start_file
10029
10030#undef TARGET_ASM_OUTPUT_MI_THUNK
10031#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
10032
10033#undef TARGET_ASM_SELECT_RTX_SECTION
10034#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
10035
10036#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
10037#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
10038
10039#undef TARGET_BUILD_BUILTIN_VA_LIST
10040#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
10041
10042#undef TARGET_CALLEE_COPIES
10043#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
10044
10045#undef TARGET_CAN_ELIMINATE
10046#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
10047
10048#undef TARGET_CANNOT_FORCE_CONST_MEM
10049#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
10050
10051#undef TARGET_CONDITIONAL_REGISTER_USAGE
10052#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
10053
10054/* Only the least significant bit is used for initialization guard
10055 variables. */
10056#undef TARGET_CXX_GUARD_MASK_BIT
10057#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
10058
10059#undef TARGET_C_MODE_FOR_SUFFIX
10060#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
10061
10062#ifdef TARGET_BIG_ENDIAN_DEFAULT
10063#undef TARGET_DEFAULT_TARGET_FLAGS
10064#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
10065#endif
10066
10067#undef TARGET_CLASS_MAX_NREGS
10068#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
10069
119103ca
JG
10070#undef TARGET_BUILTIN_DECL
10071#define TARGET_BUILTIN_DECL aarch64_builtin_decl
10072
43e9d192
IB
10073#undef TARGET_EXPAND_BUILTIN
10074#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
10075
10076#undef TARGET_EXPAND_BUILTIN_VA_START
10077#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
10078
9697e620
JG
10079#undef TARGET_FOLD_BUILTIN
10080#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
10081
43e9d192
IB
10082#undef TARGET_FUNCTION_ARG
10083#define TARGET_FUNCTION_ARG aarch64_function_arg
10084
10085#undef TARGET_FUNCTION_ARG_ADVANCE
10086#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
10087
10088#undef TARGET_FUNCTION_ARG_BOUNDARY
10089#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
10090
10091#undef TARGET_FUNCTION_OK_FOR_SIBCALL
10092#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
10093
10094#undef TARGET_FUNCTION_VALUE
10095#define TARGET_FUNCTION_VALUE aarch64_function_value
10096
10097#undef TARGET_FUNCTION_VALUE_REGNO_P
10098#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
10099
10100#undef TARGET_FRAME_POINTER_REQUIRED
10101#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
10102
fc72cba7
AL
10103#undef TARGET_GIMPLE_FOLD_BUILTIN
10104#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 10105
43e9d192
IB
10106#undef TARGET_GIMPLIFY_VA_ARG_EXPR
10107#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
10108
10109#undef TARGET_INIT_BUILTINS
10110#define TARGET_INIT_BUILTINS aarch64_init_builtins
10111
10112#undef TARGET_LEGITIMATE_ADDRESS_P
10113#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
10114
10115#undef TARGET_LEGITIMATE_CONSTANT_P
10116#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
10117
10118#undef TARGET_LIBGCC_CMP_RETURN_MODE
10119#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
10120
38e8f663
YR
10121#undef TARGET_LRA_P
10122#define TARGET_LRA_P aarch64_lra_p
10123
ac2b960f
YZ
10124#undef TARGET_MANGLE_TYPE
10125#define TARGET_MANGLE_TYPE aarch64_mangle_type
10126
43e9d192
IB
10127#undef TARGET_MEMORY_MOVE_COST
10128#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
10129
10130#undef TARGET_MUST_PASS_IN_STACK
10131#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
10132
10133/* This target hook should return true if accesses to volatile bitfields
10134 should use the narrowest mode possible. It should return false if these
10135 accesses should use the bitfield container type. */
10136#undef TARGET_NARROW_VOLATILE_BITFIELD
10137#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
10138
10139#undef TARGET_OPTION_OVERRIDE
10140#define TARGET_OPTION_OVERRIDE aarch64_override_options
10141
10142#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
10143#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
10144 aarch64_override_options_after_change
10145
10146#undef TARGET_PASS_BY_REFERENCE
10147#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
10148
10149#undef TARGET_PREFERRED_RELOAD_CLASS
10150#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
10151
10152#undef TARGET_SECONDARY_RELOAD
10153#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
10154
10155#undef TARGET_SHIFT_TRUNCATION_MASK
10156#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
10157
10158#undef TARGET_SETUP_INCOMING_VARARGS
10159#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
10160
10161#undef TARGET_STRUCT_VALUE_RTX
10162#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
10163
10164#undef TARGET_REGISTER_MOVE_COST
10165#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
10166
10167#undef TARGET_RETURN_IN_MEMORY
10168#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
10169
10170#undef TARGET_RETURN_IN_MSB
10171#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
10172
10173#undef TARGET_RTX_COSTS
7cc2145f 10174#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 10175
d126a4ae
AP
10176#undef TARGET_SCHED_ISSUE_RATE
10177#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
10178
43e9d192
IB
10179#undef TARGET_TRAMPOLINE_INIT
10180#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
10181
10182#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
10183#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
10184
10185#undef TARGET_VECTOR_MODE_SUPPORTED_P
10186#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
10187
10188#undef TARGET_ARRAY_MODE_SUPPORTED_P
10189#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
10190
8990e73a
TB
10191#undef TARGET_VECTORIZE_ADD_STMT_COST
10192#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
10193
10194#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
10195#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
10196 aarch64_builtin_vectorization_cost
10197
43e9d192
IB
10198#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
10199#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
10200
42fc9a7f
JG
10201#undef TARGET_VECTORIZE_BUILTINS
10202#define TARGET_VECTORIZE_BUILTINS
10203
10204#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
10205#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
10206 aarch64_builtin_vectorized_function
10207
3b357264
JG
10208#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
10209#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
10210 aarch64_autovectorize_vector_sizes
10211
aa87aced
KV
10212#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10213#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
10214 aarch64_atomic_assign_expand_fenv
10215
43e9d192
IB
10216/* Section anchor support. */
10217
10218#undef TARGET_MIN_ANCHOR_OFFSET
10219#define TARGET_MIN_ANCHOR_OFFSET -256
10220
10221/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
10222 byte offset; we can do much more for larger data types, but have no way
10223 to determine the size of the access. We assume accesses are aligned. */
10224#undef TARGET_MAX_ANCHOR_OFFSET
10225#define TARGET_MAX_ANCHOR_OFFSET 4095
10226
db0253a4
TB
10227#undef TARGET_VECTOR_ALIGNMENT
10228#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
10229
10230#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
10231#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
10232 aarch64_simd_vector_alignment_reachable
10233
88b08073
JG
10234/* vec_perm support. */
10235
10236#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
10237#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
10238 aarch64_vectorize_vec_perm_const_ok
10239
70f09188 10240
706b2314 10241#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
10242#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
10243
5cb74e90
RR
10244#undef TARGET_FLAGS_REGNUM
10245#define TARGET_FLAGS_REGNUM CC_REGNUM
10246
78607708
TV
10247#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
10248#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
10249
a3125fc2
CL
10250#undef TARGET_ASAN_SHADOW_OFFSET
10251#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
10252
0c4ec427
RE
10253#undef TARGET_LEGITIMIZE_ADDRESS
10254#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
10255
43e9d192
IB
10256struct gcc_target targetm = TARGET_INITIALIZER;
10257
10258#include "gt-aarch64.h"