/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "target-def.h"
#include "targhooks.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "tree-vectorizer.h"
#include "config/arm/aarch-cost-tables.h"
/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
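/* For example, under LP64 POINTER_SIZE is 64, giving POINTER_BYTES == 8;
   under ILP32 POINTER_SIZE is 32, giving POINTER_BYTES == 4.  */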
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type
{
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info
{
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
struct simd_immediate_info;
/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
static bool aarch64_lra_p (void);
static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
                                                     const_tree,
                                                     enum machine_mode *, int *,
                                                     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
                                              HOST_WIDE_INT, HOST_WIDE_INT);

static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                                 const unsigned char *sel);
/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;
/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif
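/* For example, NAMED_PARAM (memmov_cost, 4) expands to ".memmov_cost = (4)"
   when designated initializers are available, and to plain "(4)" otherwise,
   in which case the initializers below rely on field order.  */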
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};
/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost generic_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &generic_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4)
};
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8,
   &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Target specification.  These are populated as command-line arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};
/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;
/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
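/* For example, 0x00ff00ff00ff00ff is a valid bitmask immediate (an 8-bit
   run of ones replicated in each 16-bit element) and can be used directly
   by AND/ORR/EOR, whereas a value such as 0x1234 cannot be encoded and
   must be built up some other way.  */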
/* Did we set flag_omit_frame_pointer just so
   aarch64_frame_pointer_required would be called?  */
static bool faked_omit_frame_pointer;
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
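/* The enumeration above is laid out in inverse pairs, so flipping the low
   bit inverts a condition; e.g. AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ)
   is AARCH64_NE, and AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) is
   AARCH64_LT.  */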
/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}
/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
         || aarch64_vect_struct_mode_p (mode);
}
/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
                                unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
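/* For example, a 16-byte V4SImode value occupies a single vector register
   (one UNITS_PER_VREG chunk), while a 16-byte TImode value in the general
   register class occupies two 8-byte X registers.  */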
/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
        return
          (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
        return 1;
    }

  return 0;
}
/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
                                rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
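/* For example, extract_imm == 35 with mult_imm == 8 matches: the extension
   width is 35 & ~7 == 32 bits, the shift is 35 & 7 == 3, and 8 == 1 << 3,
   i.e. a 32-bit extend followed by a left shift by 3.  */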
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add  dest, x1, :lo_12:foo

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp

   Initial Exec:
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]

   Local Exec:
   add  t0, tp, #:tprel_hi12:imm
   add  t0, #:tprel_lo12_nc:imm
*/
static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
                                   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
        /* In ILP32, the mode of dest can be either SImode or DImode.  */
        rtx tmp_reg = dest;
        enum machine_mode mode = GET_MODE (dest);

        gcc_assert (mode == Pmode || mode == ptr_mode);

        if (can_create_pseudo_p ())
          tmp_reg = gen_reg_rtx (mode);

        emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
        emit_insn (gen_add_losym (dest, tmp_reg, imm));
        return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
        /* In ILP32, the mode of dest can be either SImode or DImode,
           while the got entry is always of SImode size.  The mode of
           dest depends on how dest is used: if dest is assigned to a
           pointer (e.g. in the memory), it has SImode; it may have
           DImode if dest is dereferenced to access the memory.
           This is why we have to handle three different ldr_got_small
           patterns here (two patterns for ILP32).  */
        rtx tmp_reg = dest;
        enum machine_mode mode = GET_MODE (dest);

        if (can_create_pseudo_p ())
          tmp_reg = gen_reg_rtx (mode);

        emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
        if (mode == ptr_mode)
          {
            if (mode == DImode)
              emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
            else
              emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
          }
        else
          {
            gcc_assert (mode == Pmode);
            emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
          }
        return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
        rtx insns;
        rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

        start_sequence ();
        emit_call_insn (gen_tlsgd_small (result, imm));
        insns = get_insns ();
        end_sequence ();

        RTL_CONST_CALL_P (insns) = 1;
        emit_libcall_block (insns, dest, result, imm);
        return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
        rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
        rtx tp;

        emit_insn (gen_tlsdesc_small (imm));
        tp = aarch64_load_tp (NULL);
        emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
        rtx tmp_reg = gen_reg_rtx (Pmode);
        rtx tp = aarch64_load_tp (NULL);
        emit_insn (gen_tlsie_small (tmp_reg, imm));
        emit_insn (gen_rtx_SET (Pmode, dest,
                                gen_rtx_PLUS (Pmode, tp, tmp_reg)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_SMALL_TPREL:
      {
        rtx tp = aarch64_load_tp (NULL);
        emit_insn (gen_tlsle_small (dest, tp, imm));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    default:
      gcc_unreachable ();
    }
}
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
          ? emit_move_insn (dest, src)
          : emit_move_insn_1 (dest, src));
}
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx low_dst;

  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);
  int src_regno = REGNO (src);
  int dst_regno = REGNO (dst);

  gcc_assert (dst_mode == TImode || dst_mode == TFmode);

  if (REG_P (dst) && REG_P (src))
    {
      gcc_assert (src_mode == TImode || src_mode == TFmode);

      /* Handle r -> w, w -> r.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
        {
          switch (src_mode)
            {
            case TImode:
              emit_insn
                (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
              emit_insn
                (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
              return;
            case TFmode:
              emit_insn
                (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
              emit_insn
                (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
              return;
            default:
              gcc_unreachable ();
            }
        }
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
        {
          switch (src_mode)
            {
            case TImode:
              emit_insn
                (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
              emit_insn
                (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
              return;
            case TFmode:
              emit_insn
                (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
              emit_insn
                (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
              return;
            default:
              gcc_unreachable ();
            }
        }
      /* Fall through to r -> r cases.  */
    }

  switch (dst_mode)
    {
    case TImode:
      low_dst = gen_lowpart (word_mode, dst);
      if (REG_P (low_dst)
          && reg_overlap_mentioned_p (low_dst, src))
        {
          aarch64_emit_move (gen_highpart (word_mode, dst),
                             gen_highpart_mode (word_mode, TImode, src));
          aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
        }
      else
        {
          aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
          aarch64_emit_move (gen_highpart (word_mode, dst),
                             gen_highpart_mode (word_mode, TImode, src));
        }
      return;
    case TFmode:
      emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
                      gen_rtx_REG (DFmode, src_regno));
      emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
                      gen_rtx_REG (DFmode, src_regno + 1));
      return;
    default:
      gcc_unreachable ();
    }
}
static bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
          || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}
/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  enum machine_mode src_mode = GET_MODE (src1);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
        {
        case V8QImode:
          gen = gen_aarch64_simd_combinev8qi;
          break;
        case V4HImode:
          gen = gen_aarch64_simd_combinev4hi;
          break;
        case V2SImode:
          gen = gen_aarch64_simd_combinev2si;
          break;
        case V2SFmode:
          gen = gen_aarch64_simd_combinev2sf;
          break;
        case DImode:
          gen = gen_aarch64_simd_combinedi;
          break;
        case DFmode:
          gen = gen_aarch64_simd_combinedf;
          break;
        default:
          gcc_unreachable ();
        }

      emit_insn (gen (dst, src1, src2));
      return;
    }
}
/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
        {
        case V16QImode:
          gen = gen_aarch64_split_simd_movv16qi;
          break;
        case V8HImode:
          gen = gen_aarch64_split_simd_movv8hi;
          break;
        case V4SImode:
          gen = gen_aarch64_split_simd_movv4si;
          break;
        case V2DImode:
          gen = gen_aarch64_split_simd_movv2di;
          break;
        case V4SFmode:
          gen = gen_aarch64_split_simd_movv4sf;
          break;
        case V2DFmode:
          gen = gen_aarch64_split_simd_movv2df;
          break;
        default:
          gcc_unreachable ();
        }

      emit_insn (gen (dst, src));
      return;
    }
}
static rtx
aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}
static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg,
                    HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
         might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
                                     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}
void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  enum machine_mode mode = GET_MODE (dest);
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match;

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
         before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
      switch (sty)
        {
        case SYMBOL_FORCE_TO_MEM:
          if (offset != const0_rtx
              && targetm.cannot_force_const_mem (mode, imm))
            {
              gcc_assert (can_create_pseudo_p ());
              base = aarch64_force_temporary (mode, dest, base);
              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
              aarch64_emit_move (dest, base);
              return;
            }
          mem = force_const_mem (ptr_mode, imm);
          gcc_assert (mem);
          if (mode != ptr_mode)
            mem = gen_rtx_ZERO_EXTEND (mode, mem);
          emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
          return;

        case SYMBOL_SMALL_TLSGD:
        case SYMBOL_SMALL_TLSDESC:
        case SYMBOL_SMALL_GOTTPREL:
        case SYMBOL_SMALL_GOT:
        case SYMBOL_TINY_GOT:
          if (offset != const0_rtx)
            {
              gcc_assert (can_create_pseudo_p ());
              base = aarch64_force_temporary (mode, dest, base);
              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
              aarch64_emit_move (dest, base);
              return;
            }
          /* FALLTHRU */

        case SYMBOL_SMALL_TPREL:
        case SYMBOL_SMALL_ABSOLUTE:
        case SYMBOL_TINY_ABSOLUTE:
          aarch64_load_symref_appropriately (dest, imm, sty);
          return;

        default:
          gcc_unreachable ();
        }
    }

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      return;
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
        emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
        {
          rtx mem = force_const_mem (mode, imm);
          gcc_assert (mem);
          emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
        }

      return;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
         in two; so don't mess around looking for sequences that don't buy
         us anything.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest,
                              GEN_INT (INTVAL (imm) & 0xffff)));
      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
                                 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
      return;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == 0)
        zero_match++;
      else if ((val & mask) == mask)
        one_match++;
    }

  if (one_match == 2)
    {
      mask = 0xffff;
      for (i = 0; i < 64; i += 16, mask <<= 16)
        {
          if ((val & mask) != mask)
            {
              emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
              emit_insn (gen_insv_immdi (dest, GEN_INT (i),
                                         GEN_INT ((val >> i) & 0xffff)));
              return;
            }
        }
      gcc_unreachable ();
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

          emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
          emit_insn (gen_adddi3 (dest, subtarget,
                                 GEN_INT (val - (val & mask))));
          return;
        }
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                  GEN_INT ((val + comp) & mask)));
          emit_insn (gen_adddi3 (dest, subtarget,
                                 GEN_INT (val - ((val + comp) & mask))));
          return;
        }
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                  GEN_INT ((val - comp) | ~mask)));
          emit_insn (gen_adddi3 (dest, subtarget,
                                 GEN_INT (val - ((val - comp) | ~mask))));
          return;
        }
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                  GEN_INT (val | ~mask)));
          emit_insn (gen_adddi3 (dest, subtarget,
                                 GEN_INT (val - (val | ~mask))));
          return;
        }
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
          || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                  GEN_INT (aarch64_bitmasks[i])));
          emit_insn (gen_adddi3 (dest, subtarget,
                                 GEN_INT (val - aarch64_bitmasks[i])));
          return;
        }

      for (j = 0; j < 64; j += 16, mask <<= 16)
        {
          if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
            {
              emit_insn (gen_rtx_SET (VOIDmode, dest,
                                      GEN_INT (aarch64_bitmasks[i])));
              emit_insn (gen_insv_immdi (dest, GEN_INT (j),
                                         GEN_INT ((val >> j) & 0xffff)));
              return;
            }
        }
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
        {
          int j;

          for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
            if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
              {
                subtarget = subtargets ? gen_reg_rtx (mode) : dest;
                emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                        GEN_INT (aarch64_bitmasks[i])));
                emit_insn (gen_iordi3 (dest, subtarget,
                                       GEN_INT (aarch64_bitmasks[j])));
                return;
              }
        }
      else if ((val & aarch64_bitmasks[i]) == val)
        {
          int j;

          for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
            if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
              {
                subtarget = subtargets ? gen_reg_rtx (mode) : dest;
                emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                        GEN_INT (aarch64_bitmasks[j])));
                emit_insn (gen_anddi3 (dest, subtarget,
                                       GEN_INT (aarch64_bitmasks[i])));
                return;
              }
        }
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
        {
          if (first)
            {
              emit_insn (gen_rtx_SET (VOIDmode, dest,
                                      GEN_INT (val & mask)));
              first = false;
            }
          else
            emit_insn (gen_insv_immdi (dest, GEN_INT (i),
                                       GEN_INT ((val >> i) & 0xffff)));
        }
    }
}
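/* As an illustration of the final fallback above, a constant such as
   0x1234000056780000 (two non-zero 16-bit chunks) would be synthesized
   roughly as
       mov   dest, #0x56780000
       movk  dest, #0x1234, lsl #48
   i.e. one initial move for the lowest non-zero chunk followed by one
   insv_immdi (MOVK) per remaining non-zero chunk.  */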
static bool
aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Indirect calls are not currently supported.  */
  if (decl == NULL)
    return false;

  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
  if (aarch64_decl_is_long_call_p (decl))
    return false;

  return true;
}
/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
                           enum machine_mode mode,
                           const_tree type,
                           bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
         ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  if (type)
    {
      /* Arrays always passed by reference.  */
      if (TREE_CODE (type) == ARRAY_TYPE)
        return true;
      /* Other aggregates based on their size.  */
      if (AGGREGATE_TYPE_P (type))
        size = int_size_in_bytes (type);
    }

  /* Variable sized arguments are always returned by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
                                               &dummymode, &nregs, NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}
/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  enum machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
                                               &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}
/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
                        bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp;
  int count;
  enum machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
                                               &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
        {
          gcc_assert (count == 1 && mode == ag_mode);
          return gen_rtx_REG (mode, V0_REGNUM);
        }
      else
        {
          int i;
          rtx par;

          par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
          for (i = 0; i < count; i++)
            {
              rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
              tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
              XVECEXP (par, 0, i) = tmp;
            }
          return par;
        }
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}
/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}
/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
                                               type,
                                               &ag_mode,
                                               &count,
                                               NULL))
    return false;

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}
static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v,
                               enum machine_mode mode,
                               const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
                                                  type,
                                                  &pcum->aapcs_vfp_rmode,
                                                  nregs,
                                                  NULL);
}
/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
        {
          if (TYPE_MODE (type) == mode)
            alignment = TYPE_ALIGN (type);
          else
            alignment = GET_MODE_ALIGNMENT (mode);
        }
      else
        alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}
/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
                    const_tree type,
                    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
                                                 mode,
                                                 type,
                                                 &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
        {
          pcum->aapcs_nextnvrn = nvrn + nregs;
          if (!aarch64_composite_type_p (type, mode))
            {
              gcc_assert (nregs == 1);
              pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
            }
          else
            {
              rtx par;
              int i;

              par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
              for (i = 0; i < nregs; i++)
                {
                  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
                                         V0_REGNUM + nvrn + i);
                  tmp = gen_rtx_EXPR_LIST
                    (VOIDmode, tmp,
                     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
                  XVECEXP (par, 0, i) = tmp;
                }
              pcum->aapcs_reg = par;
            }
          return;
        }
      else
        {
          /* C.3 NSRN is set to 8.  */
          pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
          goto on_stack;
        }
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
           + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
         rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
        {
          ++ncrn;
          gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
        }
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
         A reg is still generated for it, but the caller should be smart
         enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
        {
          pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
        }
      else
        {
          rtx par;
          int i;

          par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
          for (i = 0; i < nregs; i++)
            {
              rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
              tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                       GEN_INT (i * UNITS_PER_WORD));
              XVECEXP (par, 0, i) = tmp;
            }
          pcum->aapcs_reg = par;
        }

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11  */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument (we can re-use NREGS) and align the total size if
     necessary.  */
on_stack:
  pcum->aapcs_stack_words = nregs;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
                                               16 / UNITS_PER_WORD) + 1;
  return;
}
/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
                      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}
void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
                              const_tree fntype ATTRIBUTE_UNUSED,
                              rtx libname ATTRIBUTE_UNUSED,
                              const_tree fndecl ATTRIBUTE_UNUSED,
                              unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  return;
}
static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
                              enum machine_mode mode,
                              const_tree type,
                              bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
                  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}
bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
          || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}
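/* For example, x0-x7 and v0-v7 are argument registers, so this predicate
   holds for them and fails for x8 or v8.  */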
/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral, floating-point and pointer types are padded
     downward: the least significant byte of a stack argument is passed
     at the highest byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
         || POINTER_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}
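/* For example, on a big-endian target a 4-byte int passed in an 8-byte
   stack slot is padded downward (its useful bytes occupy the highest
   addresses of the slot), while a small aggregate is padded upward.  */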
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
                        bool first ATTRIBUTE_UNUSED)
{
  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
                            : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
        return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}
static enum machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}
static bool
aarch64_frame_pointer_required (void)
{
  /* If the function contains dynamic stack allocations, we need to
     use the frame pointer to access the static parts of the frame.  */
  if (cfun->calls_alloca)
    return true;

  /* We may have turned flag_omit_frame_pointer on in order to have this
     function called; if we did, we also set the 'faked_omit_frame_pointer'
     flag and we'll check it here.
     If we really did set flag_omit_frame_pointer normally, then we return
     false (no frame pointer required) in all cases.  */

  if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
    return false;
  else if (flag_omit_leaf_frame_pointer)
    return !crtl->is_leaf;
  return true;
}
/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

  cfun->machine->frame.fp_lr_offset = 0;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
        && !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
        && !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
        cfun->machine->frame.reg_offset[regno] = offset;
        offset += UNITS_PER_WORD;
      }

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
        cfun->machine->frame.reg_offset[regno] = offset;
        offset += UNITS_PER_WORD;
      }

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
    }

  if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
    }

  cfun->machine->frame.padding0 =
    (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;
  cfun->machine->frame.laid_out = true;
}
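/* For example, if dataflow shows x19, x20 and d8 live across calls, the
   loops above assign reg_offset 0 and 8 to x19 and x20, 16 to d8, and
   saved_regs_size is then rounded up to the stack boundary.  */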
/* Make the last instruction frame-related and note that it performs
   the operation described by FRAME_PATTERN.  */

static void
aarch64_set_frame_expr (rtx frame_pattern)
{
  rtx insn;

  insn = get_last_insn ();
  RTX_FRAME_RELATED_P (insn) = 1;
  RTX_FRAME_RELATED_P (frame_pattern) = 1;
  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                      frame_pattern,
                                      REG_NOTES (insn));
}
static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] != -1;
}
static void
aarch64_save_or_restore_fprs (int start_offset, int increment,
                              bool restore, rtx base_rtx)
{
  unsigned regno;
  unsigned regno2;
  rtx insn;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
        {
          rtx mem;
          mem = gen_mem_ref (DFmode,
                             plus_constant (Pmode,
                                            base_rtx,
                                            start_offset));

          for (regno2 = regno + 1;
               regno2 <= V31_REGNUM
                 && !aarch64_register_saved_on_entry (regno2);
               regno2++)
            {
              /* Empty loop.  */
            }
          if (regno2 <= V31_REGNUM &&
              aarch64_register_saved_on_entry (regno2))
            {
              rtx mem2;
              /* Next highest register to be saved.  */
              mem2 = gen_mem_ref (DFmode,
                                  plus_constant (Pmode,
                                                 base_rtx,
                                                 start_offset + increment));
              if (restore == false)
                {
                  insn = emit_insn
                    (gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
                                       mem2, gen_rtx_REG (DFmode, regno2)));
                }
              else
                {
                  insn = emit_insn
                    (gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
                                      gen_rtx_REG (DFmode, regno2), mem2));

                  add_reg_note (insn, REG_CFA_RESTORE,
                                gen_rtx_REG (DFmode, regno));
                  add_reg_note (insn, REG_CFA_RESTORE,
                                gen_rtx_REG (DFmode, regno2));
                }

              /* The first part of a frame-related parallel insn
                 is always assumed to be relevant to the frame
                 calculations; subsequent parts are only
                 frame-related if explicitly marked.  */
              RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
              regno = regno2;
              start_offset += increment * 2;
            }
          else
            {
              if (restore == false)
                insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
              else
                {
                  insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
                  add_reg_note (insn, REG_CFA_RESTORE,
                                gen_rtx_REG (DImode, regno));
                }
              start_offset += increment;
            }
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
}
/* OFFSET is the offset from the stack pointer at which the saves and
   restores have to happen.  */
static void
aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
                                               bool restore)
{
  rtx insn;
  rtx base_rtx = stack_pointer_rtx;
  HOST_WIDE_INT start_offset = offset;
  HOST_WIDE_INT increment = UNITS_PER_WORD;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
  unsigned limit = (frame_pointer_needed)? R28_REGNUM : R30_REGNUM;
  unsigned regno;
  unsigned regno2;

  for (regno = R0_REGNUM; regno <= limit; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
        {
          rtx mem;
          mem = gen_mem_ref (Pmode,
                             plus_constant (Pmode,
                                            base_rtx,
                                            start_offset));

          for (regno2 = regno + 1;
               regno2 <= limit
                 && !aarch64_register_saved_on_entry (regno2);
               regno2++)
            {
              /* Empty loop.  */
            }
          if (regno2 <= limit &&
              aarch64_register_saved_on_entry (regno2))
            {
              rtx mem2;
              /* Next highest register to be saved.  */
              mem2 = gen_mem_ref (Pmode,
                                  plus_constant (Pmode,
                                                 base_rtx,
                                                 start_offset + increment));
              if (restore == false)
                {
                  insn = emit_insn
                    (gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
                                       mem2, gen_rtx_REG (DImode, regno2)));
                }
              else
                {
                  insn = emit_insn
                    (gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
                                      gen_rtx_REG (DImode, regno2), mem2));

                  add_reg_note (insn, REG_CFA_RESTORE,
                                gen_rtx_REG (DImode, regno));
                  add_reg_note (insn, REG_CFA_RESTORE,
                                gen_rtx_REG (DImode, regno2));
                }

              /* The first part of a frame-related parallel insn
                 is always assumed to be relevant to the frame
                 calculations; subsequent parts are only
                 frame-related if explicitly marked.  */
              RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
                                            1)) = 1;
              regno = regno2;
              start_offset += increment * 2;
            }
          else
            {
              if (restore == false)
                insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
              else
                {
                  insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
                  add_reg_note (insn, REG_CFA_RESTORE,
                                gen_rtx_REG (DImode, regno));
                }
              start_offset += increment;
            }
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
}
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+ <-- arg_pointer_rtx
	|                               |
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|  local variables              |
	+-------------------------------+ <-- frame_pointer_rtx
	|                               |
	|  callee-saved registers       |
	|                               |
	+-------------------------------+
	|  LR'                          |
	+-------------------------------+
	|  FP'                          |
      P +-------------------------------+ <-- hard_frame_pointer_rtx
	|  dynamic allocation           |
	+-------------------------------+
	|                               |
	|  outgoing stack arguments     |
	|                               |
	+-------------------------------+ <-- stack_pointer_rtx

   Dynamic stack allocations such as alloca insert data at point P.
   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
   hard_frame_pointer_rtx unchanged.  */
/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

void
aarch64_expand_prologue (void)
{
  /* sub sp, sp, #<frame_size>
     stp {fp, lr}, [sp, #<frame_size> - 16]
     add fp, sp, #<frame_size> - hardfp_offset
     stp {cs_reg}, [fp, #-16] etc.

     sub sp, sp, <final_adjustment_if_any>
  */
  HOST_WIDE_INT original_frame_size;	/* local variables + vararg save */
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;		/* FP offset from SP */
  rtx insn;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
              && (cfun->stdarg || !cfun->machine->saved_varargs_size));
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
                + crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
                                          STACK_BOUNDARY / BITS_PER_UNIT);

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;

  fp_offset = (offset
               - original_frame_size
               - cfun->machine->frame.saved_regs_size);

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      /* When the frame has a large size, an initial decrease is done on
         the stack pointer to jump over the callee-allocated save area for
         register varargs, the local variable area and/or the callee-saved
         register area.  This will allow the pre-index write-back
         store pair instructions to be used for setting up the stack frame
         efficiently.  */
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
        offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;

      if (frame_size >= 0x1000000)
        {
          rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
          emit_move_insn (op0, GEN_INT (-frame_size));
          emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
          aarch64_set_frame_expr (gen_rtx_SET
                                  (Pmode, stack_pointer_rtx,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  -frame_size)));
        }
      else if (frame_size > 0)
        {
          if ((frame_size & 0xfff) != frame_size)
            {
              insn = emit_insn (gen_add2_insn
                                (stack_pointer_rtx,
                                 GEN_INT (-(frame_size
                                            & ~(HOST_WIDE_INT)0xfff))));
              RTX_FRAME_RELATED_P (insn) = 1;
            }
          if ((frame_size & 0xfff) != 0)
            {
              insn = emit_insn (gen_add2_insn
                                (stack_pointer_rtx,
                                 GEN_INT (-(frame_size
                                            & (HOST_WIDE_INT)0xfff))));
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }
    }
  else
    frame_size = -1;

  if (offset > 0)
    {
      /* Save the frame pointer and lr if the frame pointer is needed
         first.  Make the frame pointer point to the location of the
         old frame pointer on the stack.  */
      if (frame_pointer_needed)
        {
          rtx mem_fp, mem_lr;

          if (fp_offset)
            {
              insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
                                               GEN_INT (-offset)));
              RTX_FRAME_RELATED_P (insn) = 1;
              aarch64_set_frame_expr (gen_rtx_SET
                                      (Pmode, stack_pointer_rtx,
                                       gen_rtx_MINUS (Pmode,
                                                      stack_pointer_rtx,
                                                      GEN_INT (offset))));
              mem_fp = gen_frame_mem (DImode,
                                      plus_constant (Pmode,
                                                     stack_pointer_rtx,
                                                     fp_offset));
              mem_lr = gen_frame_mem (DImode,
                                      plus_constant (Pmode,
                                                     stack_pointer_rtx,
                                                     fp_offset
                                                     + UNITS_PER_WORD));
              insn = emit_insn (gen_store_pairdi (mem_fp,
                                                  hard_frame_pointer_rtx,
                                                  mem_lr,
                                                  gen_rtx_REG (DImode,
                                                               LR_REGNUM)));
            }
          else
            {
              insn = emit_insn (gen_storewb_pairdi_di
                                (stack_pointer_rtx, stack_pointer_rtx,
                                 hard_frame_pointer_rtx,
                                 gen_rtx_REG (DImode, LR_REGNUM),
                                 GEN_INT (-offset),
                                 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
              RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
            }

          /* The first part of a frame-related parallel insn is always
             assumed to be relevant to the frame calculations;
             subsequent parts are only frame-related if explicitly
             marked.  */
          RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
          RTX_FRAME_RELATED_P (insn) = 1;

          /* Set up frame pointer to point to the location of the
             previous frame pointer on the stack.  */
          insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
                                           stack_pointer_rtx,
                                           GEN_INT (fp_offset)));
          aarch64_set_frame_expr (gen_rtx_SET
                                  (Pmode, hard_frame_pointer_rtx,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  fp_offset)));
          RTX_FRAME_RELATED_P (insn) = 1;
          insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
                                           hard_frame_pointer_rtx));
        }
      else
        {
          insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
                                           GEN_INT (-offset)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }

      aarch64_save_or_restore_callee_save_registers
        (fp_offset + cfun->machine->frame.hardfp_offset, 0);
    }

  /* when offset >= 512,
     sub sp, sp, #<outgoing_args_size> */
  if (frame_size > -1)
    {
      if (crtl->outgoing_args_size > 0)
        {
          insn = emit_insn (gen_add2_insn
                            (stack_pointer_rtx,
                             GEN_INT (- crtl->outgoing_args_size)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
}
/* Generate the epilogue instructions for returning from a function.  */
void
aarch64_expand_epilogue (bool for_sibcall)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset;
  HOST_WIDE_INT fp_offset;
  rtx insn;
  rtx cfa_reg;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
                + crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
                                          STACK_BOUNDARY / BITS_PER_UNIT);
  fp_offset = (offset
               - original_frame_size
               - cfun->machine->frame.saved_regs_size);

  cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;

  /* Store pairs and load pairs have a range of only -512 to 504.  */
  if (offset >= 512)
    {
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
        offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;
      if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
        {
          insn = emit_insn (gen_add2_insn
                            (stack_pointer_rtx,
                             GEN_INT (crtl->outgoing_args_size)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
  else
    frame_size = -1;

  /* If there were outgoing arguments or we've done dynamic stack
     allocation, then restore the stack pointer from the frame
     pointer.  This is at most one insn and more efficient than using
     GCC's internal mechanism.  */
  if (frame_pointer_needed
      && (crtl->outgoing_args_size || cfun->calls_alloca))
    {
      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
                                       hard_frame_pointer_rtx,
                                       GEN_INT (- fp_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;
      /* As SP is set to (FP - fp_offset), according to the rules in
         dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
         from the value of SP from now on.  */
      cfa_reg = stack_pointer_rtx;
    }

  aarch64_save_or_restore_callee_save_registers
    (fp_offset + cfun->machine->frame.hardfp_offset, 1);

  /* Restore the frame pointer and lr if the frame pointer is needed.  */
  if (offset > 0)
    {
      if (frame_pointer_needed)
        {
          rtx mem_fp, mem_lr;

          if (fp_offset)
            {
              mem_fp = gen_frame_mem (DImode,
                                      plus_constant (Pmode,
                                                     stack_pointer_rtx,
                                                     fp_offset));
              mem_lr = gen_frame_mem (DImode,
                                      plus_constant (Pmode,
                                                     stack_pointer_rtx,
                                                     fp_offset
                                                     + UNITS_PER_WORD));
              insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
                                                 mem_fp,
                                                 gen_rtx_REG (DImode,
                                                              LR_REGNUM),
                                                 mem_lr));
            }
          else
            {
              insn = emit_insn (gen_loadwb_pairdi_di
                                (stack_pointer_rtx,
                                 stack_pointer_rtx,
                                 hard_frame_pointer_rtx,
                                 gen_rtx_REG (DImode, LR_REGNUM),
                                 GEN_INT (offset),
                                 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
              RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
              add_reg_note (insn, REG_CFA_ADJUST_CFA,
                            (gen_rtx_SET (Pmode, stack_pointer_rtx,
                                          plus_constant (Pmode, cfa_reg,
                                                         offset))));
            }

          /* The first part of a frame-related parallel insn is always
             assumed to be relevant to the frame calculations;
             subsequent parts are only frame-related if explicitly
             marked.  */
          RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
          add_reg_note (insn, REG_CFA_RESTORE,
                        gen_rtx_REG (DImode, LR_REGNUM));

          if (fp_offset)
            {
              insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
                                               GEN_INT (offset)));
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }
      else
        {
          insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
                                           GEN_INT (offset)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
    {
      /* We need to unwind the stack by the offset computed by
         EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
         based on SP.  Ideally we would update the SP and define the
         CFA along the lines of:

         SP = SP + EH_RETURN_STACKADJ_RTX
         (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)

         However the dwarf emitter only understands a constant
         register offset.

         The solution chosen here is to use the otherwise unused IP0
         as a temporary register to hold the current SP value.  The
         CFA is described using IP0 then SP is modified.  */

      rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);

      insn = emit_move_insn (ip0, stack_pointer_rtx);
      add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
      RTX_FRAME_RELATED_P (insn) = 1;

      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));

      /* Ensure the assignment to IP0 does not get optimized away.  */
      emit_use (ip0);
    }

  if (frame_size > -1)
    {
      if (frame_size >= 0x1000000)
        {
          rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
          emit_move_insn (op0, GEN_INT (frame_size));
          emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
          aarch64_set_frame_expr (gen_rtx_SET
                                  (Pmode, stack_pointer_rtx,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  frame_size)));
        }
      else if (frame_size > 0)
        {
          if ((frame_size & 0xfff) != 0)
            {
              insn = emit_insn (gen_add2_insn
                                (stack_pointer_rtx,
                                 GEN_INT ((frame_size
                                           & (HOST_WIDE_INT) 0xfff))));
              RTX_FRAME_RELATED_P (insn) = 1;
            }
          if ((frame_size & 0xfff) != frame_size)
            {
              insn = emit_insn (gen_add2_insn
                                (stack_pointer_rtx,
                                 GEN_INT ((frame_size
                                           & ~ (HOST_WIDE_INT) 0xfff))));
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }

      aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          offset)));
    }

  emit_use (gen_rtx_REG (DImode, LR_REGNUM));
  if (!for_sibcall)
    emit_jump_insn (ret_rtx);
}
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
rtx
aarch64_final_eh_return_addr (void)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
                + crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
                                          STACK_BOUNDARY / BITS_PER_UNIT);
  fp_offset = offset
    - original_frame_size
    - cfun->machine->frame.saved_regs_size;

  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
    return gen_rtx_REG (DImode, LR_REGNUM);

  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we distinguish two cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return a SP-relative location since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return a FP-relative location.  */

  if (frame_pointer_needed)
    {
      if (fp_offset)
        return gen_frame_mem (DImode,
                              plus_constant (Pmode,
                                             hard_frame_pointer_rtx,
                                             UNITS_PER_WORD));
      else
        return gen_frame_mem (DImode,
                              plus_constant (Pmode,
                                             stack_pointer_rtx,
                                             UNITS_PER_WORD));
    }

  /* If FP is not needed, we calculate the location of LR, which would be
     at the top of the saved registers block.  */

  return gen_frame_mem (DImode,
                        plus_constant (Pmode,
                                       stack_pointer_rtx,
                                       fp_offset
                                       + cfun->machine->frame.saved_regs_size
                                       - 2 * UNITS_PER_WORD));
}
/* Output code to build up a constant in a register.  */
static void
aarch64_build_constant (int regnum, HOST_WIDE_INT val)
{
  if (aarch64_bitmask_imm (val, DImode))
    emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
  else
    {
      int i;
      int ncount = 0;
      int zcount = 0;
      HOST_WIDE_INT valp = val >> 16;
      HOST_WIDE_INT valm;
      HOST_WIDE_INT tval;

      for (i = 16; i < 64; i += 16)
        {
          valm = (valp & 0xffff);

          if (valm != 0)
            ++ zcount;

          if (valm != 0xffff)
            ++ ncount;

          valp >>= 16;
        }

      /* zcount contains the number of additional MOVK instructions
         required if the constant is built up with an initial MOVZ instruction,
         while ncount is the number of MOVK instructions required if starting
         with a MOVN instruction.  Choose the sequence that needs the fewest
         instructions, preferring MOVZ when the two counts are equal.  */
      if (ncount < zcount)
        {
          emit_move_insn (gen_rtx_REG (Pmode, regnum),
                          GEN_INT ((~val) & 0xffff));
          tval = 0xffff;
        }
      else
        {
          emit_move_insn (gen_rtx_REG (Pmode, regnum),
                          GEN_INT (val & 0xffff));
          tval = 0;
        }

      val >>= 16;

      for (i = 16; i < 64; i += 16)
        {
          if ((val & 0xffff) != tval)
            emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
                                       GEN_INT (i), GEN_INT (val & 0xffff)));
          val >>= 16;
        }
    }
}
static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      aarch64_build_constant (scratchreg, delta);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
        {
          emit_insn (gen_rtx_SET (Pmode, scratch_rtx,
                                  GEN_INT (mdelta / 4096)));
          rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
          if (delta < 0)
            emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                    gen_rtx_MINUS (Pmode, this_rtx, shift)));
          else
            emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                    gen_rtx_PLUS (Pmode, this_rtx, shift)));
        }
      if (mdelta % 4096 != 0)
        {
          scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
          emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                  gen_rtx_PLUS (Pmode, this_rtx,
                                                scratch_rtx)));
        }
    }
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta,
                         HOST_WIDE_INT vcall_offset,
                         tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
  int this_regno = R0_REGNUM;
  rtx this_rtx, temp0, temp1, addr, insn, funexp;

  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  if (vcall_offset == 0)
    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
  else
    {
      gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);

      this_rtx = gen_rtx_REG (Pmode, this_regno);
      temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);

      addr = this_rtx;
      if (delta != 0)
        {
          if (delta >= -256 && delta < 256)
            addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
                                       plus_constant (Pmode, this_rtx,
                                                      delta));
          else
            aarch64_add_constant (this_regno, IP1_REGNUM, delta);
        }

      if (Pmode == ptr_mode)
        aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
      else
        aarch64_emit_move (temp0,
                           gen_rtx_ZERO_EXTEND (Pmode,
                                                gen_rtx_MEM (ptr_mode,
                                                             addr)));

      if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
        addr = plus_constant (Pmode, temp0, vcall_offset);
      else
        {
          aarch64_build_constant (IP1_REGNUM, vcall_offset);
          addr = gen_rtx_PLUS (Pmode, temp0, temp1);
        }

      if (Pmode == ptr_mode)
        aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
      else
        aarch64_emit_move (temp1,
                           gen_rtx_SIGN_EXTEND (Pmode,
                                                gen_rtx_MEM (ptr_mode,
                                                             addr)));

      emit_insn (gen_add2_insn (this_rtx, temp1));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}
/* Helper for aarch64_tls_referenced_p, called via for_each_rtx.  */
static int
aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

/* Return true if X contains a reference to a thread-local symbol.  */
static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
}

/* Comparison function used when sorting and searching the table of
   valid bitmask immediates.  */
static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

  if (*imm1 < *imm2)
    return -1;
  if (*imm1 > *imm2)
    return +1;
  return 0;
}

/* Build the table of all valid bitmask immediates: for each element
   size E (2, 4, ..., 64 bits), each run length S and each rotation R,
   replicate the rotated run of ones across 64 bits.  */
static void
aarch64_build_bitmask_table (void)
{
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
    {
      e = 1 << log_e;
      if (e == 64)
        mask = ~(HOST_WIDE_INT) 0;
      else
        mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
        {
          for (r = 0; r < e; r++)
            {
              /* set s consecutive bits to 1 (s < 64) */
              imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
              /* rotate right by r */
              if (r != 0)
                imm = ((imm >> r) | (imm << (e - r))) & mask;
              /* replicate the constant depending on SIMD size;
                 the cases deliberately fall through.  */
              switch (log_e) {
              case 1: imm |= (imm <<  2);
              case 2: imm |= (imm <<  4);
              case 3: imm |= (imm <<  8);
              case 4: imm |= (imm << 16);
              case 5: imm |= (imm << 32);
              case 6:
                break;
              default:
                gcc_unreachable ();
              }
              gcc_assert (nimms < AARCH64_NUM_BITMASKS);
              aarch64_bitmasks[nimms++] = imm;
            }
        }
    }

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
         aarch64_bitmasks_cmp);
}
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
}

/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
        return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}

/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
                  sizeof (aarch64_bitmasks[0]),
                  aarch64_bitmasks_cmp) != NULL;
}

/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
static bool
aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    {
      if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
          != SYMBOL_FORCE_TO_MEM)
        return true;
      else
        /* Avoid generating a 64-bit relocation in ILP32; leave
           it to aarch64_expand_mov_immediate to handle properly.  */
        return mode != ptr_mode;
    }

  return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
          || regno == SP_REGNUM
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
2846 /* Return true if address offset is a valid index. If it is, fill in INFO
2847 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2850 aarch64_classify_index (struct aarch64_address_info
*info
, rtx x
,
2851 enum machine_mode mode
, bool strict_p
)
2853 enum aarch64_address_type type
;
2858 if ((REG_P (x
) || GET_CODE (x
) == SUBREG
)
2859 && GET_MODE (x
) == Pmode
)
2861 type
= ADDRESS_REG_REG
;
2865 /* (sign_extend:DI (reg:SI)) */
2866 else if ((GET_CODE (x
) == SIGN_EXTEND
2867 || GET_CODE (x
) == ZERO_EXTEND
)
2868 && GET_MODE (x
) == DImode
2869 && GET_MODE (XEXP (x
, 0)) == SImode
)
2871 type
= (GET_CODE (x
) == SIGN_EXTEND
)
2872 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2873 index
= XEXP (x
, 0);
2876 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2877 else if (GET_CODE (x
) == MULT
2878 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
2879 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
2880 && GET_MODE (XEXP (x
, 0)) == DImode
2881 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
2882 && CONST_INT_P (XEXP (x
, 1)))
2884 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
2885 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2886 index
= XEXP (XEXP (x
, 0), 0);
2887 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
2889 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2890 else if (GET_CODE (x
) == ASHIFT
2891 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
2892 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
2893 && GET_MODE (XEXP (x
, 0)) == DImode
2894 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
2895 && CONST_INT_P (XEXP (x
, 1)))
2897 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
2898 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2899 index
= XEXP (XEXP (x
, 0), 0);
2900 shift
= INTVAL (XEXP (x
, 1));
2902 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2903 else if ((GET_CODE (x
) == SIGN_EXTRACT
2904 || GET_CODE (x
) == ZERO_EXTRACT
)
2905 && GET_MODE (x
) == DImode
2906 && GET_CODE (XEXP (x
, 0)) == MULT
2907 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2908 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
2910 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
2911 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2912 index
= XEXP (XEXP (x
, 0), 0);
2913 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
2914 if (INTVAL (XEXP (x
, 1)) != 32 + shift
2915 || INTVAL (XEXP (x
, 2)) != 0)
2918 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2919 (const_int 0xffffffff<<shift)) */
2920 else if (GET_CODE (x
) == AND
2921 && GET_MODE (x
) == DImode
2922 && GET_CODE (XEXP (x
, 0)) == MULT
2923 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2924 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
2925 && CONST_INT_P (XEXP (x
, 1)))
2927 type
= ADDRESS_REG_UXTW
;
2928 index
= XEXP (XEXP (x
, 0), 0);
2929 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
2930 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
2933 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2934 else if ((GET_CODE (x
) == SIGN_EXTRACT
2935 || GET_CODE (x
) == ZERO_EXTRACT
)
2936 && GET_MODE (x
) == DImode
2937 && GET_CODE (XEXP (x
, 0)) == ASHIFT
2938 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2939 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
2941 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
2942 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2943 index
= XEXP (XEXP (x
, 0), 0);
2944 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
2945 if (INTVAL (XEXP (x
, 1)) != 32 + shift
2946 || INTVAL (XEXP (x
, 2)) != 0)
2949 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2950 (const_int 0xffffffff<<shift)) */
2951 else if (GET_CODE (x
) == AND
2952 && GET_MODE (x
) == DImode
2953 && GET_CODE (XEXP (x
, 0)) == ASHIFT
2954 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2955 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
2956 && CONST_INT_P (XEXP (x
, 1)))
2958 type
= ADDRESS_REG_UXTW
;
2959 index
= XEXP (XEXP (x
, 0), 0);
2960 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
2961 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
2964 /* (mult:P (reg:P) (const_int scale)) */
2965 else if (GET_CODE (x
) == MULT
2966 && GET_MODE (x
) == Pmode
2967 && GET_MODE (XEXP (x
, 0)) == Pmode
2968 && CONST_INT_P (XEXP (x
, 1)))
2970 type
= ADDRESS_REG_REG
;
2971 index
= XEXP (x
, 0);
2972 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
2974 /* (ashift:P (reg:P) (const_int shift)) */
2975 else if (GET_CODE (x
) == ASHIFT
2976 && GET_MODE (x
) == Pmode
2977 && GET_MODE (XEXP (x
, 0)) == Pmode
2978 && CONST_INT_P (XEXP (x
, 1)))
2980 type
= ADDRESS_REG_REG
;
2981 index
= XEXP (x
, 0);
2982 shift
= INTVAL (XEXP (x
, 1));
2987 if (GET_CODE (index
) == SUBREG
)
2988 index
= SUBREG_REG (index
);
2991 (shift
> 0 && shift
<= 3
2992 && (1 << shift
) == GET_MODE_SIZE (mode
)))
2994 && aarch64_regno_ok_for_index_p (REGNO (index
), strict_p
))
2997 info
->offset
= index
;
2998 info
->shift
= shift
;
static inline bool
offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
          && offset < 64 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
          && offset < 4096 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}
3028 /* Return true if X is a valid address for machine mode MODE. If it is,
3029 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3030 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3033 aarch64_classify_address (struct aarch64_address_info
*info
,
3034 rtx x
, enum machine_mode mode
,
3035 RTX_CODE outer_code
, bool strict_p
)
3037 enum rtx_code code
= GET_CODE (x
);
3039 bool allow_reg_index_p
=
3040 outer_code
!= PARALLEL
&& GET_MODE_SIZE(mode
) != 16;
3042 /* Don't support anything other than POST_INC or REG addressing for
3044 if (aarch64_vector_mode_p (mode
)
3045 && (code
!= POST_INC
&& code
!= REG
))
3052 info
->type
= ADDRESS_REG_IMM
;
3054 info
->offset
= const0_rtx
;
3055 return aarch64_base_register_rtx_p (x
, strict_p
);
3060 if (GET_MODE_SIZE (mode
) != 0
3061 && CONST_INT_P (op1
)
3062 && aarch64_base_register_rtx_p (op0
, strict_p
))
3064 HOST_WIDE_INT offset
= INTVAL (op1
);
3066 info
->type
= ADDRESS_REG_IMM
;
/* TImode and TFmode values are allowed in both pairs of X
   registers and individual Q registers.  The available
   address modes are:
   X,X: 7-bit signed scaled offset
   Q:   9-bit signed offset
   We conservatively require an offset representable in either mode.  */
3077 if (mode
== TImode
|| mode
== TFmode
)
3078 return (offset_7bit_signed_scaled_p (mode
, offset
)
3079 && offset_9bit_signed_unscaled_p (mode
, offset
));
3081 if (outer_code
== PARALLEL
)
3082 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3083 && offset_7bit_signed_scaled_p (mode
, offset
));
3085 return (offset_9bit_signed_unscaled_p (mode
, offset
)
3086 || offset_12bit_unsigned_scaled_p (mode
, offset
));
3089 if (allow_reg_index_p
)
3091 /* Look for base + (scaled/extended) index register. */
3092 if (aarch64_base_register_rtx_p (op0
, strict_p
)
3093 && aarch64_classify_index (info
, op1
, mode
, strict_p
))
3098 if (aarch64_base_register_rtx_p (op1
, strict_p
)
3099 && aarch64_classify_index (info
, op0
, mode
, strict_p
))
3112 info
->type
= ADDRESS_REG_WB
;
3113 info
->base
= XEXP (x
, 0);
3114 info
->offset
= NULL_RTX
;
3115 return aarch64_base_register_rtx_p (info
->base
, strict_p
);
3119 info
->type
= ADDRESS_REG_WB
;
3120 info
->base
= XEXP (x
, 0);
3121 if (GET_CODE (XEXP (x
, 1)) == PLUS
3122 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3123 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), info
->base
)
3124 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3126 HOST_WIDE_INT offset
;
3127 info
->offset
= XEXP (XEXP (x
, 1), 1);
3128 offset
= INTVAL (info
->offset
);
/* TImode and TFmode values are allowed in both pairs of X
   registers and individual Q registers.  The available
   address modes are:
   X,X: 7-bit signed scaled offset
   Q:   9-bit signed offset
   We conservatively require an offset representable in either mode.  */
3137 if (mode
== TImode
|| mode
== TFmode
)
3138 return (offset_7bit_signed_scaled_p (mode
, offset
)
3139 && offset_9bit_signed_unscaled_p (mode
, offset
));
3141 if (outer_code
== PARALLEL
)
3142 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3143 && offset_7bit_signed_scaled_p (mode
, offset
));
3145 return offset_9bit_signed_unscaled_p (mode
, offset
);
3152 /* load literal: pc-relative constant pool entry. Only supported
3153 for SI mode or larger. */
3154 info
->type
= ADDRESS_SYMBOLIC
;
3155 if (outer_code
!= PARALLEL
&& GET_MODE_SIZE (mode
) >= 4)
3159 split_const (x
, &sym
, &addend
);
3160 return (GET_CODE (sym
) == LABEL_REF
3161 || (GET_CODE (sym
) == SYMBOL_REF
3162 && CONSTANT_POOL_ADDRESS_P (sym
)));
3167 info
->type
= ADDRESS_LO_SUM
;
3168 info
->base
= XEXP (x
, 0);
3169 info
->offset
= XEXP (x
, 1);
3170 if (allow_reg_index_p
3171 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3174 split_const (info
->offset
, &sym
, &offs
);
3175 if (GET_CODE (sym
) == SYMBOL_REF
3176 && (aarch64_classify_symbol (sym
, SYMBOL_CONTEXT_MEM
)
3177 == SYMBOL_SMALL_ABSOLUTE
))
3179 /* The symbol and offset must be aligned to the access size. */
3181 unsigned int ref_size
;
3183 if (CONSTANT_POOL_ADDRESS_P (sym
))
3184 align
= GET_MODE_ALIGNMENT (get_pool_mode (sym
));
3185 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym
))
3187 tree exp
= SYMBOL_REF_DECL (sym
);
3188 align
= TYPE_ALIGN (TREE_TYPE (exp
));
3189 align
= CONSTANT_ALIGNMENT (exp
, align
);
3191 else if (SYMBOL_REF_DECL (sym
))
3192 align
= DECL_ALIGN (SYMBOL_REF_DECL (sym
));
3194 align
= BITS_PER_UNIT
;
3196 ref_size
= GET_MODE_SIZE (mode
);
3198 ref_size
= GET_MODE_SIZE (DImode
);
3200 return ((INTVAL (offs
) & (ref_size
- 1)) == 0
3201 && ((align
/ BITS_PER_UNIT
) & (ref_size
- 1)) == 0);
/* Return true if X refers to a symbol or label, possibly with a
   constant offset.  */
bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}

/* Classify the base of symbolic expression X, given that X appears in
   context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x,
                                      enum aarch64_symbol_context context)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, context);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x,
                                   bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
                              RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}
/* Return TRUE if rtx X is immediate constant 0.0 */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return REAL_VALUES_EQUAL (r, dconst0);
}

/* Return the fixed registers used for condition codes.  */
static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
3282 aarch64_select_cc_mode (RTX_CODE code
, rtx x
, rtx y
)
3284 /* All floating point compares return CCFP if it is an equality
3285 comparison, and CCFPE otherwise. */
3286 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
3313 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3315 && (code
== EQ
|| code
== NE
|| code
== LT
|| code
== GE
)
3316 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
|| GET_CODE (x
) == AND
3317 || GET_CODE (x
) == NEG
))
/* A compare with a shifted or negated operand.  Because of canonicalization,
   the comparison will have to be swapped when we emit the assembly
   code.  */
3323 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3324 && (GET_CODE (y
) == REG
|| GET_CODE (y
) == SUBREG
)
3325 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
3326 || GET_CODE (x
) == LSHIFTRT
3327 || GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
3328 || GET_CODE (x
) == NEG
))
3331 /* A compare of a mode narrower than SI mode against zero can be done
3332 by extending the value in the comparison. */
3333 if ((GET_MODE (x
) == QImode
|| GET_MODE (x
) == HImode
)
3335 /* Only use sign-extension if we really need it. */
3336 return ((code
== GT
|| code
== GE
|| code
== LE
|| code
== LT
)
3337 ? CC_SESWPmode
: CC_ZESWPmode
);
3339 /* For everything else, return CCmode. */
3344 aarch64_get_condition_code (rtx x
)
3346 enum machine_mode mode
= GET_MODE (XEXP (x
, 0));
3347 enum rtx_code comp_code
= GET_CODE (x
);
3349 if (GET_MODE_CLASS (mode
) != MODE_CC
)
3350 mode
= SELECT_CC_MODE (comp_code
, XEXP (x
, 0), XEXP (x
, 1));
3358 case GE
: return AARCH64_GE
;
3359 case GT
: return AARCH64_GT
;
3360 case LE
: return AARCH64_LS
;
3361 case LT
: return AARCH64_MI
;
3362 case NE
: return AARCH64_NE
;
3363 case EQ
: return AARCH64_EQ
;
3364 case ORDERED
: return AARCH64_VC
;
3365 case UNORDERED
: return AARCH64_VS
;
3366 case UNLT
: return AARCH64_LT
;
3367 case UNLE
: return AARCH64_LE
;
3368 case UNGT
: return AARCH64_HI
;
3369 case UNGE
: return AARCH64_PL
;
3370 default: gcc_unreachable ();
3377 case NE
: return AARCH64_NE
;
3378 case EQ
: return AARCH64_EQ
;
3379 case GE
: return AARCH64_GE
;
3380 case GT
: return AARCH64_GT
;
3381 case LE
: return AARCH64_LE
;
3382 case LT
: return AARCH64_LT
;
3383 case GEU
: return AARCH64_CS
;
3384 case GTU
: return AARCH64_HI
;
3385 case LEU
: return AARCH64_LS
;
3386 case LTU
: return AARCH64_CC
;
3387 default: gcc_unreachable ();
3396 case NE
: return AARCH64_NE
;
3397 case EQ
: return AARCH64_EQ
;
3398 case GE
: return AARCH64_LE
;
3399 case GT
: return AARCH64_LT
;
3400 case LE
: return AARCH64_GE
;
3401 case LT
: return AARCH64_GT
;
3402 case GEU
: return AARCH64_LS
;
3403 case GTU
: return AARCH64_CC
;
3404 case LEU
: return AARCH64_CS
;
3405 case LTU
: return AARCH64_HI
;
3406 default: gcc_unreachable ();
3413 case NE
: return AARCH64_NE
;
3414 case EQ
: return AARCH64_EQ
;
3415 case GE
: return AARCH64_PL
;
3416 case LT
: return AARCH64_MI
;
3417 default: gcc_unreachable ();
static unsigned
bit_count (unsigned HOST_WIDE_INT value)
{
  unsigned count = 0;

  /* Clear the lowest set bit on each iteration (Kernighan's method).  */
  while (value)
    {
      count++;
      value &= value - 1;
    }

  return count;
}

static void
3442 aarch64_print_operand (FILE *f
, rtx x
, char code
)
3446 /* An integer or symbol address without a preceding # sign. */
3448 switch (GET_CODE (x
))
3451 fprintf (f
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
3455 output_addr_const (f
, x
);
3459 if (GET_CODE (XEXP (x
, 0)) == PLUS
3460 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
3462 output_addr_const (f
, x
);
3468 output_operand_lossage ("Unsupported operand for code '%c'", code
);
3473 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3477 if (GET_CODE (x
) != CONST_INT
3478 || (n
= exact_log2 (INTVAL (x
) & ~7)) <= 0)
3480 output_operand_lossage ("invalid operand for '%%%c'", code
);
3496 output_operand_lossage ("invalid operand for '%%%c'", code
);
3506 /* Print N such that 2^N == X. */
3507 if (GET_CODE (x
) != CONST_INT
|| (n
= exact_log2 (INTVAL (x
))) < 0)
3509 output_operand_lossage ("invalid operand for '%%%c'", code
);
3513 asm_fprintf (f
, "%d", n
);
3518 /* Print the number of non-zero bits in X (a const_int). */
3519 if (GET_CODE (x
) != CONST_INT
)
3521 output_operand_lossage ("invalid operand for '%%%c'", code
);
3525 asm_fprintf (f
, "%u", bit_count (INTVAL (x
)));
3529 /* Print the higher numbered register of a pair (TImode) of regs. */
3530 if (GET_CODE (x
) != REG
|| !GP_REGNUM_P (REGNO (x
) + 1))
3532 output_operand_lossage ("invalid operand for '%%%c'", code
);
3536 asm_fprintf (f
, "%s", reg_names
[REGNO (x
) + 1]);
3540 /* Print a condition (eq, ne, etc). */
3542 /* CONST_TRUE_RTX means always -- that's the default. */
3543 if (x
== const_true_rtx
)
3546 if (!COMPARISON_P (x
))
3548 output_operand_lossage ("invalid operand for '%%%c'", code
);
3552 fputs (aarch64_condition_codes
[aarch64_get_condition_code (x
)], f
);
3556 /* Print the inverse of a condition (eq <-> ne, etc). */
3558 /* CONST_TRUE_RTX means never -- that's the default. */
3559 if (x
== const_true_rtx
)
3565 if (!COMPARISON_P (x
))
3567 output_operand_lossage ("invalid operand for '%%%c'", code
);
3571 fputs (aarch64_condition_codes
[AARCH64_INVERSE_CONDITION_CODE
3572 (aarch64_get_condition_code (x
))], f
);
3580 /* Print a scalar FP/SIMD register name. */
3581 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3583 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3586 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - V0_REGNUM
);
3593 /* Print the first FP/SIMD register name in a list. */
3594 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3596 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3599 asm_fprintf (f
, "v%d", REGNO (x
) - V0_REGNUM
+ (code
- 'S'));
3603 /* Print bottom 16 bits of integer constant in hex. */
3604 if (GET_CODE (x
) != CONST_INT
)
3606 output_operand_lossage ("invalid operand for '%%%c'", code
);
3609 asm_fprintf (f
, "0x%wx", UINTVAL (x
) & 0xffff);
/* Print a general register name or the zero register (32-bit or
   64-bit).  */
if (x == const0_rtx
3617 || (CONST_DOUBLE_P (x
) && aarch64_float_const_zero_rtx_p (x
)))
3619 asm_fprintf (f
, "%czr", code
);
3623 if (REG_P (x
) && GP_REGNUM_P (REGNO (x
)))
3625 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - R0_REGNUM
);
3629 if (REG_P (x
) && REGNO (x
) == SP_REGNUM
)
3631 asm_fprintf (f
, "%ssp", code
== 'w' ? "w" : "");
/* Print a normal operand.  If it's a general register, we assume
   DImode.  */
if (x == NULL)
  {
    output_operand_lossage ("missing operand");
    return;
  }
3646 switch (GET_CODE (x
))
3649 asm_fprintf (f
, "%s", reg_names
[REGNO (x
)]);
3653 aarch64_memory_reference_mode
= GET_MODE (x
);
3654 output_address (XEXP (x
, 0));
3659 output_addr_const (asm_out_file
, x
);
3663 asm_fprintf (f
, "%wd", INTVAL (x
));
3667 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_INT
)
3669 gcc_assert (aarch64_const_vec_all_same_int_p (x
,
3671 HOST_WIDE_INT_MAX
));
3672 asm_fprintf (f
, "%wd", INTVAL (CONST_VECTOR_ELT (x
, 0)));
3674 else if (aarch64_simd_imm_zero_p (x
, GET_MODE (x
)))
3683 /* CONST_DOUBLE can represent a double-width integer.
3684 In this case, the mode of x is VOIDmode. */
3685 if (GET_MODE (x
) == VOIDmode
)
3687 else if (aarch64_float_const_zero_rtx_p (x
))
3692 else if (aarch64_float_const_representable_p (x
))
3695 char float_buf
[buf_size
] = {'\0'};
3697 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3698 real_to_decimal_for_mode (float_buf
, &r
,
3701 asm_fprintf (asm_out_file
, "%s", float_buf
);
3705 output_operand_lossage ("invalid constant");
3708 output_operand_lossage ("invalid operand");
3714 if (GET_CODE (x
) == HIGH
)
3717 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3719 case SYMBOL_SMALL_GOT
:
3720 asm_fprintf (asm_out_file
, ":got:");
3723 case SYMBOL_SMALL_TLSGD
:
3724 asm_fprintf (asm_out_file
, ":tlsgd:");
3727 case SYMBOL_SMALL_TLSDESC
:
3728 asm_fprintf (asm_out_file
, ":tlsdesc:");
3731 case SYMBOL_SMALL_GOTTPREL
:
3732 asm_fprintf (asm_out_file
, ":gottprel:");
3735 case SYMBOL_SMALL_TPREL
:
3736 asm_fprintf (asm_out_file
, ":tprel:");
3739 case SYMBOL_TINY_GOT
:
3746 output_addr_const (asm_out_file
, x
);
3750 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3752 case SYMBOL_SMALL_GOT
:
3753 asm_fprintf (asm_out_file
, ":lo12:");
3756 case SYMBOL_SMALL_TLSGD
:
3757 asm_fprintf (asm_out_file
, ":tlsgd_lo12:");
3760 case SYMBOL_SMALL_TLSDESC
:
3761 asm_fprintf (asm_out_file
, ":tlsdesc_lo12:");
3764 case SYMBOL_SMALL_GOTTPREL
:
3765 asm_fprintf (asm_out_file
, ":gottprel_lo12:");
3768 case SYMBOL_SMALL_TPREL
:
3769 asm_fprintf (asm_out_file
, ":tprel_lo12_nc:");
3772 case SYMBOL_TINY_GOT
:
3773 asm_fprintf (asm_out_file
, ":got:");
3779 output_addr_const (asm_out_file
, x
);
3784 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3786 case SYMBOL_SMALL_TPREL
:
3787 asm_fprintf (asm_out_file
, ":tprel_hi12:");
3792 output_addr_const (asm_out_file
, x
);
3796 output_operand_lossage ("invalid operand prefix '%%%c'", code
);
3802 aarch64_print_operand_address (FILE *f
, rtx x
)
3804 struct aarch64_address_info addr
;
3806 if (aarch64_classify_address (&addr
, x
, aarch64_memory_reference_mode
,
3810 case ADDRESS_REG_IMM
:
3811 if (addr
.offset
== const0_rtx
)
3812 asm_fprintf (f
, "[%s]", reg_names
[REGNO (addr
.base
)]);
3814 asm_fprintf (f
, "[%s,%wd]", reg_names
[REGNO (addr
.base
)],
3815 INTVAL (addr
.offset
));
3818 case ADDRESS_REG_REG
:
3819 if (addr
.shift
== 0)
3820 asm_fprintf (f
, "[%s,%s]", reg_names
[REGNO (addr
.base
)],
3821 reg_names
[REGNO (addr
.offset
)]);
3823 asm_fprintf (f
, "[%s,%s,lsl %u]", reg_names
[REGNO (addr
.base
)],
3824 reg_names
[REGNO (addr
.offset
)], addr
.shift
);
3827 case ADDRESS_REG_UXTW
:
3828 if (addr
.shift
== 0)
3829 asm_fprintf (f
, "[%s,w%d,uxtw]", reg_names
[REGNO (addr
.base
)],
3830 REGNO (addr
.offset
) - R0_REGNUM
);
3832 asm_fprintf (f
, "[%s,w%d,uxtw %u]", reg_names
[REGNO (addr
.base
)],
3833 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
3836 case ADDRESS_REG_SXTW
:
3837 if (addr
.shift
== 0)
3838 asm_fprintf (f
, "[%s,w%d,sxtw]", reg_names
[REGNO (addr
.base
)],
3839 REGNO (addr
.offset
) - R0_REGNUM
);
3841 asm_fprintf (f
, "[%s,w%d,sxtw %u]", reg_names
[REGNO (addr
.base
)],
3842 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
3845 case ADDRESS_REG_WB
:
3846 switch (GET_CODE (x
))
3849 asm_fprintf (f
, "[%s,%d]!", reg_names
[REGNO (addr
.base
)],
3850 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3853 asm_fprintf (f
, "[%s],%d", reg_names
[REGNO (addr
.base
)],
3854 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3857 asm_fprintf (f
, "[%s,-%d]!", reg_names
[REGNO (addr
.base
)],
3858 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3861 asm_fprintf (f
, "[%s],-%d", reg_names
[REGNO (addr
.base
)],
3862 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3865 asm_fprintf (f
, "[%s,%wd]!", reg_names
[REGNO (addr
.base
)],
3866 INTVAL (addr
.offset
));
3869 asm_fprintf (f
, "[%s],%wd", reg_names
[REGNO (addr
.base
)],
3870 INTVAL (addr
.offset
));
3877 case ADDRESS_LO_SUM
:
3878 asm_fprintf (f
, "[%s,#:lo12:", reg_names
[REGNO (addr
.base
)]);
3879 output_addr_const (f
, addr
.offset
);
3880 asm_fprintf (f
, "]");
3883 case ADDRESS_SYMBOLIC
:
3887 output_addr_const (f
, x
);
3891 aarch64_label_mentioned_p (rtx x
)
3896 if (GET_CODE (x
) == LABEL_REF
)
3899 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3900 referencing instruction, but they are constant offsets, not
3902 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
3905 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
3906 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
3912 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
3913 if (aarch64_label_mentioned_p (XVECEXP (x
, i
, j
)))
3916 else if (fmt
[i
] == 'e' && aarch64_label_mentioned_p (XEXP (x
, i
)))
/* Implement REGNO_REG_CLASS.  */

enum reg_class
aarch64_regno_regclass (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return CORE_REGS;

  if (regno == SP_REGNUM)
    return STACK_REG;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return POINTER_REGS;

  if (FP_REGNUM_P (regno))
    return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;

  return NO_REGS;
}
3944 /* Try a machine-dependent way of reloading an illegitimate address
3945 operand. If we find one, push the reload and return the new rtx. */
3948 aarch64_legitimize_reload_address (rtx
*x_p
,
3949 enum machine_mode mode
,
3950 int opnum
, int type
,
3951 int ind_levels ATTRIBUTE_UNUSED
)
3955 /* Do not allow mem (plus (reg, const)) if vector mode. */
3956 if (aarch64_vector_mode_p (mode
)
3957 && GET_CODE (x
) == PLUS
3958 && REG_P (XEXP (x
, 0))
3959 && CONST_INT_P (XEXP (x
, 1)))
3963 push_reload (orig_rtx
, NULL_RTX
, x_p
, NULL
,
3964 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
3965 opnum
, (enum reload_type
) type
);
3969 /* We must recognize output that we have already generated ourselves. */
3970 if (GET_CODE (x
) == PLUS
3971 && GET_CODE (XEXP (x
, 0)) == PLUS
3972 && REG_P (XEXP (XEXP (x
, 0), 0))
3973 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3974 && CONST_INT_P (XEXP (x
, 1)))
3976 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
3977 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
3978 opnum
, (enum reload_type
) type
);
3982 /* We wish to handle large displacements off a base register by splitting
3983 the addend across an add and the mem insn. This can cut the number of
3984 extra insns needed from 3 to 1. It is only useful for load/store of a
3985 single register with 12 bit offset field. */
3986 if (GET_CODE (x
) == PLUS
3987 && REG_P (XEXP (x
, 0))
3988 && CONST_INT_P (XEXP (x
, 1))
3989 && HARD_REGISTER_P (XEXP (x
, 0))
3992 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x
, 0)), true))
3994 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
3995 HOST_WIDE_INT low
= val
& 0xfff;
3996 HOST_WIDE_INT high
= val
- low
;
3999 enum machine_mode xmode
= GET_MODE (x
);
4001 /* In ILP32, xmode can be either DImode or SImode. */
4002 gcc_assert (xmode
== DImode
|| xmode
== SImode
);
4004 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4005 BLKmode alignment. */
4006 if (GET_MODE_SIZE (mode
) == 0)
4009 offs
= low
% GET_MODE_SIZE (mode
);
4011 /* Align misaligned offset by adjusting high part to compensate. */
4014 if (aarch64_uimm12_shift (high
+ offs
))
4023 offs
= GET_MODE_SIZE (mode
) - offs
;
4025 high
= high
+ (low
& 0x1000) - offs
;
4030 /* Check for overflow. */
4031 if (high
+ low
!= val
)
4034 cst
= GEN_INT (high
);
4035 if (!aarch64_uimm12_shift (high
))
4036 cst
= force_const_mem (xmode
, cst
);
4038 /* Reload high part into base reg, leaving the low part
4039 in the mem instruction.
4040 Note that replacing this gen_rtx_PLUS with plus_constant is
4041 wrong in this case because we rely on the
4042 (plus (plus reg c1) c2) structure being preserved so that
4043 XEXP (*p, 0) in push_reload below uses the correct term. */
4044 x
= gen_rtx_PLUS (xmode
,
4045 gen_rtx_PLUS (xmode
, XEXP (x
, 0), cst
),
4048 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4049 BASE_REG_CLASS
, xmode
, VOIDmode
, 0, 0,
4050 opnum
, (enum reload_type
) type
);
4059 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED
, rtx x
,
4061 enum machine_mode mode
,
4062 secondary_reload_info
*sri
)
4064 /* Without the TARGET_SIMD instructions we cannot move a Q register
4065 to a Q register directly. We need a scratch. */
4066 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
4067 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
4068 && reg_class_subset_p (rclass
, FP_REGS
))
4071 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
4072 else if (mode
== TImode
)
4073 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
4077 /* A TFmode or TImode memory access should be handled via an FP_REGS
4078 because AArch64 has richer addressing modes for LDR/STR instructions
4079 than LDP/STP instructions. */
4080 if (!TARGET_GENERAL_REGS_ONLY
&& rclass
== CORE_REGS
4081 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
4084 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
4091 aarch64_can_eliminate (const int from
, const int to
)
4093 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4094 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4096 if (frame_pointer_needed
)
4098 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4100 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
4102 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4103 && !cfun
->calls_alloca
)
4105 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4111 /* If we decided that we didn't need a leaf frame pointer but then used
4112 LR in the function, then we'll want a frame pointer after all, so
4113 prevent this elimination to ensure a frame pointer is used.
4115 NOTE: the original value of flag_omit_frame_pointer gets trashed
4116 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4117 of faked_omit_frame_pointer here (which is true when we always
4118 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4119 pointers when LR is clobbered). */
4120 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4121 && df_regs_ever_live_p (LR_REGNUM
)
4122 && faked_omit_frame_pointer
)
4130 aarch64_initial_elimination_offset (unsigned from
, unsigned to
)
4132 HOST_WIDE_INT frame_size
;
4133 HOST_WIDE_INT offset
;
4135 aarch64_layout_frame ();
4136 frame_size
= (get_frame_size () + cfun
->machine
->frame
.saved_regs_size
4137 + crtl
->outgoing_args_size
4138 + cfun
->machine
->saved_varargs_size
);
4140 frame_size
= AARCH64_ROUND_UP (frame_size
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
4141 offset
= frame_size
;
4143 if (to
== HARD_FRAME_POINTER_REGNUM
)
4145 if (from
== ARG_POINTER_REGNUM
)
4146 return offset
- crtl
->outgoing_args_size
;
4148 if (from
== FRAME_POINTER_REGNUM
)
4149 return cfun
->machine
->frame
.saved_regs_size
+ get_frame_size ();
4152 if (to
== STACK_POINTER_REGNUM
)
4154 if (from
== FRAME_POINTER_REGNUM
)
4156 HOST_WIDE_INT elim
= crtl
->outgoing_args_size
4157 + cfun
->machine
->frame
.saved_regs_size
4159 - cfun
->machine
->frame
.fp_lr_offset
;
4160 elim
= AARCH64_ROUND_UP (elim
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
4182 aarch64_asm_trampoline_template (FILE *f
)
4186 asm_fprintf (f
, "\tldr\tw%d, .+16\n", IP1_REGNUM
- R0_REGNUM
);
4187 asm_fprintf (f
, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM
- R0_REGNUM
);
4191 asm_fprintf (f
, "\tldr\t%s, .+16\n", reg_names
[IP1_REGNUM
]);
4192 asm_fprintf (f
, "\tldr\t%s, .+20\n", reg_names
[STATIC_CHAIN_REGNUM
]);
4194 asm_fprintf (f
, "\tbr\t%s\n", reg_names
[IP1_REGNUM
]);
4195 assemble_aligned_integer (4, const0_rtx
);
4196 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4197 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4201 aarch64_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
4203 rtx fnaddr
, mem
, a_tramp
;
4204 const int tramp_code_sz
= 16;
4206 /* Don't need to copy the trailing D-words, we fill those in below. */
4207 emit_block_move (m_tramp
, assemble_trampoline_template (),
4208 GEN_INT (tramp_code_sz
), BLOCK_OP_NORMAL
);
4209 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
);
4210 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4211 if (GET_MODE (fnaddr
) != ptr_mode
)
4212 fnaddr
= convert_memory_address (ptr_mode
, fnaddr
);
4213 emit_move_insn (mem
, fnaddr
);
4215 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
+ POINTER_BYTES
);
4216 emit_move_insn (mem
, chain_value
);
4218 /* XXX We should really define a "clear_cache" pattern and use
4219 gen_clear_cache(). */
4220 a_tramp
= XEXP (m_tramp
, 0);
4221 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
4222 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, ptr_mode
,
4223 plus_constant (ptr_mode
, a_tramp
, TRAMPOLINE_SIZE
),
4227 static unsigned char
4228 aarch64_class_max_nregs (reg_class_t regclass
, enum machine_mode mode
)
4239 aarch64_vector_mode_p (mode
) ? (GET_MODE_SIZE (mode
) + 15) / 16 :
4240 (GET_MODE_SIZE (mode
) + 7) / 8;
4254 aarch64_preferred_reload_class (rtx x
, reg_class_t regclass
)
4256 if (regclass
== POINTER_REGS
)
4257 return GENERAL_REGS
;
4259 if (regclass
== STACK_REG
)
4262 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x
)), POINTER_REGS
))
4268 /* If it's an integer immediate that MOVI can't handle, then
4269 FP_REGS is not an option, so we return NO_REGS instead. */
4270 if (CONST_INT_P (x
) && reg_class_subset_p (regclass
, FP_REGS
)
4271 && !aarch64_simd_imm_scalar_p (x
, GET_MODE (x
)))
/* Register elimination can result in a request for
   SP+constant->FP_REGS.  We cannot support such operations which
   use SP as source and an FP_REG as destination, so reject out
   of hand.  */
4278 if (! reg_class_subset_p (regclass
, GENERAL_REGS
) && GET_CODE (x
) == PLUS
)
4280 rtx lhs
= XEXP (x
, 0);
4282 /* Look through a possible SUBREG introduced by ILP32. */
4283 if (GET_CODE (lhs
) == SUBREG
)
4284 lhs
= SUBREG_REG (lhs
);
4286 gcc_assert (REG_P (lhs
));
4287 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs
)),
4296 aarch64_asm_output_labelref (FILE* f
, const char *name
)
4298 asm_fprintf (f
, "%U%s", name
);
4302 aarch64_elf_asm_constructor (rtx symbol
, int priority
)
4304 if (priority
== DEFAULT_INIT_PRIORITY
)
4305 default_ctor_section_asm_out_constructor (symbol
, priority
);
4310 snprintf (buf
, sizeof (buf
), ".init_array.%.5u", priority
);
4311 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4312 switch_to_section (s
);
4313 assemble_align (POINTER_SIZE
);
4314 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4319 aarch64_elf_asm_destructor (rtx symbol
, int priority
)
4321 if (priority
== DEFAULT_INIT_PRIORITY
)
4322 default_dtor_section_asm_out_destructor (symbol
, priority
);
4327 snprintf (buf
, sizeof (buf
), ".fini_array.%.5u", priority
);
4328 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4329 switch_to_section (s
);
4330 assemble_align (POINTER_SIZE
);
4331 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4336 aarch64_output_casesi (rtx
*operands
)
4340 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[2]));
4342 static const char *const patterns
[4][2] =
4345 "ldrb\t%w3, [%0,%w1,uxtw]",
4346 "add\t%3, %4, %w3, sxtb #2"
4349 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4350 "add\t%3, %4, %w3, sxth #2"
4353 "ldr\t%w3, [%0,%w1,uxtw #2]",
4354 "add\t%3, %4, %w3, sxtw #2"
4356 /* We assume that DImode is only generated when not optimizing and
4357 that we don't really need 64-bit address offsets. That would
4358 imply an object file with 8GB of code in a single function! */
4360 "ldr\t%w3, [%0,%w1,uxtw #2]",
4361 "add\t%3, %4, %w3, sxtw #2"
4365 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
4367 index
= exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec
)));
4369 gcc_assert (index
>= 0 && index
<= 3);
/* Need to implement table size reduction, by changing the code below.  */
4372 output_asm_insn (patterns
[index
][0], operands
);
4373 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
4374 snprintf (buf
, sizeof (buf
),
4375 "adr\t%%4, %s", targetm
.strip_name_encoding (label
));
4376 output_asm_insn (buf
, operands
);
4377 output_asm_insn (patterns
[index
][1], operands
);
4378 output_asm_insn ("br\t%3", operands
);
4379 assemble_label (asm_out_file
, label
);
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operation.  */
int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
        {
          HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
          if (mask == bits << shift)
            return size;
        }
    }
  return 0;
}
static bool
aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                                   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
                            rtx x ATTRIBUTE_UNUSED,
                            unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}
4425 /* Helper function for rtx cost calculation. Strip a shift expression
4426 from X. Returns the inner operand if successful, or the original
4427 expression on failure. */
4429 aarch64_strip_shift (rtx x
)
4433 if ((GET_CODE (op
) == ASHIFT
4434 || GET_CODE (op
) == ASHIFTRT
4435 || GET_CODE (op
) == LSHIFTRT
)
4436 && CONST_INT_P (XEXP (op
, 1)))
4437 return XEXP (op
, 0);
4439 if (GET_CODE (op
) == MULT
4440 && CONST_INT_P (XEXP (op
, 1))
4441 && ((unsigned) exact_log2 (INTVAL (XEXP (op
, 1)))) < 64)
4442 return XEXP (op
, 0);
4447 /* Helper function for rtx cost calculation. Strip a shift or extend
4448 expression from X. Returns the inner operand if successful, or the
4449 original expression on failure. We deal with a number of possible
4450 canonicalization variations here. */
4452 aarch64_strip_shift_or_extend (rtx x
)
4456 /* Zero and sign extraction of a widened value. */
4457 if ((GET_CODE (op
) == ZERO_EXTRACT
|| GET_CODE (op
) == SIGN_EXTRACT
)
4458 && XEXP (op
, 2) == const0_rtx
4459 && aarch64_is_extend_from_extract (GET_MODE (op
), XEXP (XEXP (op
, 0), 1),
4461 return XEXP (XEXP (op
, 0), 0);
/* It can also be represented (for zero-extend) as an AND with an
   immediate.  */
4465 if (GET_CODE (op
) == AND
4466 && GET_CODE (XEXP (op
, 0)) == MULT
4467 && CONST_INT_P (XEXP (XEXP (op
, 0), 1))
4468 && CONST_INT_P (XEXP (op
, 1))
4469 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op
, 0), 1))),
4470 INTVAL (XEXP (op
, 1))) != 0)
4471 return XEXP (XEXP (op
, 0), 0);
4473 /* Now handle extended register, as this may also have an optional
4474 left shift by 1..4. */
4475 if (GET_CODE (op
) == ASHIFT
4476 && CONST_INT_P (XEXP (op
, 1))
4477 && ((unsigned HOST_WIDE_INT
) INTVAL (XEXP (op
, 1))) <= 4)
4480 if (GET_CODE (op
) == ZERO_EXTEND
4481 || GET_CODE (op
) == SIGN_EXTEND
)
4487 return aarch64_strip_shift (x
);
4490 /* Calculate the cost of calculating X, storing it in *COST. Result
4491 is true if the total cost of the operation has now been calculated. */
4493 aarch64_rtx_costs (rtx x
, int code
, int outer ATTRIBUTE_UNUSED
,
4494 int param ATTRIBUTE_UNUSED
, int *cost
, bool speed
)
4497 const struct cpu_cost_table
*extra_cost
4498 = aarch64_tune_params
->insn_extra_cost
;
4506 switch (GET_CODE (op0
))
4510 *cost
+= extra_cost
->ldst
.store
;
4512 if (op1
!= const0_rtx
)
4513 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
4517 if (! REG_P (SUBREG_REG (op0
)))
4518 *cost
+= rtx_cost (SUBREG_REG (op0
), SET
, 0, speed
);
4521 /* Cost is just the cost of the RHS of the set. */
4522 *cost
+= rtx_cost (op1
, SET
, 1, true);
4525 case ZERO_EXTRACT
: /* Bit-field insertion. */
/* Strip any redundant widening of the RHS to meet the width of
   the target.  */
4529 if (GET_CODE (op1
) == SUBREG
)
4530 op1
= SUBREG_REG (op1
);
4531 if ((GET_CODE (op1
) == ZERO_EXTEND
4532 || GET_CODE (op1
) == SIGN_EXTEND
)
4533 && GET_CODE (XEXP (op0
, 1)) == CONST_INT
4534 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1
, 0)))
4535 >= INTVAL (XEXP (op0
, 1))))
4536 op1
= XEXP (op1
, 0);
4537 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
4547 *cost
+= extra_cost
->ldst
.load
;
4552 op0
= CONST0_RTX (GET_MODE (x
));
4560 if (op1
== const0_rtx
4561 && GET_CODE (op0
) == AND
)
4567 /* Comparisons can work if the order is swapped.
4568 Canonicalization puts the more complex operation first, but
4569 we want it in op1. */
4571 || (GET_CODE (op0
) == SUBREG
&& REG_P (SUBREG_REG (op0
)))))
4583 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
4584 || (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
4585 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
))
4587 if (op0
!= const0_rtx
)
4588 *cost
+= rtx_cost (op0
, MINUS
      *cost += rtx_cost (op0, MINUS, 0, speed);

      if (CONST_INT_P (op1))
	{
	  if (!aarch64_uimm12_shift (INTVAL (op1)))
	    *cost += rtx_cost (op1, MINUS, 1, speed);
	}
      else
	{
	  op1 = aarch64_strip_shift_or_extend (op1);
	  *cost += rtx_cost (op1, MINUS, 1, speed);
	}
      return true;

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
	    {
	      *cost += rtx_cost (op0, PLUS, 0, speed);
	      return true;
	    }

	  rtx new_op0 = aarch64_strip_shift_or_extend (op0);

	  if (new_op0 == op0
	      && GET_CODE (op0) == MULT)
	    {
	      if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
		{
		  *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
				      speed)
			    + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
					speed)
			    + rtx_cost (op1, PLUS, 1, speed));
		  if (speed)
		    *cost +=
		      extra_cost->mult[GET_MODE (x) == DImode].extend_add;
		  return true;
		}

	      *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
			+ rtx_cost (XEXP (op0, 1), MULT, 1, speed)
			+ rtx_cost (op1, PLUS, 1, speed));

	      if (speed)
		*cost += extra_cost->mult[GET_MODE (x) == DImode].add;

	      return true;
	    }

	  *cost += (rtx_cost (new_op0, PLUS, 0, speed)
		    + rtx_cost (op1, PLUS, 1, speed));
	}
      return true;

    case IOR:
    case XOR:
    case AND:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  if (CONST_INT_P (op1)
	      && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
	    *cost += rtx_cost (op0, AND, 0, speed);
	  else
	    {
	      if (GET_CODE (op0) == NOT)
		op0 = XEXP (op0, 0);
	      op0 = aarch64_strip_shift (op0);
	      *cost += (rtx_cost (op0, AND, 0, speed)
			+ rtx_cost (op1, AND, 1, speed));
	    }
	}
      return true;

    case ZERO_EXTEND:
      if ((GET_MODE (x) == DImode
	   && GET_MODE (XEXP (x, 0)) == SImode)
	  || GET_CODE (XEXP (x, 0)) == MEM)
	{
	  *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
	  return true;
	}
      return false;

    case SIGN_EXTEND:
      if (GET_CODE (XEXP (x, 0)) == MEM)
	{
	  *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
	  return true;
	}
      return false;

    case ROTATE:
    case ROTATERT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (!CONST_INT_P (XEXP (x, 1)))
	*cost += COSTS_N_INSNS (2);
      /* Fall through.  */

    case ASHIFT:
      /* Shifting by a register often takes an extra cycle.  */
      if (speed && !CONST_INT_P (XEXP (x, 1)))
	*cost += extra_cost->alu.arith_shift_reg;

      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
      return true;

    case HIGH:
      if (!CONSTANT_P (XEXP (x, 0)))
	*cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
      return true;

    case LO_SUM:
      if (!CONSTANT_P (XEXP (x, 1)))
	*cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
      *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
      return true;

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
      return true;

    case MULT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      *cost = COSTS_N_INSNS (1);
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  if (CONST_INT_P (op1)
	      && exact_log2 (INTVAL (op1)) > 0)
	    {
	      *cost += rtx_cost (op0, ASHIFT, 0, speed);
	      return true;
	    }

	  if ((GET_CODE (op0) == ZERO_EXTEND
	       && GET_CODE (op1) == ZERO_EXTEND)
	      || (GET_CODE (op0) == SIGN_EXTEND
		  && GET_CODE (op1) == SIGN_EXTEND))
	    {
	      *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
			+ rtx_cost (XEXP (op1, 0), MULT, 1, speed));
	      if (speed)
		*cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
	      return true;
	    }

	  if (speed)
	    *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
	}
      else if (speed)
	{
	  if (GET_MODE (x) == DFmode)
	    *cost += extra_cost->fp[1].mult;
	  else if (GET_MODE (x) == SFmode)
	    *cost += extra_cost->fp[0].mult;
	}

      return false;  /* All arguments need to be in registers.  */

    case MOD:
    case UMOD:
      *cost = COSTS_N_INSNS (2);
      if (speed)
	{
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
		      + extra_cost->mult[GET_MODE (x) == DImode].idiv);
	  else if (GET_MODE (x) == DFmode)
	    *cost += (extra_cost->fp[1].mult
		      + extra_cost->fp[1].div);
	  else if (GET_MODE (x) == SFmode)
	    *cost += (extra_cost->fp[0].mult
		      + extra_cost->fp[0].div);
	}
      return false;  /* All arguments need to be in registers.  */

    case DIV:
    case UDIV:
    case SQRT:
      *cost = COSTS_N_INSNS (1);
      if (speed)
	{
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
	  else if (GET_MODE (x) == DFmode)
	    *cost += extra_cost->fp[1].div;
	  else if (GET_MODE (x) == SFmode)
	    *cost += extra_cost->fp[0].div;
	}
      return false;  /* All arguments need to be in registers.  */

    default:
      break;
    }
  return false;
}
static int
aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
		      enum machine_mode mode ATTRIBUTE_UNUSED,
		      addr_space_t as ATTRIBUTE_UNUSED,
		      bool speed ATTRIBUTE_UNUSED)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;

  if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
    return addr_cost->pre_modify;

  if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
    return addr_cost->post_modify;

  if (c == PLUS)
    {
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	return addr_cost->imm_offset;
      else if (GET_CODE (XEXP (x, 0)) == MULT
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	       || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	return addr_cost->register_extend;

      return addr_cost->register_offset;
    }
  else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return addr_cost->imm_offset;

  return 0;
}
static int
aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			    reg_class_t from, reg_class_t to)
{
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  /* When AdvSIMD instructions are disabled it is not possible to move
     a 128-bit value directly between Q registers.  This is handled in
     secondary reload.  A general register is used as a scratch to move
     the upper DI value and the lower DI value is moved directly,
     hence the cost is the sum of three moves.  */
  if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
    return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

  return regmove_cost->FP2FP;
}

static int
aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			  reg_class_t rclass ATTRIBUTE_UNUSED,
			  bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}
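/* Editor's worked example (not original source): with AdvSIMD disabled, a
   Q-register copy decomposes into three moves, so with a hypothetical table
   { GP2GP = 1, GP2FP = 2, FP2GP = 2, FP2FP = 2 } the 128-bit FP->FP move
   above costs 2 + 2 + 2 = 6, versus 2 when SIMD is available.  */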
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				    tree vectype,
				    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
      case scalar_stmt:
	return aarch64_tune_params->vec_costs->scalar_stmt_cost;

      case scalar_load:
	return aarch64_tune_params->vec_costs->scalar_load_cost;

      case scalar_store:
	return aarch64_tune_params->vec_costs->scalar_store_cost;

      case vector_stmt:
	return aarch64_tune_params->vec_costs->vec_stmt_cost;

      case vector_load:
	return aarch64_tune_params->vec_costs->vec_align_load_cost;

      case vector_store:
	return aarch64_tune_params->vec_costs->vec_store_cost;

      case vec_to_scalar:
	return aarch64_tune_params->vec_costs->vec_to_scalar_cost;

      case scalar_to_vec:
	return aarch64_tune_params->vec_costs->scalar_to_vec_cost;

      case unaligned_load:
	return aarch64_tune_params->vec_costs->vec_unalign_load_cost;

      case unaligned_store:
	return aarch64_tune_params->vec_costs->vec_unalign_store_cost;

      case cond_branch_taken:
	return aarch64_tune_params->vec_costs->cond_taken_branch_cost;

      case cond_branch_not_taken:
	return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
	return aarch64_tune_params->vec_costs->vec_stmt_cost;

      case vec_construct:
	elements = TYPE_VECTOR_SUBPARTS (vectype);
	return elements / 2 + 1;

      default:
	gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		       struct _stmt_vec_info *stmt_info, int misalign,
		       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
	    aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 a function (linear for now) of the loop nest level.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	{
	  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
	  unsigned nest_level = loop_depth (loop);

	  count *= nest_level;
	}

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
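/* Editor's worked example (not original source): for a statement in the
   body of a depth-2 inner loop with count = 4 and stmt_cost = 3, the
   weighting above gives count = 4 * 2 = 8 and retval = 8 * 3 = 24,
   accumulated into cost[vect_body].  */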
static void initialize_aarch64_code_model (void);

/* Parse the architecture extension string.  */
static void
aarch64_parse_extension (char *str)
{
  /* The extension string is parsed left to right.  */
  const struct aarch64_option_extension *opt = NULL;

  /* Flag to say whether we are adding or removing an extension.  */
  int adding_ext = -1;

  while (str != NULL && *str != 0)
    {
      char *ext;
      size_t len;

      str++;
      ext = strchr (str, '+');

      if (ext != NULL)
	len = ext - str;
      else
	len = strlen (str);

      if (len >= 2 && strncmp (str, "no", 2) == 0)
	{
	  adding_ext = 0;
	  len -= 2;
	  str += 2;
	}
      else if (len > 0)
	adding_ext = 1;

      if (len == 0)
	{
	  error ("missing feature modifier after %qs", "+no");
	  return;
	}

      /* Scan over the extensions table trying to find an exact match.  */
      for (opt = all_extensions; opt->name != NULL; opt++)
	{
	  if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
	    {
	      /* Add or remove the extension.  */
	      if (adding_ext)
		aarch64_isa_flags |= opt->flags_on;
	      else
		aarch64_isa_flags &= ~(opt->flags_off);
	      break;
	    }
	}

      if (opt->name == NULL)
	{
	  /* Extension not found in list.  */
	  error ("unknown feature modifier %qs", str);
	  return;
	}

      str = ext;
    }

  return;
}
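/* Editor's illustrative sketch (not original source): the loop above walks
   an option string such as "-march=armv8-a+crc+nocrypto" one '+'-separated
   token at a time.  A standalone model of the tokenizer, with hypothetical
   feature names, guarded out so the file still compiles:  */
#if 0
#include <stdio.h>
#include <string.h>

int
main (void)
{
  char buf[] = "+crc+nocrypto";
  char *str = buf;
  while (str != NULL && *str != 0)
    {
      char *ext;
      size_t len;
      int adding = 1;

      str++;			/* Skip the leading '+'.  */
      ext = strchr (str, '+');	/* Find the next token, if any.  */
      len = ext ? (size_t) (ext - str) : strlen (str);
      if (len >= 2 && strncmp (str, "no", 2) == 0)
	{
	  adding = 0;
	  len -= 2;
	  str += 2;
	}
      printf ("%s %.*s\n", adding ? "enable" : "disable", (int) len, str);
      str = ext;
    }
  return 0;			/* Prints: enable crc / disable crypto.  */
}
#endif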
/* Parse the ARCH string.  */
static void
aarch64_parse_arch (void)
{
  char *ext;
  const struct processor *arch;
  char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
  size_t len;

  strcpy (str, aarch64_arch_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing arch name in -march=%qs", str);
      return;
    }

  /* Loop through the list of supported ARCHs to find a match.  */
  for (arch = all_architectures; arch->name != NULL; arch++)
    {
      if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
	{
	  selected_arch = arch;
	  aarch64_isa_flags = selected_arch->flags;
	  selected_cpu = &all_cores[selected_arch->core];

	  if (ext != NULL)
	    {
	      /* ARCH string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  return;
	}
    }

  /* ARCH name not found in list.  */
  error ("unknown value %qs for -march", str);
  return;
}
/* Parse the CPU string.  */
static void
aarch64_parse_cpu (void)
{
  char *ext;
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
  size_t len;

  strcpy (str, aarch64_cpu_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing cpu name in -mcpu=%qs", str);
      return;
    }

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
	{
	  selected_cpu = cpu;
	  aarch64_isa_flags = selected_cpu->flags;

	  if (ext != NULL)
	    {
	      /* CPU string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mcpu", str);
  return;
}
/* Parse the TUNE string.  */
static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
	{
	  selected_tune = cpu;
	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
  return;
}
/* Implement TARGET_OPTION_OVERRIDE.  */
static void
aarch64_override_options (void)
{
  /* march wins over mcpu, so when march is defined, mcpu takes the same
     value, otherwise march remains undefined.  mtune can be used with
     either march or mcpu.  */

  if (aarch64_arch_string)
    {
      aarch64_parse_arch ();
      aarch64_cpu_string = NULL;
    }

  if (aarch64_cpu_string)
    {
      aarch64_parse_cpu ();
      selected_arch = NULL;
    }

  if (aarch64_tune_string)
    {
      aarch64_parse_tune ();
    }

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "cortex-a53".  */
  if (!selected_cpu)
    {
      selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
      aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
    }

  gcc_assert (selected_cpu);

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!selected_tune)
    selected_tune = &all_cores[selected_cpu->core];

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  aarch64_tune_params = selected_tune->tune;

  aarch64_override_options_after_change ();
}
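/* Editor's worked example (not original source): TARGET_CPU_DEFAULT packs
   the default core index in its low 6 bits and the default ISA flags above
   them, hence the "& 0x3f" and ">> 6" in the fallback path above.  E.g. a
   hypothetical encoding (2 | (5 << 6)) = 322 selects all_cores[2] with an
   ISA flag mask of 5.  */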
/* Implement targetm.override_options_after_change.  */
static void
aarch64_override_options_after_change (void)
{
  faked_omit_frame_pointer = false;

  /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
     that aarch64_frame_pointer_required will be called.  We need to remember
     whether flag_omit_frame_pointer was turned on normally or just faked.  */
  if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
    {
      flag_omit_frame_pointer = true;
      faked_omit_frame_pointer = true;
    }
}

static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}

/* A checking mechanism for the implementation of the various code models.  */
static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
	{
	case AARCH64_CMODEL_TINY:
	  aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
	  break;
	case AARCH64_CMODEL_SMALL:
	  aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
	  break;
	case AARCH64_CMODEL_LARGE:
	  sorry ("code model %qs with -f%s", "large",
		 flag_pic > 1 ? "PIC" : "pic");
	default:
	  gcc_unreachable ();
	}
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}
/* Return true if SYMBOL_REF X binds locally.  */
static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	  : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x,
			 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_LARGE:
	  return SYMBOL_FORCE_TO_MEM;

	case AARCH64_CMODEL_TINY_PIC:
	case AARCH64_CMODEL_TINY:
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	case AARCH64_CMODEL_SMALL:
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE
	  || CONSTANT_POOL_ADDRESS_P (x))
	return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
	return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_TINY_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_TINY_GOT;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_SMALL_GOT;
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}

/* Return true if X holds either a quarter-precision or
   floating-point +0.0 constant.  */
static bool
aarch64_valid_floating_const (enum machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  /* TODO: We could handle moving 0.0 to a TFmode register,
     but first we would like to refactor the movtf_aarch64
     to be more amicable to split moves properly and
     correctly gate on TARGET_SIMD.  For now - reject all
     constants which are not to SFmode or DFmode registers.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;
  return aarch64_float_const_representable_p (x);
}
static bool
aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
    return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}

static rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:

   struct __va_list
   {
     void *__stack;
     void *__gr_top;
     void *__vr_top;
     int   __gr_offs;
     int   __vr_offs;
   };  */

static tree
aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;

  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;

  /* Create the fields.  */
  f_stack = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__stack"),
			ptr_type_node);
  f_grtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_top"),
			ptr_type_node);
  f_vrtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_top"),
			ptr_type_node);
  f_groff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_offs"),
			integer_type_node);
  f_vroff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_offs"),
			integer_type_node);

  DECL_ARTIFICIAL (f_stack) = 1;
  DECL_ARTIFICIAL (f_grtop) = 1;
  DECL_ARTIFICIAL (f_vrtop) = 1;
  DECL_ARTIFICIAL (f_groff) = 1;
  DECL_ARTIFICIAL (f_vroff) = 1;

  DECL_FIELD_CONTEXT (f_stack) = va_list_type;
  DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_groff) = va_list_type;
  DECL_FIELD_CONTEXT (f_vroff) = va_list_type;

  TYPE_FIELDS (va_list_type) = f_stack;
  DECL_CHAIN (f_stack) = f_grtop;
  DECL_CHAIN (f_grtop) = f_vrtop;
  DECL_CHAIN (f_vrtop) = f_groff;
  DECL_CHAIN (f_groff) = f_vroff;

  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
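/* Editor's illustrative note (not original source): at the C level the
   type built above behaves as if declared:

     struct __va_list {
       void *__stack;	 // next stacked argument
       void *__gr_top;	 // end of the GP register save area
       void *__vr_top;	 // end of the FP/SIMD register save area
       int __gr_offs;	 // negative offset from __gr_top to next GP arg
       int __vr_offs;	 // negative offset from __vr_top to next FP arg
     };

   so on LP64 it occupies 8 + 8 + 8 + 4 + 4 = 32 bytes.  */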
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  const CUMULATIVE_ARGS *cum;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, grtop, vrtop, groff, vroff;
  tree t;
  int gr_save_area_size;
  int vr_save_area_size;
  int vr_offset;

  cum = &crtl->args.info;
  gr_save_area_size
    = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
  vr_save_area_size
    = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (cum->aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_save_area_size = 0;
    }

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
		  NULL_TREE);
  grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
		  NULL_TREE);
  vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
		  NULL_TREE);
  groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
		  NULL_TREE);
  vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
		  NULL_TREE);

  /* Emit code to initialize STACK, which points to the next varargs stack
     argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
     by named arguments.  STACK is 8-byte aligned.  */
  t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
  if (cum->aapcs_stack_size > 0)
    t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GRTOP, the top of the GR save area.
     virtual_incoming_args_rtx should have been 16 byte aligned.  */
  t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
  t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize VRTOP, the top of the VR save area.
     This address is gr_save_area_bytes below GRTOP, rounded
     down to the next 16-byte boundary.  */
  t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
  vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
				STACK_BOUNDARY / BITS_PER_UNIT);

  if (vr_offset)
    t = fold_build_pointer_plus_hwi (t, -vr_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GROFF, the offset from GRTOP of the
     next GPR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
	      build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Likewise emit code to initialize VROFF, the offset from FTOP
     of the next VR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
	      build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
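/* Editor's worked example (not original source): for a variadic callee
   that has consumed 3 of the 8 GP argument registers and 1 of the 8
   FP/SIMD registers before the ellipsis, the code above computes
   gr_save_area_size = (8 - 3) * 8 = 40 bytes and vr_save_area_size
   = (8 - 1) * 16 = 112 bytes, so va_start sets __gr_offs = -40 and
   __vr_offs = -112, counting up towards zero as arguments are taken.  */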
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */

static tree
aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree addr;
  bool indirect_p;
  bool is_ha;		/* is HFA or HVA.  */
  bool dw_align;	/* double-word align.  */
  enum machine_mode ag_mode = VOIDmode;
  int nregs;
  enum machine_mode mode;

  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, f_top, f_off, off, arg, roundup, on_stack;
  HOST_WIDE_INT size, rsize, adjust, align;
  tree t, u, cond1, cond2;

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);

  mode = TYPE_MODE (type);

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
		  f_stack, NULL_TREE);
  size = int_size_in_bytes (type);
  align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;

  dw_align = false;
  adjust = 0;
  if (aarch64_vfp_is_call_or_return_candidate (mode,
					       type,
					       &ag_mode,
					       &nregs,
					       &is_ha))
    {
      /* TYPE passed in fp/simd registers.  */
      if (TARGET_GENERAL_REGS_ONLY)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");

      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
		      unshare_expr (valist), f_vrtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
		      unshare_expr (valist), f_vroff, NULL_TREE);

      rsize = nregs * UNITS_PER_VREG;

      if (is_ha)
	{
	  if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
	    adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
	}
      else if (BLOCK_REG_PADDING (mode, type, 1) == downward
	       && size < UNITS_PER_VREG)
	{
	  adjust = UNITS_PER_VREG - size;
	}
    }
  else
    {
      /* TYPE passed in general registers.  */
      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
		      unshare_expr (valist), f_grtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
		      unshare_expr (valist), f_groff, NULL_TREE);
      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
      nregs = rsize / UNITS_PER_WORD;

      if (align > 8)
	dw_align = true;

      if (BLOCK_REG_PADDING (mode, type, 1) == downward
	  && size < UNITS_PER_WORD)
	{
	  adjust = UNITS_PER_WORD - size;
	}
    }

  /* Get a local temporary for the field value.  */
  off = get_initialized_tmp_var (f_off, pre_p, NULL);

  /* Emit code to branch if off >= 0.  */
  t = build2 (GE_EXPR, boolean_type_node, off,
	      build_int_cst (TREE_TYPE (off), 0));
  cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);

  if (dw_align)
    {
      /* Emit: offs = (offs + 15) & -16.  */
      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
		  build_int_cst (TREE_TYPE (off), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
		  build_int_cst (TREE_TYPE (off), -16));
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
    }
  else
    roundup = NULL;

  /* Update ap.__[g|v]r_offs  */
  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
	      build_int_cst (TREE_TYPE (off), rsize));
  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);

  /* String up.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);

  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
	      build_int_cst (TREE_TYPE (f_off), 0));
  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);

  /* String up: make sure the assignment happens before the use.  */
  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
  COND_EXPR_ELSE (cond1) = t;

  /* Prepare the trees handling the argument that is passed on the stack;
     the top level node will store in ON_STACK.  */
  arg = get_initialized_tmp_var (stack, pre_p, NULL);
  if (align > 8)
    {
      /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
      t = fold_convert (intDI_type_node, arg);
      t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -16));
      t = fold_convert (TREE_TYPE (arg), t);
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
    }
  else
    roundup = NULL;
  /* Advance ap.__stack  */
  t = fold_convert (intDI_type_node, arg);
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), size + 7));
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), -8));
  t = fold_convert (TREE_TYPE (arg), t);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
  /* String up roundup and advance.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
  /* String up with arg */
  on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
  /* Big-endianness related address adjustment.  */
  if (BLOCK_REG_PADDING (mode, type, 1) == downward
      && size < UNITS_PER_WORD)
    {
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
		  size_int (UNITS_PER_WORD - size));
      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
    }

  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);

  /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
  t = off;
  if (adjust)
    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
		build_int_cst (TREE_TYPE (off), adjust));

  t = fold_convert (sizetype, t);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);

  if (is_ha)
    {
      /* type ha; // treat as "struct {ftype field[n];}"
         ... [computing offs]
         for (i = 0; i <nregs; ++i, offs += 16)
	   ha.field[i] = *((ftype *)(ap.__vr_top + offs));
	 return ha;  */
      int i;
      tree tmp_ha, field_t, field_ptr_t;

      /* Declare a local variable.  */
      tmp_ha = create_tmp_var_raw (type, "ha");
      gimple_add_tmp_var (tmp_ha);

      /* Establish the base type.  */
      switch (ag_mode)
	{
	case SFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
	case DFmode:
	  field_t = double_type_node;
	  field_ptr_t = double_ptr_type_node;
	  break;
	case TFmode:
	  field_t = long_double_type_node;
	  field_ptr_t = long_double_ptr_type_node;
	  break;
/* The half precision and quad precision are not fully supported yet.  Enable
   the following code after the support is complete.  Need to find the correct
   type node for __fp16 *.  */
#if 0
	case HFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
#endif
	case V2SImode:
	case V4SImode:
	  {
	    tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
	    field_t = build_vector_type_for_mode (innertype, ag_mode);
	    field_ptr_t = build_pointer_type (field_t);
	  }
	  break;
	default:
	  gcc_assert (0);
	}

      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
      tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
      addr = t;
      t = fold_convert (field_ptr_t, addr);
      t = build2 (MODIFY_EXPR, field_t,
		  build1 (INDIRECT_REF, field_t, tmp_ha),
		  build1 (INDIRECT_REF, field_t, t));

      /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
      for (i = 1; i < nregs; ++i)
	{
	  addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
	  u = fold_convert (field_ptr_t, addr);
	  u = build2 (MODIFY_EXPR, field_t,
		      build2 (MEM_REF, field_t, tmp_ha,
			      build_int_cst (field_ptr_t,
					     (i *
					      int_size_in_bytes (field_t)))),
		      build1 (INDIRECT_REF, field_t, u));
	  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
	}

      u = fold_convert (TREE_TYPE (f_top), tmp_ha);
      t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
    }

  COND_EXPR_ELSE (cond2) = t;
  addr = fold_convert (build_pointer_type (type), cond1);
  addr = build_va_arg_indirect_ref (addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);

  return addr;
}
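/* Editor's illustrative sketch (not original source): the gimple built
   above corresponds roughly to this C, shown for a GP-register candidate
   (the VR case is identical with __vr_top/__vr_offs and 16-byte slots):

     int off = ap.__gr_offs;
     if (off >= 0)
       goto on_stack;		// register save area already exhausted
     ap.__gr_offs = off + rsize;
     if (ap.__gr_offs > 0)
       goto on_stack;		// this argument straddled the end
     return *(type *) (ap.__gr_top + off);
   on_stack:
     arg = ap.__stack;
     ap.__stack = (char *) arg + ((size + 7) & -8);
     return *(type *) arg;  */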
/* Implement TARGET_SETUP_INCOMING_VARARGS.  */
static void
aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
				tree type, int *pretend_size ATTRIBUTE_UNUSED,
				int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS local_cum;
  int gr_saved, vr_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *cum;
  aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);

  /* Found out how many registers we need to save.  */
  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (local_cum.aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_saved = 0;
    }

  if (!no_rtl)
    {
      if (gr_saved > 0)
	{
	  rtx ptr, mem;

	  /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
	  ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
			       - gr_saved * UNITS_PER_WORD);
	  mem = gen_frame_mem (BLKmode, ptr);
	  set_mem_alias_set (mem, get_varargs_alias_set ());

	  move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
			       mem, gr_saved);
	}
      if (vr_saved > 0)
	{
	  /* We can't use move_block_from_reg, because it will use
	     the wrong mode, storing D regs only.  */
	  enum machine_mode mode = TImode;
	  int off, i;

	  /* Set OFF to the offset from virtual_incoming_args_rtx of
	     the first vector register.  The VR save area lies below
	     the GR one, and is aligned to 16 bytes.  */
	  off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
				   STACK_BOUNDARY / BITS_PER_UNIT);
	  off -= vr_saved * UNITS_PER_VREG;

	  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
	    {
	      rtx ptr, mem;

	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
	      mem = gen_frame_mem (mode, ptr);
	      set_mem_alias_set (mem, get_varargs_alias_set ());
	      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
	      off += UNITS_PER_VREG;
	    }
	}
    }

  /* We don't save the size into *PRETEND_SIZE because we want to avoid
     any complication of having crtl->args.pretend_args_size changed.  */
  cfun->machine->saved_varargs_size
    = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
			 STACK_BOUNDARY / BITS_PER_UNIT)
       + vr_saved * UNITS_PER_VREG);
}
static void
aarch64_conditional_register_usage (void)
{
  int i;
  if (!TARGET_FLOAT)
    {
      for (i = V0_REGNUM; i <= V31_REGNUM; i++)
	{
	  fixed_regs[i] = 1;
	  call_used_regs[i] = 1;
	}
    }
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */

static int
aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
  enum machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)))
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
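/* Editor's worked example (not original source): for
   struct { float x, y, z; } the RECORD_TYPE walk above visits three
   REAL_TYPE fields, sets *modep = SFmode on the first and counts 3; the
   padding check 96 bits == 3 * 32 bits passes, so the struct qualifies
   as a homogeneous floating-point aggregate passed in s0-s2.  Adding a
   double field would mismatch SFmode and make the walk return -1.  */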
/* Return true if we use LRA instead of reload pass.  */
static bool
aarch64_lra_p (void)
{
  return aarch64_lra_flag;
}

/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */

static bool
aarch64_composite_type_p (const_tree type,
			  enum machine_mode mode)
{
  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}
/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

static bool
aarch64_short_vector_p (const_tree type,
			enum machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (!aarch64_composite_type_p (type, mode)
	   && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16) ? true : false;
}
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */

static bool
aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
					 const_tree type,
					 enum machine_mode *base_mode,
					 int *count,
					 bool *is_ha)
{
  enum machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      *count = 2;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	{
	  if (is_ha != NULL) *is_ha = true;
	  *count = ag_count;
	}
      else
	return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}
/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
aarch64_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SIMD
      && (mode == V4SImode  || mode == V8HImode
	  || mode == V16QImode || mode == V2DImode
	  || mode == V2SImode  || mode == V4HImode
	  || mode == V8QImode || mode == V2SFmode
	  || mode == V4SFmode || mode == V2DFmode))
    return true;

  return false;
}

/* Return appropriate SIMD container
   for MODE within a vector of WIDTH bits.  */
static enum machine_mode
aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
{
  gcc_assert (width == 64 || width == 128);
  if (TARGET_SIMD)
    {
      if (width == 128)
	switch (mode)
	  {
	  case DFmode:
	    return V2DFmode;
	  case SFmode:
	    return V4SFmode;
	  case SImode:
	    return V4SImode;
	  case HImode:
	    return V8HImode;
	  case QImode:
	    return V16QImode;
	  case DImode:
	    return V2DImode;
	  default:
	    break;
	  }
      else
	switch (mode)
	  {
	  case SFmode:
	    return V2SFmode;
	  case SImode:
	    return V2SImode;
	  case HImode:
	    return V4HImode;
	  case QImode:
	    return V8QImode;
	  default:
	    break;
	  }
    }
  return word_mode;
}

/* Return 128-bit container as the preferred SIMD mode for MODE.  */
static enum machine_mode
aarch64_preferred_simd_mode (enum machine_mode mode)
{
  return aarch64_simd_container_mode (mode, 128);
}
/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  return (16 | 8);
}
/* A table to help perform AArch64-specific name mangling for AdvSIMD
   vector types in order to conform to the AAPCS64 (see "Procedure
   Call Standard for the ARM 64-bit Architecture", Appendix A).  To
   qualify for emission with the mangled names defined in that document,
   a vector type must not only be of the correct mode but also be
   composed of AdvSIMD vector element types (e.g.
   __builtin_aarch64_simd_qi); these types are registered by
   aarch64_init_simd_builtins ().  In other words, vector types defined
   in other ways e.g. via vector_size attribute will get default
   mangled names.  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *mangled_name;
} aarch64_simd_mangle_map_entry;

static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
  { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
  { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
  { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
  { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
  { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
  { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
  { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
  { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
  { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
  { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
  { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
  { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
  { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
  { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
  { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
  { VOIDmode, NULL, NULL }
};
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;

      while (pos->mode != VOIDmode)
	{
	  tree elt_type = TREE_TYPE (type);

	  if (pos->mode == TYPE_MODE (type)
	      && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	      && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
			  pos->element_type_name))
	    return pos->mangled_name;

	  pos++;
	}
    }

  /* Use the default mangling.  */
  return NULL;
}
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}

/* Return true iff x is a uniform vector of floating-point
   constants, and the constant can be represented in
   quarter-precision form.  Note, as aarch64_float_const_representable
   rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
static bool
aarch64_vect_float_const_representable_p (rtx x)
{
  int i = 0;
  REAL_VALUE_TYPE r0, ri;
  rtx x0, xi;

  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
    return false;

  x0 = CONST_VECTOR_ELT (x, 0);
  if (!CONST_DOUBLE_P (x0))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);

  for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
    {
      xi = CONST_VECTOR_ELT (x, i);
      if (!CONST_DOUBLE_P (xi))
	return false;

      REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
      if (!REAL_VALUES_EQUAL (r0, ri))
	return false;
    }

  return aarch64_float_const_representable_p (x0);
}
/* Return true for valid and false for invalid.  */
static bool
aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
			      struct simd_immediate_info *info)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
  matches = 1;						\
  for (i = 0; i < idx; i += (STRIDE))			\
    if (!(TEST))					\
      matches = 0;					\
  if (matches)						\
    {							\
      immtype = (CLASS);				\
      elsize = (ELSIZE);				\
      eshift = (SHIFT);					\
      emvn = (NEG);					\
      break;						\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  int eshift, emvn;

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      if (! (aarch64_simd_imm_zero_p (op, mode)
	     || aarch64_vect_float_const_representable_p (op)))
	return false;

      if (info)
	{
	  info->value = CONST_VECTOR_ELT (op, 0);
	  info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
	  info->mvn = false;
	  info->shift = 0;
	}

      return true;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (GET_CODE (el) == CONST_INT)
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (GET_CODE (el) == CONST_DOUBLE)
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (GET_CODE (el) == CONST_DOUBLE)
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
    }
  while (0);

  if (immtype == -1)
    return false;

  if (info)
    {
      info->element_width = elsize;
      info->mvn = emvn != 0;
      info->shift = eshift;

      unsigned HOST_WIDE_INT imm = 0;

      if (immtype >= 12 && immtype <= 15)
	info->msl = true;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
	      << (i * BITS_PER_UNIT);

	  info->value = GEN_INT (imm);
	}
      else
	{
	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  /* Construct 'abcdefgh' because the assembler cannot handle
	     generic constants.  */
	  if (info->mvn)
	    imm = ~imm;
	  imm = (imm >> info->shift) & 0xff;
	  info->value = GEN_INT (imm);
	}
    }

  return true;
#undef CHECK
}
static bool
aarch64_const_vec_all_same_int_p (rtx x,
				  HOST_WIDE_INT minval,
				  HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}

/* Check if immediate shift constants are within range.  */
bool
aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
}

/* Return true if X is a uniform vector where all elements
   are either the floating-point constant 0.0 or the
   integer constant 0.  */
bool
aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
{
  return x == CONST0_RTX (mode);
}
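/* Editor's worked example (not original source): for V4HImode (16-bit
   lanes) the bounds above accept left-shift counts 0..15 and right-shift
   counts 1..16, matching the AdvSIMD SHL/SSHR immediate ranges; a vector
   splat of 16 is therefore valid for SSHR but not for SHL.  */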
bool
aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT imm = INTVAL (x);
  int i;

  for (i = 0; i < 8; i++)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0xff && byte != 0)
	return false;
      imm >>= 8;
    }

  return true;
}

bool
aarch64_mov_operand_p (rtx x,
		       enum aarch64_symbol_context context,
		       enum machine_mode mode)
{
  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
    return true;

  if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
    return true;

  return aarch64_classify_symbolic_expression (x, context)
    == SYMBOL_TINY_ABSOLUTE;
}
/* Return a const_int vector of VAL.  */
rtx
aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits);
  int i;

  for (i = 0; i < nunits; i++)
    RTVEC_ELT (v, i) = GEN_INT (val);

  return gen_rtx_CONST_VECTOR (mode, v);
}

/* Check OP is a legal scalar immediate for the MOVI instruction.  */

bool
aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_preferred_simd_mode (mode);
  rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}
/* Construct and return a PARALLEL RTX vector.  */
rtx
aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int base = high ? nunits / 2 : 0;
  rtx t1;
  int i;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
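/* Editor's worked example (not original source): for V4SImode,
   aarch64_simd_vect_par_cnst_half (V4SImode, false) builds
   (parallel [0 1]) and with high = true (parallel [2 3]), i.e. the lane
   selectors used by the vec_extract lo/hi style patterns.  */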
/* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
   HIGH (exclusive).  */
void
aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT lane;
  gcc_assert (GET_CODE (operand) == CONST_INT);
  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("lane out of range");
}

void
aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  gcc_assert (GET_CODE (operand) == CONST_INT);
  HOST_WIDE_INT lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("constant out of range");
}
/* Emit code to reinterpret one AdvSIMD type as another,
   without altering bits.  */
void
aarch64_simd_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}

/* Emit code to place a AdvSIMD pair result in memory locations (with equal
   registers).  */
void
aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
				    rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
				    rtx op1)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}

/* Return TRUE if OP is a valid vector addressing mode.  */
bool
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
			|| GET_CODE (XEXP (op, 0)) == REG);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
				rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
   one of VSTRUCT modes: OI, CI or XI.  */
int
aarch64_simd_attr_length_move (rtx insn)
{
  enum machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }
  return 4;
}

/* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
   alignment of a vector to 128 bits.  */
static HOST_WIDE_INT
aarch64_simd_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
  return MIN (align, 128);
}
/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
static bool
aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
{
  if (is_packed)
    return false;

  /* We guarantee alignment for vectors up to 128-bits.  */
  if (tree_int_cst_compare (TYPE_SIZE (type),
			    bitsize_int (BIGGEST_ALIGNMENT)) > 0)
    return false;

  /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
  return true;
}
/* If VALS is a vector constant that can be loaded into a register
   using DUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */
static rtx
aarch64_simd_dup_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR)
    return NULL_RTX;

  for (i = 1; i < n_elts; ++i)
    {
      x = CONST_VECTOR_ELT (vals, i);
      if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
	all_same = false;
    }

  if (!all_same)
    return NULL_RTX;

  /* We can load this constant by using DUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */
  x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */
static rtx
aarch64_simd_make_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  rtx const_dup;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL_RTX
      && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
    /* Load using MOVI/MVNI.  */
    return const_vec;
  else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
    /* Loaded using DUP.  */
    return const_dup;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  We can not take advantage of single-cycle
       LD1 because we need a PC-relative addressing mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
void
aarch64_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  x = XVECEXP (vals, 0, 0);
  if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
    n_var = 1, one_var = 0;

  for (i = 1; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
	++n_var, one_var = i;

      if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = aarch64_simd_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);
      enum insn_code icode;

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
      aarch64_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      icode = optab_handler (vec_set_optab, mode);
      gcc_assert (icode != CODE_FOR_nothing);
      emit_insn (GEN_FCN (icode) (target, x, index));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
static unsigned HOST_WIDE_INT
aarch64_shift_truncation_mask (enum machine_mode mode)
{
  return
    (aarch64_vector_mode_supported_p (mode)
     || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
}
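/* Illustrative values only (modes assumed): scalar shifts truncate
   their count to the mode width, so the hook above yields
   aarch64_shift_truncation_mask (DImode) == 63, while vector modes
   yield 0 because AdvSIMD shift instructions do not truncate the
   shift count.  */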
#ifndef TLS_SECTION_ASM_FLAG
#define TLS_SECTION_ASM_FLAG 'T'
#endif

void
aarch64_elf_asm_named_section (const char *name, unsigned int flags,
			       tree decl ATTRIBUTE_UNUSED)
{
  char flagchars[10], *f = flagchars;

  /* If we have already declared this section, we can use an
     abbreviated form to switch back to it -- unless this section is
     part of a COMDAT group, in which case GAS requires the full
     declaration every time.  */
  if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
      && (flags & SECTION_DECLARED))
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  if (!(flags & SECTION_DEBUG))
    *f++ = 'a';
  if (flags & SECTION_WRITE)
    *f++ = 'w';
  if (flags & SECTION_CODE)
    *f++ = 'x';
  if (flags & SECTION_SMALL)
    *f++ = 's';
  if (flags & SECTION_MERGE)
    *f++ = 'M';
  if (flags & SECTION_STRINGS)
    *f++ = 'S';
  if (flags & SECTION_TLS)
    *f++ = TLS_SECTION_ASM_FLAG;
  if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
    *f++ = 'G';
  *f = '\0';

  fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);

  if (!(flags & SECTION_NOTYPE))
    {
      const char *type;
      const char *format;

      if (flags & SECTION_BSS)
	type = "nobits";
      else
	type = "progbits";

#ifdef TYPE_OPERAND_FMT
      format = "," TYPE_OPERAND_FMT;
#else
      format = ",@%s";
#endif

      fprintf (asm_out_file, format, type);

      if (flags & SECTION_ENTSIZE)
	fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	{
	  if (TREE_CODE (decl) == IDENTIFIER_NODE)
	    fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
	  else
	    fprintf (asm_out_file, ",%s,comdat",
		     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
	}
    }

  putc ('\n', asm_out_file);
}
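/* As an illustration (section name and flags assumed), a writable TLS
   data section declared through the hook above is emitted as

	.section .tdata,"awT",@progbits

   while switching back to an already-declared non-COMDAT section uses
   the abbreviated form ".section .tdata".  */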
/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
  int type;
  switch (aarch64_cmodel)
    {
    case AARCH64_CMODEL_TINY:
    case AARCH64_CMODEL_TINY_PIC:
    case AARCH64_CMODEL_SMALL:
    case AARCH64_CMODEL_SMALL_PIC:
      /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
	 for everything.  */
      type = DW_EH_PE_sdata4;
      break;
    default:
      /* No assumptions here.  8-byte relocs required.  */
      type = DW_EH_PE_sdata8;
      break;
    }
  return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}
/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
			     rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, model_rtx));
}
/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
			      rtx rval, rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem, model_rtx));
}
/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode, cmp_mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For short modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case SImode:
    case DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
	oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
/* Split a compare and swap pattern.  */

void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  scratch = operands[7];
  mode = GET_MODE (mem);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
    }

  emit_label (label2);
}
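/* Illustrative sketch (register numbers assumed) of the loop the split
   above produces for a strong SImode compare-and-swap:

	.Lretry:
		ldxr	w0, [x1]	// load exclusive
		cmp	w0, w2		// compare with expected value
		bne	.Ldone		// unlikely failure branch
		stxr	w3, w4, [x1]	// store exclusive, w3 = status
		cbnz	w3, .Lretry	// retry if reservation was lost
	.Ldone:

   Acquire/release variants (LDAXR/STLXR) are chosen according to the
   memory model in operands[5].  */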
/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
			 rtx value, rtx model_rtx, rtx cond)
{
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
				gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
}
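/* Illustrative sketch (register numbers assumed): splitting an atomic
   SImode add yields the usual exclusive-access loop:

	.Lretry:
		ldxr	w0, [x1]	// old value
		add	w2, w0, w3	// apply the operation
		stxr	w4, w2, [x1]	// attempt the store
		cbnz	w4, .Lretry	// retry on contention  */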
static void
aarch64_print_extension (void)
{
  const struct aarch64_option_extension *opt = NULL;

  for (opt = all_extensions; opt->name != NULL; opt++)
    if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
      asm_fprintf (asm_out_file, "+%s", opt->name);

  asm_fprintf (asm_out_file, "\n");
}
static void
aarch64_start_file (void)
{
  if (selected_arch)
    {
      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
      aarch64_print_extension ();
    }
  else if (selected_cpu)
    {
      asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
      aarch64_print_extension ();
    }
  default_file_start ();
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;

  return VOIDmode;
}
/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
   by:

   (-1)^s * (n/16) * 2^r

   Where:
     's' is the sign bit.
     'n' is an integer in the range 16 <= n <= 31.
     'r' is an integer in the range -3 <= r <= 4.  */

/* Return true iff X can be represented by a quarter-precision
   floating point immediate operand X.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  HOST_WIDE_INT m1, m2;
  REAL_VALUE_TYPE r, m;

  if (!CONST_DOUBLE_P (x))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  REAL_VALUE_TO_INT (&m1, &m2, m);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (m1 != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = m2;
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     by the general case above.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}
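/* Worked example (values chosen for illustration): 0.375 is
   (-1)^0 * (24/16) * 2^-2 with n = 24 and r = -2 both in range, so it
   is accepted and can be emitted as "fmov d0, 0.375".  By contrast,
   0.1 admits no n/16 * 2^r decomposition with 16 <= n <= 31 and
   -3 <= r <= 4, so it is rejected.  */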
char*
aarch64_output_simd_mov_immediate (rtx const_vector,
				   enum machine_mode mode,
				   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  const char *shift_op;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };

  /* This will return true to show const_vector is legal for use as either
     a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (mode == SFmode || mode == DFmode)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      if (aarch64_float_const_zero_rtx_p (info.value))
	info.value = GEN_INT (0);
      else
	{
#define buf_size 20
	  REAL_VALUE_TYPE r;
	  REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
	  char float_buf[buf_size] = {'\0'};
	  real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
#undef buf_size

	  if (lane_count == 1)
	    snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
	  else
	    snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
		      lane_count, element_char, float_buf);
	  return templ;
	}
    }

  mnemonic = info.mvn ? "mvni" : "movi";
  shift_op = info.msl ? "msl" : "lsl";

  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
	      ", %s %d", mnemonic, lane_count, element_char,
	      UINTVAL (info.value), shift_op, info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}
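/* Illustrative outputs (operand numbers and values assumed): after
   operand substitution, a V4SI vector of 1s becomes
   "movi v0.4s, 0x1", a shifted immediate becomes something like
   "movi v0.4s, 0x1, lsl 8", and a V2DF vector of 1.0 becomes
   "fmov v0.2d, 1.0e+0".  */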
char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
					  enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
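/* The three XORs above are the classic in-place swap (a ^= b; b ^= a;
   a ^= b), exchanging the reversed halves without a scratch register;
   roughly (registers assumed):

	eor	v0.16b, v0.16b, v1.16b
	eor	v1.16b, v0.16b, v1.16b
	eor	v0.16b, v0.16b, v1.16b  */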
/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	{
	  /* Expand the argument to a V16QI mode by duplicating it.  */
	  rtx pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
	}
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
	}
    }
}
void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
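/* For example (values assumed): with a single V8QI input, a selector
   element of 11 is first masked with nelt - 1 = 7 to give lane 3,
   matching the modulo semantics the middle end expects; raw TBL would
   instead return 0 for the out-of-range index.  */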
/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn2v16qi; break;
	case V8QImode: gen = gen_aarch64_trn2v8qi; break;
	case V8HImode: gen = gen_aarch64_trn2v8hi; break;
	case V4HImode: gen = gen_aarch64_trn2v4hi; break;
	case V4SImode: gen = gen_aarch64_trn2v4si; break;
	case V2SImode: gen = gen_aarch64_trn2v2si; break;
	case V2DImode: gen = gen_aarch64_trn2v2di; break;
	case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn1v16qi; break;
	case V8QImode: gen = gen_aarch64_trn1v8qi; break;
	case V8HImode: gen = gen_aarch64_trn1v8hi; break;
	case V4HImode: gen = gen_aarch64_trn1v4hi; break;
	case V4SImode: gen = gen_aarch64_trn1v4si; break;
	case V2SImode: gen = gen_aarch64_trn1v2si; break;
	case V2DImode: gen = gen_aarch64_trn1v2di; break;
	case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
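/* Worked example (V4SI, inputs {a0,a1,a2,a3} and {b0,b1,b2,b3}):
   TRN1 matches the index pattern {0, 4, 2, 6}, selecting
   {a0,b0,a2,b2}; TRN2 matches {1, 5, 3, 7}, selecting {a1,b1,a3,b3}.  */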
/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp2v4si; break;
	case V2SImode: gen = gen_aarch64_uzp2v2si; break;
	case V2DImode: gen = gen_aarch64_uzp2v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp1v4si; break;
	case V2SImode: gen = gen_aarch64_uzp1v2si; break;
	case V2DImode: gen = gen_aarch64_uzp1v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
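/* Worked example (V4SI, inputs {a0,a1,a2,a3} and {b0,b1,b2,b3}):
   UZP1 matches the index pattern {0, 2, 4, 6}, concatenating the even
   lanes of both inputs as {a0,a2,b0,b2}; UZP2 matches {1, 3, 5, 7}
   for the odd lanes.  */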
/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip2v16qi; break;
	case V8QImode: gen = gen_aarch64_zip2v8qi; break;
	case V8HImode: gen = gen_aarch64_zip2v8hi; break;
	case V4HImode: gen = gen_aarch64_zip2v4hi; break;
	case V4SImode: gen = gen_aarch64_zip2v4si; break;
	case V2SImode: gen = gen_aarch64_zip2v2si; break;
	case V2DImode: gen = gen_aarch64_zip2v2di; break;
	case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip1v16qi; break;
	case V8QImode: gen = gen_aarch64_zip1v8qi; break;
	case V8HImode: gen = gen_aarch64_zip1v8hi; break;
	case V4HImode: gen = gen_aarch64_zip1v4hi; break;
	case V4SImode: gen = gen_aarch64_zip1v4si; break;
	case V2SImode: gen = gen_aarch64_zip1v2si; break;
	case V2DImode: gen = gen_aarch64_zip1v2di; break;
	case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
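/* Worked example (V4SI, inputs {a0,a1,a2,a3} and {b0,b1,b2,b3}):
   ZIP1 matches the index pattern {0, 4, 1, 5}, interleaving the low
   halves as {a0,b0,a1,b1}; ZIP2 matches {2, 6, 3, 7}, interleaving
   the high halves.  */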
static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0;
  enum machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;
  rtx lane;

  /* TODO: This may not be big-endian safe.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
	return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt);

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_zip (d))
	return true;
      else if (aarch64_evpc_uzp (d))
	return true;
      else if (aarch64_evpc_trn (d))
	return true;
      else if (aarch64_evpc_dup (d))
	return true;
      return aarch64_evpc_tbl (d);
    }

  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* Fall Through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}
static bool
aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P aarch64_lra_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"