/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "hard-reg-set.h"
#include "target-def.h"
#include "targhooks.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
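/* For example, with the default LP64 ABI POINTER_SIZE is 64 and
   BITS_PER_UNIT is 8, so POINTER_BYTES evaluates to 8; under ILP32
   it evaluates to 4.  */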
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};
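/* As a rough illustration, these classes correspond to assembly
   addresses such as:
     ADDRESS_REG_IMM    [x0, #16]
     ADDRESS_REG_WB     [x0, #16]!  or  [x0], #16
     ADDRESS_REG_REG    [x0, x1, lsl #3]
     ADDRESS_REG_UXTW   [x0, w1, uxtw #2]
     ADDRESS_REG_SXTW   [x0, w1, sxtw #2]
     ADDRESS_LO_SUM     [x0, #:lo12:foo]
     ADDRESS_SYMBOLIC   a pc-relative literal-pool reference.  */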
struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
struct simd_immediate_info
/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
static bool aarch64_lra_p (void);
static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
						     const_tree,
						     enum machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
					      HOST_WIDE_INT, HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
						 const unsigned char *sel);
/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = generic;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;
/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif
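/* For instance, NAMED_PARAM (memory_load, COSTS_N_INSNS (1)) expands to
   ".memory_load = COSTS_N_INSNS (1)" when designated initializers are
   available, and to the plain positional "COSTS_N_INSNS (1)" otherwise,
   so the tables below must list their fields in declaration order.  */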
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_rtx_cost_table generic_rtx_cost_table =
{
  NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
  NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
  NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
  NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
  NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};
/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost generic_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &generic_rtx_cost_table,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4)
};
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8,
   &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};
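/* For illustration only: a hypothetical aarch64-cores.def entry such as
     AARCH64_CORE("example-core", examplecore, 8, AARCH64_FL_FPSIMD, generic)
   would expand in this table to
     {"example-core", examplecore, "8",
      AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},  */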
/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};
/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
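/* For example, 0x00ff00ff00ff00ff is representable as a bitmask immediate:
   it is the 16-bit element 0x00ff (a rotated run of contiguous set bits)
   replicated across all 64 bits, which is exactly the form that AND, ORR
   and EOR can encode directly.  */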
/* Did we set flag_omit_frame_pointer just so
   aarch64_frame_pointer_required would be called?  */
static bool faked_omit_frame_pointer;
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
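/* The encoding pairs each condition with its inverse in adjacent values,
   so flipping bit 0 inverts the test: for example
   AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) is AARCH64_NE (0 ^ 1 == 1)
   and AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) is AARCH64_LT.  */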
/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
   if (GP_REGNUM_P (regno))
     return AARCH64_DWARF_R0 + regno - R0_REGNUM;
   else if (regno == SP_REGNUM)
     return AARCH64_DWARF_SP;
   else if (FP_REGNUM_P (regno))
     return AARCH64_DWARF_V0 + regno - V0_REGNUM;

   /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
      equivalent DWARF register.  */
   return DWARF_FRAME_REGISTERS;
}
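/* For example, x0 maps to DWARF register 0, x29 to 29, sp to 31 and v0
   to 64, following the AArch64 DWARF register numbering; anything else
   (such as the condition flags) reports "no DWARF equivalent".  */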
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}
/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}
/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
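/* For example, TImode (16 bytes) occupies two X registers but only one
   128-bit V register, while OImode (32 bytes) needs two V registers.  */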
/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}
/* Return true if calls to DECL should be treated as
   long-calls (ie called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (ie called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
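/* For instance, with MODE == DImode, EXTRACT_IMM == 35 and MULT_IMM == 8
   the test succeeds: 35 & ~7 == 32 is a power of two (the extend is from
   32 bits) and 35 & 7 == 3 matches the multiplier 1 << 3, i.e. the RTL
   describes an operand along the lines of "add x0, x1, w2, uxtw #3".  */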
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as :
   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr
                                     nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp
   mrs  tp, tpidr_el0
   add  dest, dest, tp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm
   add  t0, #:tprel_lo12_nc:imm
*/
548 aarch64_load_symref_appropriately (rtx dest
, rtx imm
,
549 enum aarch64_symbol_type type
)
553 case SYMBOL_SMALL_ABSOLUTE
:
555 /* In ILP32, the mode of dest can be either SImode or DImode. */
557 enum machine_mode mode
= GET_MODE (dest
);
559 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
561 if (can_create_pseudo_p ())
562 tmp_reg
= gen_reg_rtx (mode
);
564 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
565 emit_insn (gen_add_losym (dest
, tmp_reg
, imm
));
569 case SYMBOL_TINY_ABSOLUTE
:
570 emit_insn (gen_rtx_SET (Pmode
, dest
, imm
));
573 case SYMBOL_SMALL_GOT
:
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
583 enum machine_mode mode
= GET_MODE (dest
);
585 if (can_create_pseudo_p ())
586 tmp_reg
= gen_reg_rtx (mode
);
588 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
589 if (mode
== ptr_mode
)
592 emit_insn (gen_ldr_got_small_di (dest
, tmp_reg
, imm
));
594 emit_insn (gen_ldr_got_small_si (dest
, tmp_reg
, imm
));
598 gcc_assert (mode
== Pmode
);
599 emit_insn (gen_ldr_got_small_sidi (dest
, tmp_reg
, imm
));
605 case SYMBOL_SMALL_TLSGD
:
608 rtx result
= gen_rtx_REG (Pmode
, R0_REGNUM
);
611 emit_call_insn (gen_tlsgd_small (result
, imm
));
612 insns
= get_insns ();
615 RTL_CONST_CALL_P (insns
) = 1;
616 emit_libcall_block (insns
, dest
, result
, imm
);
620 case SYMBOL_SMALL_TLSDESC
:
622 rtx x0
= gen_rtx_REG (Pmode
, R0_REGNUM
);
625 emit_insn (gen_tlsdesc_small (imm
));
626 tp
= aarch64_load_tp (NULL
);
627 emit_insn (gen_rtx_SET (Pmode
, dest
, gen_rtx_PLUS (Pmode
, tp
, x0
)));
628 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
632 case SYMBOL_SMALL_GOTTPREL
:
634 rtx tmp_reg
= gen_reg_rtx (Pmode
);
635 rtx tp
= aarch64_load_tp (NULL
);
636 emit_insn (gen_tlsie_small (tmp_reg
, imm
));
637 emit_insn (gen_rtx_SET (Pmode
, dest
, gen_rtx_PLUS (Pmode
, tp
, tmp_reg
)));
638 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
642 case SYMBOL_SMALL_TPREL
:
644 rtx tp
= aarch64_load_tp (NULL
);
645 emit_insn (gen_tlsle_small (dest
, tp
, imm
));
646 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
650 case SYMBOL_TINY_GOT
:
651 emit_insn (gen_ldr_got_tiny (dest
, imm
));
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}
673 aarch64_split_128bit_move (rtx dst
, rtx src
)
677 enum machine_mode src_mode
= GET_MODE (src
);
678 enum machine_mode dst_mode
= GET_MODE (dst
);
679 int src_regno
= REGNO (src
);
680 int dst_regno
= REGNO (dst
);
682 gcc_assert (dst_mode
== TImode
|| dst_mode
== TFmode
);
684 if (REG_P (dst
) && REG_P (src
))
686 gcc_assert (src_mode
== TImode
|| src_mode
== TFmode
);
688 /* Handle r -> w, w -> r. */
689 if (FP_REGNUM_P (dst_regno
) && GP_REGNUM_P (src_regno
))
694 (gen_aarch64_movtilow_di (dst
, gen_lowpart (word_mode
, src
)));
696 (gen_aarch64_movtihigh_di (dst
, gen_highpart (word_mode
, src
)));
700 (gen_aarch64_movtflow_di (dst
, gen_lowpart (word_mode
, src
)));
702 (gen_aarch64_movtfhigh_di (dst
, gen_highpart (word_mode
, src
)));
708 else if (GP_REGNUM_P (dst_regno
) && FP_REGNUM_P (src_regno
))
713 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode
, dst
), src
));
715 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode
, dst
), src
));
719 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode
, dst
), src
));
721 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode
, dst
), src
));
727 /* Fall through to r -> r cases. */
732 low_dst
= gen_lowpart (word_mode
, dst
);
734 && reg_overlap_mentioned_p (low_dst
, src
))
736 aarch64_emit_move (gen_highpart (word_mode
, dst
),
737 gen_highpart_mode (word_mode
, TImode
, src
));
738 aarch64_emit_move (low_dst
, gen_lowpart (word_mode
, src
));
742 aarch64_emit_move (low_dst
, gen_lowpart (word_mode
, src
));
743 aarch64_emit_move (gen_highpart (word_mode
, dst
),
744 gen_highpart_mode (word_mode
, TImode
, src
));
748 emit_move_insn (gen_rtx_REG (DFmode
, dst_regno
),
749 gen_rtx_REG (DFmode
, src_regno
));
750 emit_move_insn (gen_rtx_REG (DFmode
, dst_regno
+ 1),
751 gen_rtx_REG (DFmode
, src_regno
+ 1));
759 aarch64_split_128bit_move_p (rtx dst
, rtx src
)
761 return (! REG_P (src
)
762 || ! (FP_REGNUM_P (REGNO (dst
)) && FP_REGNUM_P (REGNO (src
))));
765 /* Split a complex SIMD combine. */
768 aarch64_split_simd_combine (rtx dst
, rtx src1
, rtx src2
)
770 enum machine_mode src_mode
= GET_MODE (src1
);
771 enum machine_mode dst_mode
= GET_MODE (dst
);
773 gcc_assert (VECTOR_MODE_P (dst_mode
));
775 if (REG_P (dst
) && REG_P (src1
) && REG_P (src2
))
777 rtx (*gen
) (rtx
, rtx
, rtx
);
782 gen
= gen_aarch64_simd_combinev8qi
;
785 gen
= gen_aarch64_simd_combinev4hi
;
788 gen
= gen_aarch64_simd_combinev2si
;
791 gen
= gen_aarch64_simd_combinev2sf
;
794 gen
= gen_aarch64_simd_combinedi
;
797 gen
= gen_aarch64_simd_combinedf
;
803 emit_insn (gen (dst
, src1
, src2
));
808 /* Split a complex SIMD move. */
811 aarch64_split_simd_move (rtx dst
, rtx src
)
813 enum machine_mode src_mode
= GET_MODE (src
);
814 enum machine_mode dst_mode
= GET_MODE (dst
);
816 gcc_assert (VECTOR_MODE_P (dst_mode
));
818 if (REG_P (dst
) && REG_P (src
))
820 rtx (*gen
) (rtx
, rtx
);
822 gcc_assert (VECTOR_MODE_P (src_mode
));
827 gen
= gen_aarch64_split_simd_movv16qi
;
830 gen
= gen_aarch64_split_simd_movv8hi
;
833 gen
= gen_aarch64_split_simd_movv4si
;
836 gen
= gen_aarch64_split_simd_movv2di
;
839 gen
= gen_aarch64_split_simd_movv4sf
;
842 gen
= gen_aarch64_split_simd_movv2df
;
848 emit_insn (gen (dst
, src
));
854 aarch64_force_temporary (enum machine_mode mode
, rtx x
, rtx value
)
856 if (can_create_pseudo_p ())
857 return force_reg (mode
, value
);
860 x
= aarch64_emit_move (x
, value
);
867 aarch64_add_offset (enum machine_mode mode
, rtx temp
, rtx reg
, HOST_WIDE_INT offset
)
869 if (!aarch64_plus_immediate (GEN_INT (offset
), mode
))
872 /* Load the full offset into a register. This
873 might be improvable in the future. */
874 high
= GEN_INT (offset
);
876 high
= aarch64_force_temporary (mode
, temp
, high
);
877 reg
= aarch64_force_temporary (mode
, temp
,
878 gen_rtx_PLUS (mode
, high
, reg
));
880 return plus_constant (mode
, reg
, offset
);
884 aarch64_expand_mov_immediate (rtx dest
, rtx imm
)
886 enum machine_mode mode
= GET_MODE (dest
);
887 unsigned HOST_WIDE_INT mask
;
890 unsigned HOST_WIDE_INT val
;
893 int one_match
, zero_match
;
895 gcc_assert (mode
== SImode
|| mode
== DImode
);
897 /* Check on what type of symbol it is. */
898 if (GET_CODE (imm
) == SYMBOL_REF
899 || GET_CODE (imm
) == LABEL_REF
900 || GET_CODE (imm
) == CONST
)
902 rtx mem
, base
, offset
;
903 enum aarch64_symbol_type sty
;
905 /* If we have (const (plus symbol offset)), separate out the offset
906 before we start classifying the symbol. */
907 split_const (imm
, &base
, &offset
);
909 sty
= aarch64_classify_symbol (base
, SYMBOL_CONTEXT_ADR
);
912 case SYMBOL_FORCE_TO_MEM
:
913 if (offset
!= const0_rtx
914 && targetm
.cannot_force_const_mem (mode
, imm
))
916 gcc_assert(can_create_pseudo_p ());
917 base
= aarch64_force_temporary (mode
, dest
, base
);
918 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
919 aarch64_emit_move (dest
, base
);
922 mem
= force_const_mem (ptr_mode
, imm
);
924 if (mode
!= ptr_mode
)
925 mem
= gen_rtx_ZERO_EXTEND (mode
, mem
);
926 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
929 case SYMBOL_SMALL_TLSGD
:
930 case SYMBOL_SMALL_TLSDESC
:
931 case SYMBOL_SMALL_GOTTPREL
:
932 case SYMBOL_SMALL_GOT
:
933 case SYMBOL_TINY_GOT
:
934 if (offset
!= const0_rtx
)
936 gcc_assert(can_create_pseudo_p ());
937 base
= aarch64_force_temporary (mode
, dest
, base
);
938 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
939 aarch64_emit_move (dest
, base
);
944 case SYMBOL_SMALL_TPREL
:
945 case SYMBOL_SMALL_ABSOLUTE
:
946 case SYMBOL_TINY_ABSOLUTE
:
947 aarch64_load_symref_appropriately (dest
, imm
, sty
);
955 if (CONST_INT_P (imm
) && aarch64_move_imm (INTVAL (imm
), mode
))
957 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
961 if (!CONST_INT_P (imm
))
963 if (GET_CODE (imm
) == HIGH
)
964 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
967 rtx mem
= force_const_mem (mode
, imm
);
969 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
977 /* We know we can't do this in 1 insn, and we must be able to do it
978 in two; so don't mess around looking for sequences that don't buy
980 emit_insn (gen_rtx_SET (VOIDmode
, dest
, GEN_INT (INTVAL (imm
) & 0xffff)));
981 emit_insn (gen_insv_immsi (dest
, GEN_INT (16),
982 GEN_INT ((INTVAL (imm
) >> 16) & 0xffff)));
986 /* Remaining cases are all for DImode. */
989 subtargets
= optimize
&& can_create_pseudo_p ();
995 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
997 if ((val
& mask
) == 0)
999 else if ((val
& mask
) == mask
)
1006 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1008 if ((val
& mask
) != mask
)
1010 emit_insn (gen_rtx_SET (VOIDmode
, dest
, GEN_INT (val
| mask
)));
1011 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1012 GEN_INT ((val
>> i
) & 0xffff)));
1019 if (zero_match
== 2)
1020 goto simple_sequence
;
1022 mask
= 0x0ffff0000UL
;
1023 for (i
= 16; i
< 64; i
+= 16, mask
<<= 16)
1025 HOST_WIDE_INT comp
= mask
& ~(mask
- 1);
1027 if (aarch64_uimm12_shift (val
- (val
& mask
)))
1029 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1031 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
, GEN_INT (val
& mask
)));
1032 emit_insn (gen_adddi3 (dest
, subtarget
,
1033 GEN_INT (val
- (val
& mask
))));
1036 else if (aarch64_uimm12_shift (-(val
- ((val
+ comp
) & mask
))))
1038 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1040 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1041 GEN_INT ((val
+ comp
) & mask
)));
1042 emit_insn (gen_adddi3 (dest
, subtarget
,
1043 GEN_INT (val
- ((val
+ comp
) & mask
))));
1046 else if (aarch64_uimm12_shift (val
- ((val
- comp
) | ~mask
)))
1048 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1050 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1051 GEN_INT ((val
- comp
) | ~mask
)));
1052 emit_insn (gen_adddi3 (dest
, subtarget
,
1053 GEN_INT (val
- ((val
- comp
) | ~mask
))));
1056 else if (aarch64_uimm12_shift (-(val
- (val
| ~mask
))))
1058 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1060 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1061 GEN_INT (val
| ~mask
)));
1062 emit_insn (gen_adddi3 (dest
, subtarget
,
1063 GEN_INT (val
- (val
| ~mask
))));
1068 /* See if we can do it by arithmetically combining two
1070 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1075 if (aarch64_uimm12_shift (val
- aarch64_bitmasks
[i
])
1076 || aarch64_uimm12_shift (-val
+ aarch64_bitmasks
[i
]))
1078 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1079 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1080 GEN_INT (aarch64_bitmasks
[i
])));
1081 emit_insn (gen_adddi3 (dest
, subtarget
,
1082 GEN_INT (val
- aarch64_bitmasks
[i
])));
1086 for (j
= 0; j
< 64; j
+= 16, mask
<<= 16)
1088 if ((aarch64_bitmasks
[i
] & ~mask
) == (val
& ~mask
))
1090 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1091 GEN_INT (aarch64_bitmasks
[i
])));
1092 emit_insn (gen_insv_immdi (dest
, GEN_INT (j
),
1093 GEN_INT ((val
>> j
) & 0xffff)));
1099 /* See if we can do it by logically combining two immediates. */
1100 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1102 if ((aarch64_bitmasks
[i
] & val
) == aarch64_bitmasks
[i
])
1106 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1107 if (val
== (aarch64_bitmasks
[i
] | aarch64_bitmasks
[j
]))
1109 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1110 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1111 GEN_INT (aarch64_bitmasks
[i
])));
1112 emit_insn (gen_iordi3 (dest
, subtarget
,
1113 GEN_INT (aarch64_bitmasks
[j
])));
1117 else if ((val
& aarch64_bitmasks
[i
]) == val
)
1121 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1122 if (val
== (aarch64_bitmasks
[j
] & aarch64_bitmasks
[i
]))
1125 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1126 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1127 GEN_INT (aarch64_bitmasks
[j
])));
1128 emit_insn (gen_anddi3 (dest
, subtarget
,
1129 GEN_INT (aarch64_bitmasks
[i
])));
1138 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1140 if ((val
& mask
) != 0)
1144 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1145 GEN_INT (val
& mask
)));
1149 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1150 GEN_INT ((val
>> i
) & 0xffff)));
1156 aarch64_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
1158 /* Indirect calls are not currently supported. */
  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
1165 if (aarch64_decl_is_long_call_p (decl
))
1171 /* Implement TARGET_PASS_BY_REFERENCE. */
1174 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED
,
1175 enum machine_mode mode
,
1177 bool named ATTRIBUTE_UNUSED
)
1180 enum machine_mode dummymode
;
1183 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1184 size
= (mode
== BLKmode
&& type
)
1185 ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1189 /* Arrays always passed by reference. */
1190 if (TREE_CODE (type
) == ARRAY_TYPE
)
1192 /* Other aggregates based on their size. */
1193 if (AGGREGATE_TYPE_P (type
))
1194 size
= int_size_in_bytes (type
);
1197 /* Variable sized arguments are always returned by reference. */
1201 /* Can this be a candidate to be passed in fp/simd register(s)? */
1202 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1207 /* Arguments which are variable sized or larger than 2 registers are
1208 passed by reference unless they are a homogenous floating point
1210 return size
> 2 * UNITS_PER_WORD
;
1213 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1215 aarch64_return_in_msb (const_tree valtype
)
1217 enum machine_mode dummy_mode
;
1220 /* Never happens in little-endian mode. */
1221 if (!BYTES_BIG_ENDIAN
)
1224 /* Only composite types smaller than or equal to 16 bytes can
1225 be potentially returned in registers. */
1226 if (!aarch64_composite_type_p (valtype
, TYPE_MODE (valtype
))
1227 || int_size_in_bytes (valtype
) <= 0
1228 || int_size_in_bytes (valtype
) > 16)
1231 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1232 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1233 is always passed/returned in the least significant bits of fp/simd
1235 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype
), valtype
,
1236 &dummy_mode
, &dummy_int
, NULL
))
1242 /* Implement TARGET_FUNCTION_VALUE.
1243 Define how to find the value returned by a function. */
1246 aarch64_function_value (const_tree type
, const_tree func
,
1247 bool outgoing ATTRIBUTE_UNUSED
)
1249 enum machine_mode mode
;
1252 enum machine_mode ag_mode
;
1254 mode
= TYPE_MODE (type
);
1255 if (INTEGRAL_TYPE_P (type
))
1256 mode
= promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
1258 if (aarch64_return_in_msb (type
))
1260 HOST_WIDE_INT size
= int_size_in_bytes (type
);
1262 if (size
% UNITS_PER_WORD
!= 0)
1264 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
1265 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
1269 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1270 &ag_mode
, &count
, NULL
))
1272 if (!aarch64_composite_type_p (type
, mode
))
1274 gcc_assert (count
== 1 && mode
== ag_mode
);
1275 return gen_rtx_REG (mode
, V0_REGNUM
);
1282 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
1283 for (i
= 0; i
< count
; i
++)
1285 rtx tmp
= gen_rtx_REG (ag_mode
, V0_REGNUM
+ i
);
1286 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1287 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
1288 XVECEXP (par
, 0, i
) = tmp
;
1294 return gen_rtx_REG (mode
, R0_REGNUM
);
1297 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1298 Return true if REGNO is the number of a hard register in which the values
1299 of called function may come back. */
1302 aarch64_function_value_regno_p (const unsigned int regno
)
1304 /* Maximum of 16 bytes can be returned in the general registers. Examples
1305 of 16-byte return values are: 128-bit integers and 16-byte small
1306 structures (excluding homogeneous floating-point aggregates). */
1307 if (regno
== R0_REGNUM
|| regno
== R1_REGNUM
)
1310 /* Up to four fp/simd registers can return a function value, e.g. a
1311 homogeneous floating-point aggregate having four members. */
1312 if (regno
>= V0_REGNUM
&& regno
< V0_REGNUM
+ HA_MAX_NUM_FLDS
)
1313 return !TARGET_GENERAL_REGS_ONLY
;
1318 /* Implement TARGET_RETURN_IN_MEMORY.
1320 If the type T of the result of a function is such that
1322 would require that arg be passed as a value in a register (or set of
1323 registers) according to the parameter passing rules, then the result
1324 is returned in the same registers as would be used for such an
1328 aarch64_return_in_memory (const_tree type
, const_tree fndecl ATTRIBUTE_UNUSED
)
1331 enum machine_mode ag_mode
;
1334 if (!AGGREGATE_TYPE_P (type
)
1335 && TREE_CODE (type
) != COMPLEX_TYPE
1336 && TREE_CODE (type
) != VECTOR_TYPE
)
1337 /* Simple scalar types always returned in registers. */
1340 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type
),
1347 /* Types larger than 2 registers returned in memory. */
1348 size
= int_size_in_bytes (type
);
1349 return (size
< 0 || size
> 2 * UNITS_PER_WORD
);
1353 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v
, enum machine_mode mode
,
1354 const_tree type
, int *nregs
)
1356 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1357 return aarch64_vfp_is_call_or_return_candidate (mode
,
1359 &pcum
->aapcs_vfp_rmode
,
1364 /* Given MODE and TYPE of a function argument, return the alignment in
1365 bits. The idea is to suppress any stronger alignment requested by
1366 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1367 This is a helper function for local use only. */
1370 aarch64_function_arg_alignment (enum machine_mode mode
, const_tree type
)
1372 unsigned int alignment
;
1376 if (!integer_zerop (TYPE_SIZE (type
)))
1378 if (TYPE_MODE (type
) == mode
)
1379 alignment
= TYPE_ALIGN (type
);
1381 alignment
= GET_MODE_ALIGNMENT (mode
);
1387 alignment
= GET_MODE_ALIGNMENT (mode
);
1392 /* Layout a function argument according to the AAPCS64 rules. The rule
1393 numbers refer to the rule numbers in the AAPCS64. */
1396 aarch64_layout_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
1398 bool named ATTRIBUTE_UNUSED
)
1400 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1401 int ncrn
, nvrn
, nregs
;
1402 bool allocate_ncrn
, allocate_nvrn
;
1404 /* We need to do this once per argument. */
1405 if (pcum
->aapcs_arg_processed
)
1408 pcum
->aapcs_arg_processed
= true;
1410 allocate_ncrn
= (type
) ? !(FLOAT_TYPE_P (type
)) : !FLOAT_MODE_P (mode
);
1411 allocate_nvrn
= aarch64_vfp_is_call_candidate (pcum_v
,
1416 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1417 The following code thus handles passing by SIMD/FP registers first. */
1419 nvrn
= pcum
->aapcs_nvrn
;
1421 /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1422 and homogenous short-vector aggregates (HVA). */
1425 if (nvrn
+ nregs
<= NUM_FP_ARG_REGS
)
1427 pcum
->aapcs_nextnvrn
= nvrn
+ nregs
;
1428 if (!aarch64_composite_type_p (type
, mode
))
1430 gcc_assert (nregs
== 1);
1431 pcum
->aapcs_reg
= gen_rtx_REG (mode
, V0_REGNUM
+ nvrn
);
1437 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1438 for (i
= 0; i
< nregs
; i
++)
1440 rtx tmp
= gen_rtx_REG (pcum
->aapcs_vfp_rmode
,
1441 V0_REGNUM
+ nvrn
+ i
);
1442 tmp
= gen_rtx_EXPR_LIST
1444 GEN_INT (i
* GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
)));
1445 XVECEXP (par
, 0, i
) = tmp
;
1447 pcum
->aapcs_reg
= par
;
1453 /* C.3 NSRN is set to 8. */
1454 pcum
->aapcs_nextnvrn
= NUM_FP_ARG_REGS
;
1459 ncrn
= pcum
->aapcs_ncrn
;
1460 nregs
= ((type
? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
))
1461 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1464 /* C6 - C9. though the sign and zero extension semantics are
1465 handled elsewhere. This is the case where the argument fits
1466 entirely general registers. */
1467 if (allocate_ncrn
&& (ncrn
+ nregs
<= NUM_ARG_REGS
))
1469 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1471 gcc_assert (nregs
== 0 || nregs
== 1 || nregs
== 2);
1473 /* C.8 if the argument has an alignment of 16 then the NGRN is
1474 rounded up to the next even number. */
1475 if (nregs
== 2 && alignment
== 16 * BITS_PER_UNIT
&& ncrn
% 2)
1478 gcc_assert (ncrn
+ nregs
<= NUM_ARG_REGS
);
1480 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1481 A reg is still generated for it, but the caller should be smart
1482 enough not to use it. */
1483 if (nregs
== 0 || nregs
== 1 || GET_MODE_CLASS (mode
) == MODE_INT
)
1485 pcum
->aapcs_reg
= gen_rtx_REG (mode
, R0_REGNUM
+ ncrn
);
1492 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1493 for (i
= 0; i
< nregs
; i
++)
1495 rtx tmp
= gen_rtx_REG (word_mode
, R0_REGNUM
+ ncrn
+ i
);
1496 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1497 GEN_INT (i
* UNITS_PER_WORD
));
1498 XVECEXP (par
, 0, i
) = tmp
;
1500 pcum
->aapcs_reg
= par
;
1503 pcum
->aapcs_nextncrn
= ncrn
+ nregs
;
1508 pcum
->aapcs_nextncrn
= NUM_ARG_REGS
;
1510 /* The argument is passed on stack; record the needed number of words for
1511 this argument (we can re-use NREGS) and align the total size if
1514 pcum
->aapcs_stack_words
= nregs
;
1515 if (aarch64_function_arg_alignment (mode
, type
) == 16 * BITS_PER_UNIT
)
1516 pcum
->aapcs_stack_size
= AARCH64_ROUND_UP (pcum
->aapcs_stack_size
,
1517 16 / UNITS_PER_WORD
) + 1;
1521 /* Implement TARGET_FUNCTION_ARG. */
1524 aarch64_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
1525 const_tree type
, bool named
)
1527 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1528 gcc_assert (pcum
->pcs_variant
== ARM_PCS_AAPCS64
);
1530 if (mode
== VOIDmode
)
1533 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1534 return pcum
->aapcs_reg
;
1538 aarch64_init_cumulative_args (CUMULATIVE_ARGS
*pcum
,
1539 const_tree fntype ATTRIBUTE_UNUSED
,
1540 rtx libname ATTRIBUTE_UNUSED
,
1541 const_tree fndecl ATTRIBUTE_UNUSED
,
1542 unsigned n_named ATTRIBUTE_UNUSED
)
1544 pcum
->aapcs_ncrn
= 0;
1545 pcum
->aapcs_nvrn
= 0;
1546 pcum
->aapcs_nextncrn
= 0;
1547 pcum
->aapcs_nextnvrn
= 0;
1548 pcum
->pcs_variant
= ARM_PCS_AAPCS64
;
1549 pcum
->aapcs_reg
= NULL_RTX
;
1550 pcum
->aapcs_arg_processed
= false;
1551 pcum
->aapcs_stack_words
= 0;
1552 pcum
->aapcs_stack_size
= 0;
1558 aarch64_function_arg_advance (cumulative_args_t pcum_v
,
1559 enum machine_mode mode
,
1563 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1564 if (pcum
->pcs_variant
== ARM_PCS_AAPCS64
)
1566 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1567 gcc_assert ((pcum
->aapcs_reg
!= NULL_RTX
)
1568 != (pcum
->aapcs_stack_words
!= 0));
1569 pcum
->aapcs_arg_processed
= false;
1570 pcum
->aapcs_ncrn
= pcum
->aapcs_nextncrn
;
1571 pcum
->aapcs_nvrn
= pcum
->aapcs_nextnvrn
;
1572 pcum
->aapcs_stack_size
+= pcum
->aapcs_stack_words
;
1573 pcum
->aapcs_stack_words
= 0;
1574 pcum
->aapcs_reg
= NULL_RTX
;
1579 aarch64_function_arg_regno_p (unsigned regno
)
1581 return ((GP_REGNUM_P (regno
) && regno
< R0_REGNUM
+ NUM_ARG_REGS
)
1582 || (FP_REGNUM_P (regno
) && regno
< V0_REGNUM
+ NUM_FP_ARG_REGS
));
1585 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1586 PARM_BOUNDARY bits of alignment, but will be given anything up
1587 to STACK_BOUNDARY bits if the type requires it. This makes sure
1588 that both before and after the layout of each argument, the Next
1589 Stacked Argument Address (NSAA) will have a minimum alignment of
1593 aarch64_function_arg_boundary (enum machine_mode mode
, const_tree type
)
1595 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1597 if (alignment
< PARM_BOUNDARY
)
1598 alignment
= PARM_BOUNDARY
;
1599 if (alignment
> STACK_BOUNDARY
)
1600 alignment
= STACK_BOUNDARY
;
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */
1614 aarch64_pad_arg_upward (enum machine_mode mode
, const_tree type
)
1616 /* On little-endian targets, the least significant byte of every stack
1617 argument is passed at the lowest byte address of the stack slot. */
1618 if (!BYTES_BIG_ENDIAN
)
1621 /* Otherwise, integral, floating-point and pointer types are padded downward:
1622 the least significant byte of a stack argument is passed at the highest
1623 byte address of the stack slot. */
1625 ? (INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
)
1626 || POINTER_TYPE_P (type
))
1627 : (SCALAR_INT_MODE_P (mode
) || SCALAR_FLOAT_MODE_P (mode
)))
1630 /* Everything else padded upward, i.e. data in first byte of stack slot. */
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
1657 aarch64_pad_reg_upward (enum machine_mode mode
, const_tree type
,
1658 bool first ATTRIBUTE_UNUSED
)
1661 /* Small composite types are always padded upward. */
1662 if (BYTES_BIG_ENDIAN
&& aarch64_composite_type_p (type
, mode
))
1664 HOST_WIDE_INT size
= (type
? int_size_in_bytes (type
)
1665 : GET_MODE_SIZE (mode
));
1666 if (size
< 2 * UNITS_PER_WORD
)
1670 /* Otherwise, use the default padding. */
1671 return !BYTES_BIG_ENDIAN
;
1674 static enum machine_mode
1675 aarch64_libgcc_cmp_return_mode (void)
1681 aarch64_frame_pointer_required (void)
1683 /* If the function contains dynamic stack allocations, we need to
1684 use the frame pointer to access the static parts of the frame. */
1685 if (cfun
->calls_alloca
)
1688 /* We may have turned flag_omit_frame_pointer on in order to have this
1689 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1690 and we'll check it here.
1691 If we really did set flag_omit_frame_pointer normally, then we return false
1692 (no frame pointer required) in all cases. */
1694 if (flag_omit_frame_pointer
&& !faked_omit_frame_pointer
)
1696 else if (flag_omit_leaf_frame_pointer
)
1697 return !crtl
->is_leaf
;
1701 /* Mark the registers that need to be saved by the callee and calculate
1702 the size of the callee-saved registers area and frame record (both FP
1703 and LR may be omitted). */
1705 aarch64_layout_frame (void)
1707 HOST_WIDE_INT offset
= 0;
1710 if (reload_completed
&& cfun
->machine
->frame
.laid_out
)
1713 cfun
->machine
->frame
.fp_lr_offset
= 0;
1715 /* First mark all the registers that really need to be saved... */
1716 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
1717 cfun
->machine
->frame
.reg_offset
[regno
] = -1;
1719 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1720 cfun
->machine
->frame
.reg_offset
[regno
] = -1;
1722 /* ... that includes the eh data registers (if needed)... */
1723 if (crtl
->calls_eh_return
)
1724 for (regno
= 0; EH_RETURN_DATA_REGNO (regno
) != INVALID_REGNUM
; regno
++)
1725 cfun
->machine
->frame
.reg_offset
[EH_RETURN_DATA_REGNO (regno
)] = 0;
1727 /* ... and any callee saved register that dataflow says is live. */
1728 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
1729 if (df_regs_ever_live_p (regno
)
1730 && !call_used_regs
[regno
])
1731 cfun
->machine
->frame
.reg_offset
[regno
] = 0;
1733 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1734 if (df_regs_ever_live_p (regno
)
1735 && !call_used_regs
[regno
])
1736 cfun
->machine
->frame
.reg_offset
[regno
] = 0;
1738 if (frame_pointer_needed
)
1740 cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] = 0;
1741 cfun
->machine
->frame
.reg_offset
[R29_REGNUM
] = 0;
1742 cfun
->machine
->frame
.hardfp_offset
= 2 * UNITS_PER_WORD
;
1745 /* Now assign stack slots for them. */
1746 for (regno
= R0_REGNUM
; regno
<= R28_REGNUM
; regno
++)
1747 if (cfun
->machine
->frame
.reg_offset
[regno
] != -1)
1749 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
1750 offset
+= UNITS_PER_WORD
;
1753 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1754 if (cfun
->machine
->frame
.reg_offset
[regno
] != -1)
1756 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
1757 offset
+= UNITS_PER_WORD
;
1760 if (frame_pointer_needed
)
1762 cfun
->machine
->frame
.reg_offset
[R29_REGNUM
] = offset
;
1763 offset
+= UNITS_PER_WORD
;
1764 cfun
->machine
->frame
.fp_lr_offset
= UNITS_PER_WORD
;
1767 if (cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] != -1)
1769 cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] = offset
;
1770 offset
+= UNITS_PER_WORD
;
1771 cfun
->machine
->frame
.fp_lr_offset
+= UNITS_PER_WORD
;
1774 cfun
->machine
->frame
.padding0
=
1775 (AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
) - offset
);
1776 offset
= AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
1778 cfun
->machine
->frame
.saved_regs_size
= offset
;
1779 cfun
->machine
->frame
.laid_out
= true;
1782 /* Make the last instruction frame-related and note that it performs
1783 the operation described by FRAME_PATTERN. */
1786 aarch64_set_frame_expr (rtx frame_pattern
)
1790 insn
= get_last_insn ();
1791 RTX_FRAME_RELATED_P (insn
) = 1;
1792 RTX_FRAME_RELATED_P (frame_pattern
) = 1;
1793 REG_NOTES (insn
) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
1799 aarch64_register_saved_on_entry (int regno
)
1801 return cfun
->machine
->frame
.reg_offset
[regno
] != -1;
1806 aarch64_save_or_restore_fprs (int start_offset
, int increment
,
1807 bool restore
, rtx base_rtx
)
1813 rtx (*gen_mem_ref
)(enum machine_mode
, rtx
) = (frame_pointer_needed
)? gen_frame_mem
: gen_rtx_MEM
;
1816 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1818 if (aarch64_register_saved_on_entry (regno
))
1821 mem
= gen_mem_ref (DFmode
,
1822 plus_constant (Pmode
,
1826 for (regno2
= regno
+ 1;
1827 regno2
<= V31_REGNUM
1828 && !aarch64_register_saved_on_entry (regno2
);
1833 if (regno2
<= V31_REGNUM
&&
1834 aarch64_register_saved_on_entry (regno2
))
1837 /* Next highest register to be saved. */
1838 mem2
= gen_mem_ref (DFmode
,
1842 start_offset
+ increment
));
1843 if (restore
== false)
1846 ( gen_store_pairdf (mem
, gen_rtx_REG (DFmode
, regno
),
1847 mem2
, gen_rtx_REG (DFmode
, regno2
)));
1853 ( gen_load_pairdf (gen_rtx_REG (DFmode
, regno
), mem
,
1854 gen_rtx_REG (DFmode
, regno2
), mem2
));
1856 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DFmode
, regno
));
1857 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DFmode
, regno2
));
	  /* The first part of a frame-related parallel insn
	     is always assumed to be relevant to the frame
	     calculations; subsequent parts are only
	     frame-related if explicitly marked.  */
1864 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0,
1867 start_offset
+= increment
* 2;
1871 if (restore
== false)
1872 insn
= emit_move_insn (mem
, gen_rtx_REG (DFmode
, regno
));
1875 insn
= emit_move_insn (gen_rtx_REG (DFmode
, regno
), mem
);
1876 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, regno
));
1878 start_offset
+= increment
;
1880 RTX_FRAME_RELATED_P (insn
) = 1;
/* Offset from the stack pointer of where the saves and
   restores have to happen.  */
1890 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset
,
1894 rtx base_rtx
= stack_pointer_rtx
;
1895 HOST_WIDE_INT start_offset
= offset
;
1896 HOST_WIDE_INT increment
= UNITS_PER_WORD
;
1897 rtx (*gen_mem_ref
)(enum machine_mode
, rtx
) = (frame_pointer_needed
)? gen_frame_mem
: gen_rtx_MEM
;
1898 unsigned limit
= (frame_pointer_needed
)? R28_REGNUM
: R30_REGNUM
;
1902 for (regno
= R0_REGNUM
; regno
<= limit
; regno
++)
1904 if (aarch64_register_saved_on_entry (regno
))
1907 mem
= gen_mem_ref (Pmode
,
1908 plus_constant (Pmode
,
1912 for (regno2
= regno
+ 1;
1914 && !aarch64_register_saved_on_entry (regno2
);
1919 if (regno2
<= limit
&&
1920 aarch64_register_saved_on_entry (regno2
))
1923 /* Next highest register to be saved. */
1924 mem2
= gen_mem_ref (Pmode
,
1928 start_offset
+ increment
));
1929 if (restore
== false)
1932 ( gen_store_pairdi (mem
, gen_rtx_REG (DImode
, regno
),
1933 mem2
, gen_rtx_REG (DImode
, regno2
)));
1939 ( gen_load_pairdi (gen_rtx_REG (DImode
, regno
), mem
,
1940 gen_rtx_REG (DImode
, regno2
), mem2
));
1942 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, regno
));
1943 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, regno2
));
	  /* The first part of a frame-related parallel insn
	     is always assumed to be relevant to the frame
	     calculations; subsequent parts are only
	     frame-related if explicitly marked.  */
1950 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0,
1953 start_offset
+= increment
* 2;
1957 if (restore
== false)
1958 insn
= emit_move_insn (mem
, gen_rtx_REG (DImode
, regno
));
1961 insn
= emit_move_insn (gen_rtx_REG (DImode
, regno
), mem
);
1962 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, regno
));
1964 start_offset
+= increment
;
1966 RTX_FRAME_RELATED_P (insn
) = 1;
1970 aarch64_save_or_restore_fprs (start_offset
, increment
, restore
, base_rtx
);
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+ <-- arg_pointer_rtx
	|                               |
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|                               |
	|  local variables              |
	|                               |
	+-------------------------------+ <-- frame_pointer_rtx
	|                               |
	|  callee-saved registers       |
	|                               |
	+-------------------------------+
	|  LR'                          |
	+-------------------------------+
	|  FP'                          |
      P +-------------------------------+ <-- hard_frame_pointer_rtx
	|  dynamic allocation           |
	+-------------------------------+
	|                               |
	|  outgoing stack arguments     |
	|                               |
	+-------------------------------+ <-- stack_pointer_rtx

   Dynamic stack allocations such as alloca insert data at point P.
   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
   hard_frame_pointer_rtx unchanged.  */
/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */
2016 aarch64_expand_prologue (void)
2018 /* sub sp, sp, #<frame_size>
2019 stp {fp, lr}, [sp, #<frame_size> - 16]
2020 add fp, sp, #<frame_size> - hardfp_offset
2021 stp {cs_reg}, [fp, #-16] etc.
2023 sub sp, sp, <final_adjustment_if_any>
2025 HOST_WIDE_INT original_frame_size
; /* local variables + vararg save */
2026 HOST_WIDE_INT frame_size
, offset
;
2027 HOST_WIDE_INT fp_offset
; /* FP offset from SP */
2030 aarch64_layout_frame ();
2031 original_frame_size
= get_frame_size () + cfun
->machine
->saved_varargs_size
;
2032 gcc_assert ((!cfun
->machine
->saved_varargs_size
|| cfun
->stdarg
)
2033 && (cfun
->stdarg
|| !cfun
->machine
->saved_varargs_size
));
2034 frame_size
= (original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
2035 + crtl
->outgoing_args_size
);
2036 offset
= frame_size
= AARCH64_ROUND_UP (frame_size
,
2037 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2039 if (flag_stack_usage_info
)
2040 current_function_static_stack_size
= frame_size
;
2043 - original_frame_size
2044 - cfun
->machine
->frame
.saved_regs_size
);
  /* Store pairs and load pairs have a range of only -512 to 504.  */
2049 /* When the frame has a large size, an initial decrease is done on
2050 the stack pointer to jump over the callee-allocated save area for
2051 register varargs, the local variable area and/or the callee-saved
2052 register area. This will allow the pre-index write-back
2053 store pair instructions to be used for setting up the stack frame
2055 offset
= original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
;
2057 offset
= cfun
->machine
->frame
.saved_regs_size
;
2059 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2062 if (frame_size
>= 0x1000000)
2064 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2065 emit_move_insn (op0
, GEN_INT (-frame_size
));
2066 emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2067 aarch64_set_frame_expr (gen_rtx_SET
2068 (Pmode
, stack_pointer_rtx
,
2069 plus_constant (Pmode
,
2073 else if (frame_size
> 0)
2075 if ((frame_size
& 0xfff) != frame_size
)
2077 insn
= emit_insn (gen_add2_insn
2079 GEN_INT (-(frame_size
2080 & ~(HOST_WIDE_INT
)0xfff))));
2081 RTX_FRAME_RELATED_P (insn
) = 1;
2083 if ((frame_size
& 0xfff) != 0)
2085 insn
= emit_insn (gen_add2_insn
2087 GEN_INT (-(frame_size
2088 & (HOST_WIDE_INT
)0xfff))));
2089 RTX_FRAME_RELATED_P (insn
) = 1;
2098 /* Save the frame pointer and lr if the frame pointer is needed
2099 first. Make the frame pointer point to the location of the
2100 old frame pointer on the stack. */
2101 if (frame_pointer_needed
)
2107 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2108 GEN_INT (-offset
)));
2109 RTX_FRAME_RELATED_P (insn
) = 1;
2110 aarch64_set_frame_expr (gen_rtx_SET
2111 (Pmode
, stack_pointer_rtx
,
2112 gen_rtx_MINUS (Pmode
,
2114 GEN_INT (offset
))));
2115 mem_fp
= gen_frame_mem (DImode
,
2116 plus_constant (Pmode
,
2119 mem_lr
= gen_frame_mem (DImode
,
2120 plus_constant (Pmode
,
2124 insn
= emit_insn (gen_store_pairdi (mem_fp
,
2125 hard_frame_pointer_rtx
,
2127 gen_rtx_REG (DImode
,
2132 insn
= emit_insn (gen_storewb_pairdi_di
2133 (stack_pointer_rtx
, stack_pointer_rtx
,
2134 hard_frame_pointer_rtx
,
2135 gen_rtx_REG (DImode
, LR_REGNUM
),
2137 GEN_INT (GET_MODE_SIZE (DImode
) - offset
)));
2138 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
	  /* The first part of a frame-related parallel insn is always
	     assumed to be relevant to the frame calculations;
	     subsequent parts are only frame-related if explicitly
	     marked.  */
2145 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2146 RTX_FRAME_RELATED_P (insn
) = 1;
2148 /* Set up frame pointer to point to the location of the
2149 previous frame pointer on the stack. */
2150 insn
= emit_insn (gen_add3_insn (hard_frame_pointer_rtx
,
2152 GEN_INT (fp_offset
)));
2153 aarch64_set_frame_expr (gen_rtx_SET
2154 (Pmode
, hard_frame_pointer_rtx
,
2155 plus_constant (Pmode
,
2158 RTX_FRAME_RELATED_P (insn
) = 1;
2159 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
2160 hard_frame_pointer_rtx
));
2164 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2165 GEN_INT (-offset
)));
2166 RTX_FRAME_RELATED_P (insn
) = 1;
2169 aarch64_save_or_restore_callee_save_registers
2170 (fp_offset
+ cfun
->machine
->frame
.hardfp_offset
, 0);
2173 /* when offset >= 512,
2174 sub sp, sp, #<outgoing_args_size> */
2175 if (frame_size
> -1)
2177 if (crtl
->outgoing_args_size
> 0)
2179 insn
= emit_insn (gen_add2_insn
2181 GEN_INT (- crtl
->outgoing_args_size
)));
2182 RTX_FRAME_RELATED_P (insn
) = 1;
2187 /* Generate the epilogue instructions for returning from a function. */
2189 aarch64_expand_epilogue (bool for_sibcall
)
2191 HOST_WIDE_INT original_frame_size
, frame_size
, offset
;
2192 HOST_WIDE_INT fp_offset
;
2196 aarch64_layout_frame ();
2197 original_frame_size
= get_frame_size () + cfun
->machine
->saved_varargs_size
;
2198 frame_size
= (original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
2199 + crtl
->outgoing_args_size
);
2200 offset
= frame_size
= AARCH64_ROUND_UP (frame_size
,
2201 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2204 - original_frame_size
2205 - cfun
->machine
->frame
.saved_regs_size
);
2207 cfa_reg
= frame_pointer_needed
? hard_frame_pointer_rtx
: stack_pointer_rtx
;
  /* Store pairs and load pairs have a range of only -512 to 504.  */
2212 offset
= original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
;
2214 offset
= cfun
->machine
->frame
.saved_regs_size
;
2216 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2218 if (!frame_pointer_needed
&& crtl
->outgoing_args_size
> 0)
2220 insn
= emit_insn (gen_add2_insn
2222 GEN_INT (crtl
->outgoing_args_size
)));
2223 RTX_FRAME_RELATED_P (insn
) = 1;
2229 /* If there were outgoing arguments or we've done dynamic stack
2230 allocation, then restore the stack pointer from the frame
2231 pointer. This is at most one insn and more efficient than using
2232 GCC's internal mechanism. */
2233 if (frame_pointer_needed
2234 && (crtl
->outgoing_args_size
|| cfun
->calls_alloca
))
2236 insn
= emit_insn (gen_add3_insn (stack_pointer_rtx
,
2237 hard_frame_pointer_rtx
,
2238 GEN_INT (- fp_offset
)));
2239 RTX_FRAME_RELATED_P (insn
) = 1;
2240 /* As SP is set to (FP - fp_offset), according to the rules in
2241 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2242 from the value of SP from now on. */
2243 cfa_reg
= stack_pointer_rtx
;
2246 aarch64_save_or_restore_callee_save_registers
2247 (fp_offset
+ cfun
->machine
->frame
.hardfp_offset
, 1);
2249 /* Restore the frame pointer and lr if the frame pointer is needed. */
2252 if (frame_pointer_needed
)
2258 mem_fp
= gen_frame_mem (DImode
,
2259 plus_constant (Pmode
,
2262 mem_lr
= gen_frame_mem (DImode
,
2263 plus_constant (Pmode
,
2267 insn
= emit_insn (gen_load_pairdi (hard_frame_pointer_rtx
,
2269 gen_rtx_REG (DImode
,
2275 insn
= emit_insn (gen_loadwb_pairdi_di
2278 hard_frame_pointer_rtx
,
2279 gen_rtx_REG (DImode
, LR_REGNUM
),
2281 GEN_INT (GET_MODE_SIZE (DImode
) + offset
)));
2282 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
2283 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
2284 (gen_rtx_SET (Pmode
, stack_pointer_rtx
,
2285 plus_constant (Pmode
, cfa_reg
,
/* The first part of a frame-related parallel insn
   is always assumed to be relevant to the frame
   calculations; subsequent parts are only
   frame-related if explicitly marked.  */
2293 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2294 RTX_FRAME_RELATED_P (insn
) = 1;
2295 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
2296 add_reg_note (insn
, REG_CFA_RESTORE
,
2297 gen_rtx_REG (DImode
, LR_REGNUM
));
2301 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2303 RTX_FRAME_RELATED_P (insn
) = 1;
2308 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2310 RTX_FRAME_RELATED_P (insn
) = 1;
2314 /* Stack adjustment for exception handler. */
2315 if (crtl
->calls_eh_return
)
2317 /* We need to unwind the stack by the offset computed by
2318 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2319 based on SP. Ideally we would update the SP and define the
2320 CFA along the lines of:
2322 SP = SP + EH_RETURN_STACKADJ_RTX
2323 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
However the dwarf emitter only understands a constant
register offset.
2328 The solution chosen here is to use the otherwise unused IP0
2329 as a temporary register to hold the current SP value. The
2330 CFA is described using IP0 then SP is modified. */
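/* Illustrative sketch (not part of the emitted RTL): the code below
   amounts to

       mov  ip0, sp                      ; REG_CFA_DEF_CFA: CFA = ip0
       add  sp, sp, <EH_RETURN_STACKADJ_RTX>

   so the CFA stays well-defined via IP0 while SP is adjusted by a
   run-time amount.  */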
2332 rtx ip0
= gen_rtx_REG (DImode
, IP0_REGNUM
);
2334 insn
= emit_move_insn (ip0
, stack_pointer_rtx
);
2335 add_reg_note (insn
, REG_CFA_DEF_CFA
, ip0
);
2336 RTX_FRAME_RELATED_P (insn
) = 1;
2338 emit_insn (gen_add2_insn (stack_pointer_rtx
, EH_RETURN_STACKADJ_RTX
));
2340 /* Ensure the assignment to IP0 does not get optimized away. */
2344 if (frame_size
> -1)
2346 if (frame_size
>= 0x1000000)
2348 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2349 emit_move_insn (op0
, GEN_INT (frame_size
));
2350 emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2351 aarch64_set_frame_expr (gen_rtx_SET
2352 (Pmode
, stack_pointer_rtx
,
2353 plus_constant (Pmode
,
2357 else if (frame_size
> 0)
2359 if ((frame_size
& 0xfff) != 0)
2361 insn
= emit_insn (gen_add2_insn
2363 GEN_INT ((frame_size
2364 & (HOST_WIDE_INT
) 0xfff))));
2365 RTX_FRAME_RELATED_P (insn
) = 1;
2367 if ((frame_size
& 0xfff) != frame_size
)
2369 insn
= emit_insn (gen_add2_insn
2371 GEN_INT ((frame_size
2372 & ~ (HOST_WIDE_INT
) 0xfff))));
2373 RTX_FRAME_RELATED_P (insn
) = 1;
2377 aarch64_set_frame_expr (gen_rtx_SET (Pmode
, stack_pointer_rtx
,
2378 plus_constant (Pmode
,
2383 emit_use (gen_rtx_REG (DImode
, LR_REGNUM
));
2385 emit_jump_insn (ret_rtx
);
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
2392 aarch64_final_eh_return_addr (void)
2394 HOST_WIDE_INT original_frame_size
, frame_size
, offset
, fp_offset
;
2395 aarch64_layout_frame ();
2396 original_frame_size
= get_frame_size () + cfun
->machine
->saved_varargs_size
;
2397 frame_size
= (original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
2398 + crtl
->outgoing_args_size
);
2399 offset
= frame_size
= AARCH64_ROUND_UP (frame_size
,
2400 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2402 - original_frame_size
2403 - cfun
->machine
->frame
.saved_regs_size
;
2405 if (cfun
->machine
->frame
.reg_offset
[LR_REGNUM
] < 0)
2406 return gen_rtx_REG (DImode
, LR_REGNUM
);
2408 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2409 result in a store to save LR introduced by builtin_eh_return () being
2410 incorrectly deleted because the alias is not detected.
2411 So in the calculation of the address to copy the exception unwinding
2412 return address to, we note 2 cases.
2413 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2414 we return a SP-relative location since all the addresses are SP-relative
2415 in this case. This prevents the store from being optimized away.
2416 If the fp_offset is not 0, then the addresses will be FP-relative and
2417 therefore we return a FP-relative location. */
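/* Worked example (illustrative): with a frame pointer and fp_offset == 0,
   SP == FP here, so the slot below is returned as an SP-relative address
   ([sp, #UNITS_PER_WORD]); with a non-zero fp_offset it is returned
   FP-relative ([fp, #UNITS_PER_WORD]) to match the other FP-relative
   addresses in the frame.  */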
2419 if (frame_pointer_needed
)
2422 return gen_frame_mem (DImode
,
2423 plus_constant (Pmode
, hard_frame_pointer_rtx
, UNITS_PER_WORD
));
2425 return gen_frame_mem (DImode
,
2426 plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
));
2429 /* If FP is not needed, we calculate the location of LR, which would be
2430 at the top of the saved registers block. */
2432 return gen_frame_mem (DImode
,
2433 plus_constant (Pmode
,
2436 + cfun
->machine
->frame
.saved_regs_size
2437 - 2 * UNITS_PER_WORD
));
2440 /* Output code to build up a constant in a register. */
2442 aarch64_build_constant (int regnum
, HOST_WIDE_INT val
)
2444 if (aarch64_bitmask_imm (val
, DImode
))
2445 emit_move_insn (gen_rtx_REG (Pmode
, regnum
), GEN_INT (val
));
2451 HOST_WIDE_INT valp
= val
>> 16;
2455 for (i
= 16; i
< 64; i
+= 16)
2457 valm
= (valp
& 0xffff);
/* zcount contains the number of additional MOVK instructions
   required if the constant is built up with an initial MOVZ instruction,
   while ncount is the number of MOVK instructions required if starting
   with a MOVN instruction.  Choose the sequence that yields the fewest
   instructions, preferring MOVZ when both counts are the same.  */
2474 if (ncount
< zcount
)
2476 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2477 GEN_INT ((~val
) & 0xffff));
2482 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2483 GEN_INT (val
& 0xffff));
2489 for (i
= 16; i
< 64; i
+= 16)
2491 if ((val
& 0xffff) != tval
)
2492 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode
, regnum
),
2493 GEN_INT (i
), GEN_INT (val
& 0xffff)));
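/* Worked example (illustrative): for val = 0xffffffffffff1234 the three
   upper 16-bit chunks are all 0xffff, so ncount (0) < zcount (3) and a
   single MOVN of the low chunk suffices; for val = 0x21234 only one
   upper chunk is non-zero, so the MOVZ path is taken and one MOVK
   follows.  */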
static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      aarch64_build_constant (scratchreg, delta);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
        {
          emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
          rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
          if (delta < 0)
            emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                    gen_rtx_MINUS (Pmode, this_rtx, shift)));
          else
            emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                    gen_rtx_PLUS (Pmode, this_rtx, shift)));
        }
      if (mdelta % 4096 != 0)
        {
          scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
          emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                  gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
        }
    }
}
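/* Worked example (illustrative): for delta = 0x5008 the code above emits
   "mov scratch, #5", adds (scratch << 12) to the register, and then adds
   the remaining #8 directly; deltas of 4096 * 4096 (0x1000000) or more
   fall back to aarch64_build_constant plus a single register add.  */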
2536 /* Output code to add DELTA to the first argument, and then jump
2537 to FUNCTION. Used for C++ multiple inheritance. */
2539 aarch64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
2540 HOST_WIDE_INT delta
,
2541 HOST_WIDE_INT vcall_offset
,
2544 /* The this pointer is always in x0. Note that this differs from
2545 Arm where the this pointer maybe bumped to r1 if r0 is required
2546 to return a pointer to an aggregate. On AArch64 a result value
2547 pointer will be in x8. */
2548 int this_regno
= R0_REGNUM
;
2549 rtx this_rtx
, temp0
, temp1
, addr
, insn
, funexp
;
2551 reload_completed
= 1;
2552 emit_note (NOTE_INSN_PROLOGUE_END
);
2554 if (vcall_offset
== 0)
2555 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2558 gcc_assert ((vcall_offset
& (POINTER_BYTES
- 1)) == 0);
2560 this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
2561 temp0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2562 temp1
= gen_rtx_REG (Pmode
, IP1_REGNUM
);
2567 if (delta
>= -256 && delta
< 256)
2568 addr
= gen_rtx_PRE_MODIFY (Pmode
, this_rtx
,
2569 plus_constant (Pmode
, this_rtx
, delta
));
2571 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2574 if (Pmode
== ptr_mode
)
2575 aarch64_emit_move (temp0
, gen_rtx_MEM (ptr_mode
, addr
));
2577 aarch64_emit_move (temp0
,
2578 gen_rtx_ZERO_EXTEND (Pmode
,
2579 gen_rtx_MEM (ptr_mode
, addr
)));
2581 if (vcall_offset
>= -256 && vcall_offset
< 4096 * POINTER_BYTES
)
2582 addr
= plus_constant (Pmode
, temp0
, vcall_offset
);
2585 aarch64_build_constant (IP1_REGNUM
, vcall_offset
);
2586 addr
= gen_rtx_PLUS (Pmode
, temp0
, temp1
);
2589 if (Pmode
== ptr_mode
)
2590 aarch64_emit_move (temp1
, gen_rtx_MEM (ptr_mode
,addr
));
2592 aarch64_emit_move (temp1
,
2593 gen_rtx_SIGN_EXTEND (Pmode
,
2594 gen_rtx_MEM (ptr_mode
, addr
)));
2596 emit_insn (gen_add2_insn (this_rtx
, temp1
));
2599 /* Generate a tail call to the target function. */
2600 if (!TREE_USED (function
))
2602 assemble_external (function
);
2603 TREE_USED (function
) = 1;
2605 funexp
= XEXP (DECL_RTL (function
), 0);
2606 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
2607 insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
2608 SIBLING_CALL_P (insn
) = 1;
2610 insn
= get_insns ();
2611 shorten_branches (insn
);
2612 final_start_function (insn
, file
, 1);
2613 final (insn
, file
, 1);
2614 final_end_function ();
2616 /* Stop pretending to be a post-reload pass. */
2617 reload_completed
= 0;
static int
aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

/* Return true if X contains a reference to a thread-local symbol.  */

static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
}
/* Comparison function for qsort/bsearch over the bitmask-immediate table.  */

static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

  if (*imm1 < *imm2)
    return -1;
  if (*imm1 > *imm2)
    return +1;
  return 0;
}
/* Build the table of all valid bitmask immediates.  */

static void
aarch64_build_bitmask_table (void)
{
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
    {
      e = 1 << log_e;
      if (e == 64)
        mask = ~(HOST_WIDE_INT) 0;
      else
        mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
        {
          for (r = 0; r < e; r++)
            {
              /* set s consecutive bits to 1 (s < 64) */
              imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
              /* rotate right by r */
              if (r != 0)
                imm = ((imm >> r) | (imm << (e - r))) & mask;
              /* replicate the constant depending on SIMD size */
              switch (log_e) {
              case 1: imm |= (imm <<  2);
              case 2: imm |= (imm <<  4);
              case 3: imm |= (imm <<  8);
              case 4: imm |= (imm << 16);
              case 5: imm |= (imm << 32);
              case 6:
                break;
              default:
                gcc_unreachable ();
              }
              gcc_assert (nimms < AARCH64_NUM_BITMASKS);
              aarch64_bitmasks[nimms++] = imm;
            }
        }
    }

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
         aarch64_bitmasks_cmp);
}
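/* Worked example (illustrative): with e == 8, s == 3, r == 1 the loops
   above form 0x07, rotate it within the byte to 0x83, and the switch
   fall-through replicates it to 0x8383838383838383, which becomes one
   entry of the sorted table searched by aarch64_bitmask_imm.  */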
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
}

/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
        return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}
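/* Worked examples (illustrative): 0xabc and 0xabc000 both satisfy
   aarch64_uimm12_shift (shift 0 and shift 12), while 0xabc00 does not;
   0x12340000 is accepted by aarch64_movw_imm for SImode (a MOVZ with a
   16-bit shift), but 0x12340001 is not.  */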
/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
                  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
}

/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
static bool
aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    {
      if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
          != SYMBOL_FORCE_TO_MEM)
        return true;
      else
        /* Avoid generating a 64-bit relocation in ILP32; leave
           to aarch64_expand_mov_immediate to handle it properly.  */
        return mode != ptr_mode;
    }

  return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
          || regno == SP_REGNUM
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
2843 /* Return true if address offset is a valid index. If it is, fill in INFO
2844 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2847 aarch64_classify_index (struct aarch64_address_info
*info
, rtx x
,
2848 enum machine_mode mode
, bool strict_p
)
2850 enum aarch64_address_type type
;
2855 if ((REG_P (x
) || GET_CODE (x
) == SUBREG
)
2856 && GET_MODE (x
) == Pmode
)
2858 type
= ADDRESS_REG_REG
;
2862 /* (sign_extend:DI (reg:SI)) */
2863 else if ((GET_CODE (x
) == SIGN_EXTEND
2864 || GET_CODE (x
) == ZERO_EXTEND
)
2865 && GET_MODE (x
) == DImode
2866 && GET_MODE (XEXP (x
, 0)) == SImode
)
2868 type
= (GET_CODE (x
) == SIGN_EXTEND
)
2869 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2870 index
= XEXP (x
, 0);
2873 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2874 else if (GET_CODE (x
) == MULT
2875 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
2876 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
2877 && GET_MODE (XEXP (x
, 0)) == DImode
2878 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
2879 && CONST_INT_P (XEXP (x
, 1)))
2881 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
2882 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2883 index
= XEXP (XEXP (x
, 0), 0);
2884 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
2886 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2887 else if (GET_CODE (x
) == ASHIFT
2888 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
2889 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
2890 && GET_MODE (XEXP (x
, 0)) == DImode
2891 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
2892 && CONST_INT_P (XEXP (x
, 1)))
2894 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
2895 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2896 index
= XEXP (XEXP (x
, 0), 0);
2897 shift
= INTVAL (XEXP (x
, 1));
2899 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2900 else if ((GET_CODE (x
) == SIGN_EXTRACT
2901 || GET_CODE (x
) == ZERO_EXTRACT
)
2902 && GET_MODE (x
) == DImode
2903 && GET_CODE (XEXP (x
, 0)) == MULT
2904 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2905 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
2907 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
2908 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2909 index
= XEXP (XEXP (x
, 0), 0);
2910 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
2911 if (INTVAL (XEXP (x
, 1)) != 32 + shift
2912 || INTVAL (XEXP (x
, 2)) != 0)
2915 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2916 (const_int 0xffffffff<<shift)) */
2917 else if (GET_CODE (x
) == AND
2918 && GET_MODE (x
) == DImode
2919 && GET_CODE (XEXP (x
, 0)) == MULT
2920 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2921 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
2922 && CONST_INT_P (XEXP (x
, 1)))
2924 type
= ADDRESS_REG_UXTW
;
2925 index
= XEXP (XEXP (x
, 0), 0);
2926 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
2927 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
2930 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2931 else if ((GET_CODE (x
) == SIGN_EXTRACT
2932 || GET_CODE (x
) == ZERO_EXTRACT
)
2933 && GET_MODE (x
) == DImode
2934 && GET_CODE (XEXP (x
, 0)) == ASHIFT
2935 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2936 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
2938 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
2939 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2940 index
= XEXP (XEXP (x
, 0), 0);
2941 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
2942 if (INTVAL (XEXP (x
, 1)) != 32 + shift
2943 || INTVAL (XEXP (x
, 2)) != 0)
2946 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2947 (const_int 0xffffffff<<shift)) */
2948 else if (GET_CODE (x
) == AND
2949 && GET_MODE (x
) == DImode
2950 && GET_CODE (XEXP (x
, 0)) == ASHIFT
2951 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2952 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
2953 && CONST_INT_P (XEXP (x
, 1)))
2955 type
= ADDRESS_REG_UXTW
;
2956 index
= XEXP (XEXP (x
, 0), 0);
2957 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
2958 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
2961 /* (mult:P (reg:P) (const_int scale)) */
2962 else if (GET_CODE (x
) == MULT
2963 && GET_MODE (x
) == Pmode
2964 && GET_MODE (XEXP (x
, 0)) == Pmode
2965 && CONST_INT_P (XEXP (x
, 1)))
2967 type
= ADDRESS_REG_REG
;
2968 index
= XEXP (x
, 0);
2969 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
2971 /* (ashift:P (reg:P) (const_int shift)) */
2972 else if (GET_CODE (x
) == ASHIFT
2973 && GET_MODE (x
) == Pmode
2974 && GET_MODE (XEXP (x
, 0)) == Pmode
2975 && CONST_INT_P (XEXP (x
, 1)))
2977 type
= ADDRESS_REG_REG
;
2978 index
= XEXP (x
, 0);
2979 shift
= INTVAL (XEXP (x
, 1));
2984 if (GET_CODE (index
) == SUBREG
)
2985 index
= SUBREG_REG (index
);
2988 (shift
> 0 && shift
<= 3
2989 && (1 << shift
) == GET_MODE_SIZE (mode
)))
2991 && aarch64_regno_ok_for_index_p (REGNO (index
), strict_p
))
2994 info
->offset
= index
;
2995 info
->shift
= shift
;
static inline bool
offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
          && offset < 64 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
          && offset < 4096 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}
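/* Worked example (illustrative): for DImode (8 bytes) the predicates
   above accept, respectively, multiples of 8 in [-512, 504], any offset
   in [-256, 255], and multiples of 8 in [0, 32760].  */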
3025 /* Return true if X is a valid address for machine mode MODE. If it is,
3026 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3027 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3030 aarch64_classify_address (struct aarch64_address_info
*info
,
3031 rtx x
, enum machine_mode mode
,
3032 RTX_CODE outer_code
, bool strict_p
)
3034 enum rtx_code code
= GET_CODE (x
);
3036 bool allow_reg_index_p
=
3037 outer_code
!= PARALLEL
&& GET_MODE_SIZE(mode
) != 16;
/* Don't support anything other than POST_INC or REG addressing for
   AdvSIMD modes.  */
3041 if (aarch64_vector_mode_p (mode
)
3042 && (code
!= POST_INC
&& code
!= REG
))
3049 info
->type
= ADDRESS_REG_IMM
;
3051 info
->offset
= const0_rtx
;
3052 return aarch64_base_register_rtx_p (x
, strict_p
);
3057 if (GET_MODE_SIZE (mode
) != 0
3058 && CONST_INT_P (op1
)
3059 && aarch64_base_register_rtx_p (op0
, strict_p
))
3061 HOST_WIDE_INT offset
= INTVAL (op1
);
3063 info
->type
= ADDRESS_REG_IMM
;
3067 /* TImode and TFmode values are allowed in both pairs of X
3068 registers and individual Q registers. The available
3070 X,X: 7-bit signed scaled offset
3071 Q: 9-bit signed offset
3072 We conservatively require an offset representable in either mode.
3074 if (mode
== TImode
|| mode
== TFmode
)
3075 return (offset_7bit_signed_scaled_p (mode
, offset
)
3076 && offset_9bit_signed_unscaled_p (mode
, offset
));
3078 if (outer_code
== PARALLEL
)
3079 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3080 && offset_7bit_signed_scaled_p (mode
, offset
));
3082 return (offset_9bit_signed_unscaled_p (mode
, offset
)
3083 || offset_12bit_unsigned_scaled_p (mode
, offset
));
3086 if (allow_reg_index_p
)
3088 /* Look for base + (scaled/extended) index register. */
3089 if (aarch64_base_register_rtx_p (op0
, strict_p
)
3090 && aarch64_classify_index (info
, op1
, mode
, strict_p
))
3095 if (aarch64_base_register_rtx_p (op1
, strict_p
)
3096 && aarch64_classify_index (info
, op0
, mode
, strict_p
))
3109 info
->type
= ADDRESS_REG_WB
;
3110 info
->base
= XEXP (x
, 0);
3111 info
->offset
= NULL_RTX
;
3112 return aarch64_base_register_rtx_p (info
->base
, strict_p
);
3116 info
->type
= ADDRESS_REG_WB
;
3117 info
->base
= XEXP (x
, 0);
3118 if (GET_CODE (XEXP (x
, 1)) == PLUS
3119 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3120 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), info
->base
)
3121 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3123 HOST_WIDE_INT offset
;
3124 info
->offset
= XEXP (XEXP (x
, 1), 1);
3125 offset
= INTVAL (info
->offset
);
3127 /* TImode and TFmode values are allowed in both pairs of X
3128 registers and individual Q registers. The available
3130 X,X: 7-bit signed scaled offset
3131 Q: 9-bit signed offset
3132 We conservatively require an offset representable in either mode.
3134 if (mode
== TImode
|| mode
== TFmode
)
3135 return (offset_7bit_signed_scaled_p (mode
, offset
)
3136 && offset_9bit_signed_unscaled_p (mode
, offset
));
3138 if (outer_code
== PARALLEL
)
3139 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3140 && offset_7bit_signed_scaled_p (mode
, offset
));
3142 return offset_9bit_signed_unscaled_p (mode
, offset
);
3149 /* load literal: pc-relative constant pool entry. Only supported
3150 for SI mode or larger. */
3151 info
->type
= ADDRESS_SYMBOLIC
;
3152 if (outer_code
!= PARALLEL
&& GET_MODE_SIZE (mode
) >= 4)
3156 split_const (x
, &sym
, &addend
);
3157 return (GET_CODE (sym
) == LABEL_REF
3158 || (GET_CODE (sym
) == SYMBOL_REF
3159 && CONSTANT_POOL_ADDRESS_P (sym
)));
3164 info
->type
= ADDRESS_LO_SUM
;
3165 info
->base
= XEXP (x
, 0);
3166 info
->offset
= XEXP (x
, 1);
3167 if (allow_reg_index_p
3168 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3171 split_const (info
->offset
, &sym
, &offs
);
3172 if (GET_CODE (sym
) == SYMBOL_REF
3173 && (aarch64_classify_symbol (sym
, SYMBOL_CONTEXT_MEM
)
3174 == SYMBOL_SMALL_ABSOLUTE
))
3176 /* The symbol and offset must be aligned to the access size. */
3178 unsigned int ref_size
;
3180 if (CONSTANT_POOL_ADDRESS_P (sym
))
3181 align
= GET_MODE_ALIGNMENT (get_pool_mode (sym
));
3182 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym
))
3184 tree exp
= SYMBOL_REF_DECL (sym
);
3185 align
= TYPE_ALIGN (TREE_TYPE (exp
));
3186 align
= CONSTANT_ALIGNMENT (exp
, align
);
3188 else if (SYMBOL_REF_DECL (sym
))
3189 align
= DECL_ALIGN (SYMBOL_REF_DECL (sym
));
3191 align
= BITS_PER_UNIT
;
3193 ref_size
= GET_MODE_SIZE (mode
);
3195 ref_size
= GET_MODE_SIZE (DImode
);
3197 return ((INTVAL (offs
) & (ref_size
- 1)) == 0
3198 && ((align
/ BITS_PER_UNIT
) & (ref_size
- 1)) == 0);
/* Return true if X is a symbolic constant address.  */
static bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}

/* Classify the base of symbolic expression X, given that X appears in
   context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x,
                                      enum aarch64_symbol_context context)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, context);
}
/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
                              RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}
/* Return TRUE if rtx X is immediate constant 0.0  */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return REAL_VALUES_EQUAL (r, dconst0);
}

/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
3279 aarch64_select_cc_mode (RTX_CODE code
, rtx x
, rtx y
)
3281 /* All floating point compares return CCFP if it is an equality
3282 comparison, and CCFPE otherwise. */
3283 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
3310 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3312 && (code
== EQ
|| code
== NE
|| code
== LT
|| code
== GE
)
3313 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
|| GET_CODE (x
) == AND
3314 || GET_CODE (x
) == NEG
))
3317 /* A compare with a shifted or negated operand. Because of canonicalization,
3318 the comparison will have to be swapped when we emit the assembly
3320 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3321 && (GET_CODE (y
) == REG
|| GET_CODE (y
) == SUBREG
)
3322 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
3323 || GET_CODE (x
) == LSHIFTRT
3324 || GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
3325 || GET_CODE (x
) == NEG
))
3328 /* A compare of a mode narrower than SI mode against zero can be done
3329 by extending the value in the comparison. */
3330 if ((GET_MODE (x
) == QImode
|| GET_MODE (x
) == HImode
)
3332 /* Only use sign-extension if we really need it. */
3333 return ((code
== GT
|| code
== GE
|| code
== LE
|| code
== LT
)
3334 ? CC_SESWPmode
: CC_ZESWPmode
);
3336 /* For everything else, return CCmode. */
3341 aarch64_get_condition_code (rtx x
)
3343 enum machine_mode mode
= GET_MODE (XEXP (x
, 0));
3344 enum rtx_code comp_code
= GET_CODE (x
);
3346 if (GET_MODE_CLASS (mode
) != MODE_CC
)
3347 mode
= SELECT_CC_MODE (comp_code
, XEXP (x
, 0), XEXP (x
, 1));
3355 case GE
: return AARCH64_GE
;
3356 case GT
: return AARCH64_GT
;
3357 case LE
: return AARCH64_LS
;
3358 case LT
: return AARCH64_MI
;
3359 case NE
: return AARCH64_NE
;
3360 case EQ
: return AARCH64_EQ
;
3361 case ORDERED
: return AARCH64_VC
;
3362 case UNORDERED
: return AARCH64_VS
;
3363 case UNLT
: return AARCH64_LT
;
3364 case UNLE
: return AARCH64_LE
;
3365 case UNGT
: return AARCH64_HI
;
3366 case UNGE
: return AARCH64_PL
;
3367 default: gcc_unreachable ();
3374 case NE
: return AARCH64_NE
;
3375 case EQ
: return AARCH64_EQ
;
3376 case GE
: return AARCH64_GE
;
3377 case GT
: return AARCH64_GT
;
3378 case LE
: return AARCH64_LE
;
3379 case LT
: return AARCH64_LT
;
3380 case GEU
: return AARCH64_CS
;
3381 case GTU
: return AARCH64_HI
;
3382 case LEU
: return AARCH64_LS
;
3383 case LTU
: return AARCH64_CC
;
3384 default: gcc_unreachable ();
3393 case NE
: return AARCH64_NE
;
3394 case EQ
: return AARCH64_EQ
;
3395 case GE
: return AARCH64_LE
;
3396 case GT
: return AARCH64_LT
;
3397 case LE
: return AARCH64_GE
;
3398 case LT
: return AARCH64_GT
;
3399 case GEU
: return AARCH64_LS
;
3400 case GTU
: return AARCH64_CC
;
3401 case LEU
: return AARCH64_CS
;
3402 case LTU
: return AARCH64_HI
;
3403 default: gcc_unreachable ();
3410 case NE
: return AARCH64_NE
;
3411 case EQ
: return AARCH64_EQ
;
3412 case GE
: return AARCH64_PL
;
3413 case LT
: return AARCH64_MI
;
3414 default: gcc_unreachable ();
3425 bit_count (unsigned HOST_WIDE_INT value
)
3439 aarch64_print_operand (FILE *f
, rtx x
, char code
)
3443 /* An integer or symbol address without a preceding # sign. */
3445 switch (GET_CODE (x
))
3448 fprintf (f
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
3452 output_addr_const (f
, x
);
3456 if (GET_CODE (XEXP (x
, 0)) == PLUS
3457 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
3459 output_addr_const (f
, x
);
3465 output_operand_lossage ("Unsupported operand for code '%c'", code
);
3470 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3474 if (GET_CODE (x
) != CONST_INT
3475 || (n
= exact_log2 (INTVAL (x
) & ~7)) <= 0)
3477 output_operand_lossage ("invalid operand for '%%%c'", code
);
3493 output_operand_lossage ("invalid operand for '%%%c'", code
);
3503 /* Print N such that 2^N == X. */
3504 if (GET_CODE (x
) != CONST_INT
|| (n
= exact_log2 (INTVAL (x
))) < 0)
3506 output_operand_lossage ("invalid operand for '%%%c'", code
);
3510 asm_fprintf (f
, "%d", n
);
3515 /* Print the number of non-zero bits in X (a const_int). */
3516 if (GET_CODE (x
) != CONST_INT
)
3518 output_operand_lossage ("invalid operand for '%%%c'", code
);
3522 asm_fprintf (f
, "%u", bit_count (INTVAL (x
)));
3526 /* Print the higher numbered register of a pair (TImode) of regs. */
3527 if (GET_CODE (x
) != REG
|| !GP_REGNUM_P (REGNO (x
) + 1))
3529 output_operand_lossage ("invalid operand for '%%%c'", code
);
3533 asm_fprintf (f
, "%s", reg_names
[REGNO (x
) + 1]);
3537 /* Print a condition (eq, ne, etc). */
3539 /* CONST_TRUE_RTX means always -- that's the default. */
3540 if (x
== const_true_rtx
)
3543 if (!COMPARISON_P (x
))
3545 output_operand_lossage ("invalid operand for '%%%c'", code
);
3549 fputs (aarch64_condition_codes
[aarch64_get_condition_code (x
)], f
);
3553 /* Print the inverse of a condition (eq <-> ne, etc). */
3555 /* CONST_TRUE_RTX means never -- that's the default. */
3556 if (x
== const_true_rtx
)
3562 if (!COMPARISON_P (x
))
3564 output_operand_lossage ("invalid operand for '%%%c'", code
);
3568 fputs (aarch64_condition_codes
[AARCH64_INVERSE_CONDITION_CODE
3569 (aarch64_get_condition_code (x
))], f
);
3577 /* Print a scalar FP/SIMD register name. */
3578 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3580 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3583 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - V0_REGNUM
);
3590 /* Print the first FP/SIMD register name in a list. */
3591 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3593 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3596 asm_fprintf (f
, "v%d", REGNO (x
) - V0_REGNUM
+ (code
- 'S'));
3600 /* Print bottom 16 bits of integer constant in hex. */
3601 if (GET_CODE (x
) != CONST_INT
)
3603 output_operand_lossage ("invalid operand for '%%%c'", code
);
3606 asm_fprintf (f
, "0x%wx", UINTVAL (x
) & 0xffff);
3611 /* Print a general register name or the zero register (32-bit or
3614 || (CONST_DOUBLE_P (x
) && aarch64_float_const_zero_rtx_p (x
)))
3616 asm_fprintf (f
, "%czr", code
);
3620 if (REG_P (x
) && GP_REGNUM_P (REGNO (x
)))
3622 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - R0_REGNUM
);
3626 if (REG_P (x
) && REGNO (x
) == SP_REGNUM
)
3628 asm_fprintf (f
, "%ssp", code
== 'w' ? "w" : "");
3635 /* Print a normal operand, if it's a general register, then we
3639 output_operand_lossage ("missing operand");
3643 switch (GET_CODE (x
))
3646 asm_fprintf (f
, "%s", reg_names
[REGNO (x
)]);
3650 aarch64_memory_reference_mode
= GET_MODE (x
);
3651 output_address (XEXP (x
, 0));
3656 output_addr_const (asm_out_file
, x
);
3660 asm_fprintf (f
, "%wd", INTVAL (x
));
3664 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_INT
)
3666 gcc_assert (aarch64_const_vec_all_same_int_p (x
,
3668 HOST_WIDE_INT_MAX
));
3669 asm_fprintf (f
, "%wd", INTVAL (CONST_VECTOR_ELT (x
, 0)));
3671 else if (aarch64_simd_imm_zero_p (x
, GET_MODE (x
)))
3680 /* CONST_DOUBLE can represent a double-width integer.
3681 In this case, the mode of x is VOIDmode. */
3682 if (GET_MODE (x
) == VOIDmode
)
3684 else if (aarch64_float_const_zero_rtx_p (x
))
3689 else if (aarch64_float_const_representable_p (x
))
3692 char float_buf
[buf_size
] = {'\0'};
3694 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3695 real_to_decimal_for_mode (float_buf
, &r
,
3698 asm_fprintf (asm_out_file
, "%s", float_buf
);
3702 output_operand_lossage ("invalid constant");
3705 output_operand_lossage ("invalid operand");
3711 if (GET_CODE (x
) == HIGH
)
3714 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3716 case SYMBOL_SMALL_GOT
:
3717 asm_fprintf (asm_out_file
, ":got:");
3720 case SYMBOL_SMALL_TLSGD
:
3721 asm_fprintf (asm_out_file
, ":tlsgd:");
3724 case SYMBOL_SMALL_TLSDESC
:
3725 asm_fprintf (asm_out_file
, ":tlsdesc:");
3728 case SYMBOL_SMALL_GOTTPREL
:
3729 asm_fprintf (asm_out_file
, ":gottprel:");
3732 case SYMBOL_SMALL_TPREL
:
3733 asm_fprintf (asm_out_file
, ":tprel:");
3736 case SYMBOL_TINY_GOT
:
3743 output_addr_const (asm_out_file
, x
);
3747 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3749 case SYMBOL_SMALL_GOT
:
3750 asm_fprintf (asm_out_file
, ":lo12:");
3753 case SYMBOL_SMALL_TLSGD
:
3754 asm_fprintf (asm_out_file
, ":tlsgd_lo12:");
3757 case SYMBOL_SMALL_TLSDESC
:
3758 asm_fprintf (asm_out_file
, ":tlsdesc_lo12:");
3761 case SYMBOL_SMALL_GOTTPREL
:
3762 asm_fprintf (asm_out_file
, ":gottprel_lo12:");
3765 case SYMBOL_SMALL_TPREL
:
3766 asm_fprintf (asm_out_file
, ":tprel_lo12_nc:");
3769 case SYMBOL_TINY_GOT
:
3770 asm_fprintf (asm_out_file
, ":got:");
3776 output_addr_const (asm_out_file
, x
);
3781 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3783 case SYMBOL_SMALL_TPREL
:
3784 asm_fprintf (asm_out_file
, ":tprel_hi12:");
3789 output_addr_const (asm_out_file
, x
);
3793 output_operand_lossage ("invalid operand prefix '%%%c'", code
);
3799 aarch64_print_operand_address (FILE *f
, rtx x
)
3801 struct aarch64_address_info addr
;
3803 if (aarch64_classify_address (&addr
, x
, aarch64_memory_reference_mode
,
3807 case ADDRESS_REG_IMM
:
3808 if (addr
.offset
== const0_rtx
)
3809 asm_fprintf (f
, "[%s]", reg_names
[REGNO (addr
.base
)]);
3811 asm_fprintf (f
, "[%s,%wd]", reg_names
[REGNO (addr
.base
)],
3812 INTVAL (addr
.offset
));
3815 case ADDRESS_REG_REG
:
3816 if (addr
.shift
== 0)
3817 asm_fprintf (f
, "[%s,%s]", reg_names
[REGNO (addr
.base
)],
3818 reg_names
[REGNO (addr
.offset
)]);
3820 asm_fprintf (f
, "[%s,%s,lsl %u]", reg_names
[REGNO (addr
.base
)],
3821 reg_names
[REGNO (addr
.offset
)], addr
.shift
);
3824 case ADDRESS_REG_UXTW
:
3825 if (addr
.shift
== 0)
3826 asm_fprintf (f
, "[%s,w%d,uxtw]", reg_names
[REGNO (addr
.base
)],
3827 REGNO (addr
.offset
) - R0_REGNUM
);
3829 asm_fprintf (f
, "[%s,w%d,uxtw %u]", reg_names
[REGNO (addr
.base
)],
3830 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
3833 case ADDRESS_REG_SXTW
:
3834 if (addr
.shift
== 0)
3835 asm_fprintf (f
, "[%s,w%d,sxtw]", reg_names
[REGNO (addr
.base
)],
3836 REGNO (addr
.offset
) - R0_REGNUM
);
3838 asm_fprintf (f
, "[%s,w%d,sxtw %u]", reg_names
[REGNO (addr
.base
)],
3839 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
3842 case ADDRESS_REG_WB
:
3843 switch (GET_CODE (x
))
3846 asm_fprintf (f
, "[%s,%d]!", reg_names
[REGNO (addr
.base
)],
3847 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3850 asm_fprintf (f
, "[%s],%d", reg_names
[REGNO (addr
.base
)],
3851 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3854 asm_fprintf (f
, "[%s,-%d]!", reg_names
[REGNO (addr
.base
)],
3855 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3858 asm_fprintf (f
, "[%s],-%d", reg_names
[REGNO (addr
.base
)],
3859 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3862 asm_fprintf (f
, "[%s,%wd]!", reg_names
[REGNO (addr
.base
)],
3863 INTVAL (addr
.offset
));
3866 asm_fprintf (f
, "[%s],%wd", reg_names
[REGNO (addr
.base
)],
3867 INTVAL (addr
.offset
));
3874 case ADDRESS_LO_SUM
:
3875 asm_fprintf (f
, "[%s,#:lo12:", reg_names
[REGNO (addr
.base
)]);
3876 output_addr_const (f
, addr
.offset
);
3877 asm_fprintf (f
, "]");
3880 case ADDRESS_SYMBOLIC
:
3884 output_addr_const (f
, x
);
3888 aarch64_label_mentioned_p (rtx x
)
3893 if (GET_CODE (x
) == LABEL_REF
)
3896 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3897 referencing instruction, but they are constant offsets, not
3899 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
3902 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
3903 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
3909 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
3910 if (aarch64_label_mentioned_p (XVECEXP (x
, i
, j
)))
3913 else if (fmt
[i
] == 'e' && aarch64_label_mentioned_p (XEXP (x
, i
)))
3920 /* Implement REGNO_REG_CLASS. */
3923 aarch64_regno_regclass (unsigned regno
)
3925 if (GP_REGNUM_P (regno
))
3928 if (regno
== SP_REGNUM
)
3931 if (regno
== FRAME_POINTER_REGNUM
3932 || regno
== ARG_POINTER_REGNUM
)
3933 return POINTER_REGS
;
3935 if (FP_REGNUM_P (regno
))
3936 return FP_LO_REGNUM_P (regno
) ? FP_LO_REGS
: FP_REGS
;
3941 /* Try a machine-dependent way of reloading an illegitimate address
3942 operand. If we find one, push the reload and return the new rtx. */
3945 aarch64_legitimize_reload_address (rtx
*x_p
,
3946 enum machine_mode mode
,
3947 int opnum
, int type
,
3948 int ind_levels ATTRIBUTE_UNUSED
)
3952 /* Do not allow mem (plus (reg, const)) if vector mode. */
3953 if (aarch64_vector_mode_p (mode
)
3954 && GET_CODE (x
) == PLUS
3955 && REG_P (XEXP (x
, 0))
3956 && CONST_INT_P (XEXP (x
, 1)))
3960 push_reload (orig_rtx
, NULL_RTX
, x_p
, NULL
,
3961 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
3962 opnum
, (enum reload_type
) type
);
3966 /* We must recognize output that we have already generated ourselves. */
3967 if (GET_CODE (x
) == PLUS
3968 && GET_CODE (XEXP (x
, 0)) == PLUS
3969 && REG_P (XEXP (XEXP (x
, 0), 0))
3970 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3971 && CONST_INT_P (XEXP (x
, 1)))
3973 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
3974 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
3975 opnum
, (enum reload_type
) type
);
3979 /* We wish to handle large displacements off a base register by splitting
3980 the addend across an add and the mem insn. This can cut the number of
3981 extra insns needed from 3 to 1. It is only useful for load/store of a
3982 single register with 12 bit offset field. */
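/* Worked example (illustrative): for an SImode access at base + 0x12344
   the addend is split into high = 0x12000 and low = 0x344; the high part
   is reloaded into the base register with a single add and the low part
   remains in the 12-bit offset field of the load/store itself.  */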
3983 if (GET_CODE (x
) == PLUS
3984 && REG_P (XEXP (x
, 0))
3985 && CONST_INT_P (XEXP (x
, 1))
3986 && HARD_REGISTER_P (XEXP (x
, 0))
3989 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x
, 0)), true))
3991 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
3992 HOST_WIDE_INT low
= val
& 0xfff;
3993 HOST_WIDE_INT high
= val
- low
;
3996 enum machine_mode xmode
= GET_MODE (x
);
3998 /* In ILP32, xmode can be either DImode or SImode. */
3999 gcc_assert (xmode
== DImode
|| xmode
== SImode
);
4001 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4002 BLKmode alignment. */
4003 if (GET_MODE_SIZE (mode
) == 0)
4006 offs
= low
% GET_MODE_SIZE (mode
);
4008 /* Align misaligned offset by adjusting high part to compensate. */
4011 if (aarch64_uimm12_shift (high
+ offs
))
4020 offs
= GET_MODE_SIZE (mode
) - offs
;
4022 high
= high
+ (low
& 0x1000) - offs
;
4027 /* Check for overflow. */
4028 if (high
+ low
!= val
)
4031 cst
= GEN_INT (high
);
4032 if (!aarch64_uimm12_shift (high
))
4033 cst
= force_const_mem (xmode
, cst
);
4035 /* Reload high part into base reg, leaving the low part
4036 in the mem instruction.
4037 Note that replacing this gen_rtx_PLUS with plus_constant is
4038 wrong in this case because we rely on the
4039 (plus (plus reg c1) c2) structure being preserved so that
4040 XEXP (*p, 0) in push_reload below uses the correct term. */
4041 x
= gen_rtx_PLUS (xmode
,
4042 gen_rtx_PLUS (xmode
, XEXP (x
, 0), cst
),
4045 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4046 BASE_REG_CLASS
, xmode
, VOIDmode
, 0, 0,
4047 opnum
, (enum reload_type
) type
);
4056 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED
, rtx x
,
4058 enum machine_mode mode
,
4059 secondary_reload_info
*sri
)
4061 /* Without the TARGET_SIMD instructions we cannot move a Q register
4062 to a Q register directly. We need a scratch. */
4063 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
4064 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
4065 && reg_class_subset_p (rclass
, FP_REGS
))
4068 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
4069 else if (mode
== TImode
)
4070 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
4074 /* A TFmode or TImode memory access should be handled via an FP_REGS
4075 because AArch64 has richer addressing modes for LDR/STR instructions
4076 than LDP/STP instructions. */
4077 if (!TARGET_GENERAL_REGS_ONLY
&& rclass
== CORE_REGS
4078 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
4081 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
4088 aarch64_can_eliminate (const int from
, const int to
)
4090 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4091 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4093 if (frame_pointer_needed
)
4095 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4097 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
4099 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4100 && !cfun
->calls_alloca
)
4102 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4108 /* If we decided that we didn't need a leaf frame pointer but then used
4109 LR in the function, then we'll want a frame pointer after all, so
4110 prevent this elimination to ensure a frame pointer is used.
4112 NOTE: the original value of flag_omit_frame_pointer gets trashed
4113 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4114 of faked_omit_frame_pointer here (which is true when we always
4115 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4116 pointers when LR is clobbered). */
4117 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4118 && df_regs_ever_live_p (LR_REGNUM
)
4119 && faked_omit_frame_pointer
)
4127 aarch64_initial_elimination_offset (unsigned from
, unsigned to
)
4129 HOST_WIDE_INT frame_size
;
4130 HOST_WIDE_INT offset
;
4132 aarch64_layout_frame ();
4133 frame_size
= (get_frame_size () + cfun
->machine
->frame
.saved_regs_size
4134 + crtl
->outgoing_args_size
4135 + cfun
->machine
->saved_varargs_size
);
4137 frame_size
= AARCH64_ROUND_UP (frame_size
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
4138 offset
= frame_size
;
4140 if (to
== HARD_FRAME_POINTER_REGNUM
)
4142 if (from
== ARG_POINTER_REGNUM
)
4143 return offset
- crtl
->outgoing_args_size
;
4145 if (from
== FRAME_POINTER_REGNUM
)
4146 return cfun
->machine
->frame
.saved_regs_size
;
4149 if (to
== STACK_POINTER_REGNUM
)
4151 if (from
== FRAME_POINTER_REGNUM
)
4153 HOST_WIDE_INT elim
= crtl
->outgoing_args_size
4154 + cfun
->machine
->frame
.saved_regs_size
4155 - cfun
->machine
->frame
.fp_lr_offset
;
4156 elim
= AARCH64_ROUND_UP (elim
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
4165 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4169 aarch64_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
4173 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
4178 aarch64_asm_trampoline_template (FILE *f
)
4182 asm_fprintf (f
, "\tldr\tw%d, .+16\n", IP1_REGNUM
- R0_REGNUM
);
4183 asm_fprintf (f
, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM
- R0_REGNUM
);
4187 asm_fprintf (f
, "\tldr\t%s, .+16\n", reg_names
[IP1_REGNUM
]);
4188 asm_fprintf (f
, "\tldr\t%s, .+20\n", reg_names
[STATIC_CHAIN_REGNUM
]);
4190 asm_fprintf (f
, "\tbr\t%s\n", reg_names
[IP1_REGNUM
]);
4191 assemble_aligned_integer (4, const0_rtx
);
4192 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4193 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4197 aarch64_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
4199 rtx fnaddr
, mem
, a_tramp
;
4200 const int tramp_code_sz
= 16;
4202 /* Don't need to copy the trailing D-words, we fill those in below. */
4203 emit_block_move (m_tramp
, assemble_trampoline_template (),
4204 GEN_INT (tramp_code_sz
), BLOCK_OP_NORMAL
);
4205 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
);
4206 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4207 if (GET_MODE (fnaddr
) != ptr_mode
)
4208 fnaddr
= convert_memory_address (ptr_mode
, fnaddr
);
4209 emit_move_insn (mem
, fnaddr
);
4211 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
+ POINTER_BYTES
);
4212 emit_move_insn (mem
, chain_value
);
4214 /* XXX We should really define a "clear_cache" pattern and use
4215 gen_clear_cache(). */
4216 a_tramp
= XEXP (m_tramp
, 0);
4217 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
4218 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, ptr_mode
,
4219 plus_constant (ptr_mode
, a_tramp
, TRAMPOLINE_SIZE
),
4223 static unsigned char
4224 aarch64_class_max_nregs (reg_class_t regclass
, enum machine_mode mode
)
4235 aarch64_vector_mode_p (mode
) ? (GET_MODE_SIZE (mode
) + 15) / 16 :
4236 (GET_MODE_SIZE (mode
) + 7) / 8;
4250 aarch64_preferred_reload_class (rtx x
, reg_class_t regclass
)
4252 if (regclass
== POINTER_REGS
)
4253 return GENERAL_REGS
;
4255 if (regclass
== STACK_REG
)
4258 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x
)), POINTER_REGS
))
4264 /* If it's an integer immediate that MOVI can't handle, then
4265 FP_REGS is not an option, so we return NO_REGS instead. */
4266 if (CONST_INT_P (x
) && reg_class_subset_p (regclass
, FP_REGS
)
4267 && !aarch64_simd_imm_scalar_p (x
, GET_MODE (x
)))
/* Register elimination can result in a request for
   SP+constant->FP_REGS.  We cannot support such operations which
   use SP as source and an FP_REG as destination, so reject out
   of hand in that case.  */
4274 if (! reg_class_subset_p (regclass
, GENERAL_REGS
) && GET_CODE (x
) == PLUS
)
4276 rtx lhs
= XEXP (x
, 0);
4278 /* Look through a possible SUBREG introduced by ILP32. */
4279 if (GET_CODE (lhs
) == SUBREG
)
4280 lhs
= SUBREG_REG (lhs
);
4282 gcc_assert (REG_P (lhs
));
4283 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs
)),
4292 aarch64_asm_output_labelref (FILE* f
, const char *name
)
4294 asm_fprintf (f
, "%U%s", name
);
4298 aarch64_elf_asm_constructor (rtx symbol
, int priority
)
4300 if (priority
== DEFAULT_INIT_PRIORITY
)
4301 default_ctor_section_asm_out_constructor (symbol
, priority
);
4306 snprintf (buf
, sizeof (buf
), ".init_array.%.5u", priority
);
4307 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4308 switch_to_section (s
);
4309 assemble_align (POINTER_SIZE
);
4310 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4315 aarch64_elf_asm_destructor (rtx symbol
, int priority
)
4317 if (priority
== DEFAULT_INIT_PRIORITY
)
4318 default_dtor_section_asm_out_destructor (symbol
, priority
);
4323 snprintf (buf
, sizeof (buf
), ".fini_array.%.5u", priority
);
4324 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4325 switch_to_section (s
);
4326 assemble_align (POINTER_SIZE
);
4327 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4332 aarch64_output_casesi (rtx
*operands
)
4336 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[2]));
4338 static const char *const patterns
[4][2] =
4341 "ldrb\t%w3, [%0,%w1,uxtw]",
4342 "add\t%3, %4, %w3, sxtb #2"
4345 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4346 "add\t%3, %4, %w3, sxth #2"
4349 "ldr\t%w3, [%0,%w1,uxtw #2]",
4350 "add\t%3, %4, %w3, sxtw #2"
4352 /* We assume that DImode is only generated when not optimizing and
4353 that we don't really need 64-bit address offsets. That would
4354 imply an object file with 8GB of code in a single function! */
4356 "ldr\t%w3, [%0,%w1,uxtw #2]",
4357 "add\t%3, %4, %w3, sxtw #2"
4361 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
4363 index
= exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec
)));
4365 gcc_assert (index
>= 0 && index
<= 3);
/* Need to implement table size reduction, by changing the code below.  */
4368 output_asm_insn (patterns
[index
][0], operands
);
4369 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
4370 snprintf (buf
, sizeof (buf
),
4371 "adr\t%%4, %s", targetm
.strip_name_encoding (label
));
4372 output_asm_insn (buf
, operands
);
4373 output_asm_insn (patterns
[index
][1], operands
);
4374 output_asm_insn ("br\t%3", operands
);
4375 assemble_label (asm_out_file
, label
);
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
        {
          HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
          if (mask == bits << shift)
            return size;
        }
    }
  return 0;
}
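/* Worked example (illustrative): aarch64_uxt_size (2, 0x3fc) returns 8,
   since 0xff << 2 == 0x3fc, i.e. a byte-sized operand scaled by 4, which
   maps onto UXTB with a left shift of 2.  */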
static bool
aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                                   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
                            rtx x ATTRIBUTE_UNUSED,
                            unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}
/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}
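/* Worked examples (illustrative): (ashift:DI (reg:DI x1) (const_int 3))
   and (mult:DI (reg:DI x1) (const_int 8)) both strip to (reg:DI x1); a
   shift by a non-constant amount is returned unchanged.  */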
4443 /* Helper function for rtx cost calculation. Strip a shift or extend
4444 expression from X. Returns the inner operand if successful, or the
4445 original expression on failure. We deal with a number of possible
4446 canonicalization variations here. */
4448 aarch64_strip_shift_or_extend (rtx x
)
4452 /* Zero and sign extraction of a widened value. */
4453 if ((GET_CODE (op
) == ZERO_EXTRACT
|| GET_CODE (op
) == SIGN_EXTRACT
)
4454 && XEXP (op
, 2) == const0_rtx
4455 && aarch64_is_extend_from_extract (GET_MODE (op
), XEXP (XEXP (op
, 0), 1),
4457 return XEXP (XEXP (op
, 0), 0);
4459 /* It can also be represented (for zero-extend) as an AND with an
4461 if (GET_CODE (op
) == AND
4462 && GET_CODE (XEXP (op
, 0)) == MULT
4463 && CONST_INT_P (XEXP (XEXP (op
, 0), 1))
4464 && CONST_INT_P (XEXP (op
, 1))
4465 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op
, 0), 1))),
4466 INTVAL (XEXP (op
, 1))) != 0)
4467 return XEXP (XEXP (op
, 0), 0);
4469 /* Now handle extended register, as this may also have an optional
4470 left shift by 1..4. */
4471 if (GET_CODE (op
) == ASHIFT
4472 && CONST_INT_P (XEXP (op
, 1))
4473 && ((unsigned HOST_WIDE_INT
) INTVAL (XEXP (op
, 1))) <= 4)
4476 if (GET_CODE (op
) == ZERO_EXTEND
4477 || GET_CODE (op
) == SIGN_EXTEND
)
4483 return aarch64_strip_shift (x
);
4486 /* Calculate the cost of calculating X, storing it in *COST. Result
4487 is true if the total cost of the operation has now been calculated. */
4489 aarch64_rtx_costs (rtx x
, int code
, int outer ATTRIBUTE_UNUSED
,
4490 int param ATTRIBUTE_UNUSED
, int *cost
, bool speed
)
4493 const struct cpu_rtx_cost_table
*extra_cost
4494 = aarch64_tune_params
->insn_extra_cost
;
4502 switch (GET_CODE (op0
))
4506 *cost
+= extra_cost
->memory_store
;
4508 if (op1
!= const0_rtx
)
4509 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
4513 if (! REG_P (SUBREG_REG (op0
)))
4514 *cost
+= rtx_cost (SUBREG_REG (op0
), SET
, 0, speed
);
4517 /* Cost is just the cost of the RHS of the set. */
4518 *cost
+= rtx_cost (op1
, SET
, 1, true);
4521 case ZERO_EXTRACT
: /* Bit-field insertion. */
4523 /* Strip any redundant widening of the RHS to meet the width of
4525 if (GET_CODE (op1
) == SUBREG
)
4526 op1
= SUBREG_REG (op1
);
4527 if ((GET_CODE (op1
) == ZERO_EXTEND
4528 || GET_CODE (op1
) == SIGN_EXTEND
)
4529 && GET_CODE (XEXP (op0
, 1)) == CONST_INT
4530 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1
, 0)))
4531 >= INTVAL (XEXP (op0
, 1))))
4532 op1
= XEXP (op1
, 0);
4533 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
4543 *cost
+= extra_cost
->memory_load
;
4548 op0
= CONST0_RTX (GET_MODE (x
));
4556 if (op1
== const0_rtx
4557 && GET_CODE (op0
) == AND
)
4563 /* Comparisons can work if the order is swapped.
4564 Canonicalization puts the more complex operation first, but
4565 we want it in op1. */
4567 || (GET_CODE (op0
) == SUBREG
&& REG_P (SUBREG_REG (op0
)))))
4579 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
4580 || (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
4581 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
))
4583 if (op0
!= const0_rtx
)
4584 *cost
+= rtx_cost (op0
, MINUS
, 0, speed
);
4586 if (CONST_INT_P (op1
))
4588 if (!aarch64_uimm12_shift (INTVAL (op1
)))
4589 *cost
+= rtx_cost (op1
, MINUS
, 1, speed
);
4593 op1
= aarch64_strip_shift_or_extend (op1
);
4594 *cost
+= rtx_cost (op1
, MINUS
, 1, speed
);
4605 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4607 if (CONST_INT_P (op1
) && aarch64_uimm12_shift (INTVAL (op1
)))
4609 *cost
+= rtx_cost (op0
, PLUS
, 0, speed
);
4613 rtx new_op0
= aarch64_strip_shift_or_extend (op0
);
4616 && GET_CODE (op0
) == MULT
)
4618 if ((GET_CODE (XEXP (op0
, 0)) == ZERO_EXTEND
4619 && GET_CODE (XEXP (op0
, 1)) == ZERO_EXTEND
)
4620 || (GET_CODE (XEXP (op0
, 0)) == SIGN_EXTEND
4621 && GET_CODE (XEXP (op0
, 1)) == SIGN_EXTEND
))
4623 *cost
+= (rtx_cost (XEXP (XEXP (op0
, 0), 0), MULT
, 0,
4625 + rtx_cost (XEXP (XEXP (op0
, 1), 0), MULT
, 1,
4627 + rtx_cost (op1
, PLUS
, 1, speed
));
4629 *cost
+= extra_cost
->int_multiply_extend_add
;
4632 *cost
+= (rtx_cost (XEXP (op0
, 0), MULT
, 0, speed
)
4633 + rtx_cost (XEXP (op0
, 1), MULT
, 1, speed
)
4634 + rtx_cost (op1
, PLUS
, 1, speed
));
4637 *cost
+= extra_cost
->int_multiply_add
;
4640 *cost
+= (rtx_cost (new_op0
, PLUS
, 0, speed
)
4641 + rtx_cost (op1
, PLUS
, 1, speed
));
4655 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4657 if (CONST_INT_P (op1
)
4658 && aarch64_bitmask_imm (INTVAL (op1
), GET_MODE (x
)))
4660 *cost
+= rtx_cost (op0
, AND
, 0, speed
);
4664 if (GET_CODE (op0
) == NOT
)
4665 op0
= XEXP (op0
, 0);
4666 op0
= aarch64_strip_shift (op0
);
4667 *cost
+= (rtx_cost (op0
, AND
, 0, speed
)
4668 + rtx_cost (op1
, AND
, 1, speed
));
4675 if ((GET_MODE (x
) == DImode
4676 && GET_MODE (XEXP (x
, 0)) == SImode
)
4677 || GET_CODE (XEXP (x
, 0)) == MEM
)
4679 *cost
+= rtx_cost (XEXP (x
, 0), ZERO_EXTEND
, 0, speed
);
4685 if (GET_CODE (XEXP (x
, 0)) == MEM
)
4687 *cost
+= rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed
);
4693 if (!CONST_INT_P (XEXP (x
, 1)))
4694 *cost
+= COSTS_N_INSNS (2);
4701 /* Shifting by a register often takes an extra cycle. */
4702 if (speed
&& !CONST_INT_P (XEXP (x
, 1)))
4703 *cost
+= extra_cost
->register_shift
;
4705 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed
);
4709 if (!CONSTANT_P (XEXP (x
, 0)))
4710 *cost
+= rtx_cost (XEXP (x
, 0), HIGH
, 0, speed
);
4714 if (!CONSTANT_P (XEXP (x
, 1)))
4715 *cost
+= rtx_cost (XEXP (x
, 1), LO_SUM
, 1, speed
);
4716 *cost
+= rtx_cost (XEXP (x
, 0), LO_SUM
, 0, speed
);
4721 *cost
+= rtx_cost (XEXP (x
, 0), ZERO_EXTRACT
, 0, speed
);
4728 *cost
= COSTS_N_INSNS (1);
4729 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4731 if (CONST_INT_P (op1
)
4732 && exact_log2 (INTVAL (op1
)) > 0)
4734 *cost
+= rtx_cost (op0
, ASHIFT
, 0, speed
);
4738 if ((GET_CODE (op0
) == ZERO_EXTEND
4739 && GET_CODE (op1
) == ZERO_EXTEND
)
4740 || (GET_CODE (op0
) == SIGN_EXTEND
4741 && GET_CODE (op1
) == SIGN_EXTEND
))
4743 *cost
+= (rtx_cost (XEXP (op0
, 0), MULT
, 0, speed
)
4744 + rtx_cost (XEXP (op1
, 0), MULT
, 1, speed
));
4746 *cost
+= extra_cost
->int_multiply_extend
;
4751 *cost
+= extra_cost
->int_multiply
;
4755 if (GET_MODE (x
) == DFmode
)
4756 *cost
+= extra_cost
->double_multiply
;
4757 else if (GET_MODE (x
) == SFmode
)
4758 *cost
+= extra_cost
->float_multiply
;
4761 return false; /* All arguments need to be in registers. */
4765 *cost
= COSTS_N_INSNS (2);
4768 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4769 *cost
+= (extra_cost
->int_multiply_add
4770 + extra_cost
->int_divide
);
4771 else if (GET_MODE (x
) == DFmode
)
4772 *cost
+= (extra_cost
->double_multiply
4773 + extra_cost
->double_divide
);
4774 else if (GET_MODE (x
) == SFmode
)
4775 *cost
+= (extra_cost
->float_multiply
4776 + extra_cost
->float_divide
);
4778 return false; /* All arguments need to be in registers. */
4782 *cost
= COSTS_N_INSNS (1);
4785 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4786 *cost
+= extra_cost
->int_divide
;
4787 else if (GET_MODE (x
) == DFmode
)
4788 *cost
+= extra_cost
->double_divide
;
4789 else if (GET_MODE (x
) == SFmode
)
4790 *cost
+= extra_cost
->float_divide
;
4792 return false; /* All arguments need to be in registers. */
static int
aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
		      enum machine_mode mode ATTRIBUTE_UNUSED,
		      addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;

  if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
    return addr_cost->pre_modify;

  if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
    return addr_cost->post_modify;

  if (c == PLUS)
    {
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	return addr_cost->imm_offset;
      else if (GET_CODE (XEXP (x, 0)) == MULT
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	       || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	return addr_cost->register_extend;

      return addr_cost->register_offset;
    }
  else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return addr_cost->imm_offset;

  return 0;
}
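/* Rough mapping from AArch64 addressing forms to the cost fields used
   above (illustrative only, not exhaustive):

     [x0, #16]          imm_offset
     [x0, x1]           register_offset
     [x0, x1, lsl #3]   register_extend (a scaled index appears as MULT)
     [x0, w1, sxtw]     register_extend
     [x0, #16]!         pre_modify
     [x0], #16          post_modify  */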
static int
aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			    reg_class_t from, reg_class_t to)
{
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  /* When AdvSIMD instructions are disabled it is not possible to move
     a 128-bit value directly between Q registers.  This is handled in
     secondary reload.  A general register is used as a scratch to move
     the upper DI value and the lower DI value is moved directly,
     hence the cost is the sum of three moves.  */

  if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
    return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

  return regmove_cost->FP2FP;
}
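/* Worked example of the fallback above: with purely illustrative costs
   FP2FP = 2, FP2GP = 4 and GP2FP = 4, a 128-bit copy without AdvSIMD is
   costed as 4 + 4 + 2 = 10, i.e. the low half moved directly between FP
   registers plus the high half bounced through a general register.  */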
static int
aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			  reg_class_t rclass ATTRIBUTE_UNUSED,
			  bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				    tree vectype,
				    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
      case scalar_stmt:
	return aarch64_tune_params->vec_costs->scalar_stmt_cost;

      case scalar_load:
	return aarch64_tune_params->vec_costs->scalar_load_cost;

      case scalar_store:
	return aarch64_tune_params->vec_costs->scalar_store_cost;

      case vector_stmt:
	return aarch64_tune_params->vec_costs->vec_stmt_cost;

      case vector_load:
	return aarch64_tune_params->vec_costs->vec_align_load_cost;

      case vector_store:
	return aarch64_tune_params->vec_costs->vec_store_cost;

      case vec_to_scalar:
	return aarch64_tune_params->vec_costs->vec_to_scalar_cost;

      case scalar_to_vec:
	return aarch64_tune_params->vec_costs->scalar_to_vec_cost;

      case unaligned_load:
	return aarch64_tune_params->vec_costs->vec_unalign_load_cost;

      case unaligned_store:
	return aarch64_tune_params->vec_costs->vec_unalign_store_cost;

      case cond_branch_taken:
	return aarch64_tune_params->vec_costs->cond_taken_branch_cost;

      case cond_branch_not_taken:
	return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;

      case vec_promote_demote:
	return aarch64_tune_params->vec_costs->vec_stmt_cost;

      case vec_construct:
	elements = TYPE_VECTOR_SUBPARTS (vectype);
	return elements / 2 + 1;

      default:
	gcc_unreachable ();
    }
}
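/* Example: building a V4SImode vector from scalars has
   TYPE_VECTOR_SUBPARTS == 4, so the vec_construct cost above is
   4 / 2 + 1 = 3.  */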
/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		       struct _stmt_vec_info *stmt_info, int misalign,
		       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
	    aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 a function (linear for now) of the loop nest level.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	{
	  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
	  unsigned nest_level = loop_depth (loop);

	  count *= nest_level;
	}

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
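/* Example of the weighting above: a statement of cost 1, counted once
   (count == 1) in the body of a loop at nest level 2, is recorded as
   1 * 2 * 1 = 2 in the vect_body bucket.  */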
4960 static void initialize_aarch64_code_model (void);
4962 /* Parse the architecture extension string. */
4965 aarch64_parse_extension (char *str
)
4967 /* The extension string is parsed left to right. */
4968 const struct aarch64_option_extension
*opt
= NULL
;
4970 /* Flag to say whether we are adding or removing an extension. */
4971 int adding_ext
= -1;
4973 while (str
!= NULL
&& *str
!= 0)
4979 ext
= strchr (str
, '+');
4986 if (len
>= 2 && strncmp (str
, "no", 2) == 0)
4997 error ("missing feature modifier after %qs", "+no");
5001 /* Scan over the extensions table trying to find an exact match. */
5002 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
5004 if (strlen (opt
->name
) == len
&& strncmp (opt
->name
, str
, len
) == 0)
5006 /* Add or remove the extension. */
5008 aarch64_isa_flags
|= opt
->flags_on
;
5010 aarch64_isa_flags
&= ~(opt
->flags_off
);
5015 if (opt
->name
== NULL
)
5017 /* Extension not found in list. */
5018 error ("unknown feature modifier %qs", str
);
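/* Example: with "-march=armv8-a+crypto+nofp" the extension string is
   "+crypto+nofp".  The first token is looked up in all_extensions and its
   flags_on bits are added to aarch64_isa_flags; the second token starts
   with "no", so the flags_off bits of the "fp" entry are cleared instead.  */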
5028 /* Parse the ARCH string. */
5031 aarch64_parse_arch (void)
5034 const struct processor
*arch
;
5035 char *str
= (char *) alloca (strlen (aarch64_arch_string
) + 1);
5038 strcpy (str
, aarch64_arch_string
);
5040 ext
= strchr (str
, '+');
5049 error ("missing arch name in -march=%qs", str
);
5053 /* Loop through the list of supported ARCHs to find a match. */
5054 for (arch
= all_architectures
; arch
->name
!= NULL
; arch
++)
5056 if (strlen (arch
->name
) == len
&& strncmp (arch
->name
, str
, len
) == 0)
5058 selected_arch
= arch
;
5059 aarch64_isa_flags
= selected_arch
->flags
;
5060 selected_cpu
= &all_cores
[selected_arch
->core
];
5064 /* ARCH string contains at least one extension. */
5065 aarch64_parse_extension (ext
);
5072 /* ARCH name not found in list. */
5073 error ("unknown value %qs for -march", str
);
5077 /* Parse the CPU string. */
5080 aarch64_parse_cpu (void)
5083 const struct processor
*cpu
;
5084 char *str
= (char *) alloca (strlen (aarch64_cpu_string
) + 1);
5087 strcpy (str
, aarch64_cpu_string
);
5089 ext
= strchr (str
, '+');
5098 error ("missing cpu name in -mcpu=%qs", str
);
5102 /* Loop through the list of supported CPUs to find a match. */
5103 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
5105 if (strlen (cpu
->name
) == len
&& strncmp (cpu
->name
, str
, len
) == 0)
5108 aarch64_isa_flags
= selected_cpu
->flags
;
5112 /* CPU string contains at least one extension. */
5113 aarch64_parse_extension (ext
);
5120 /* CPU name not found in list. */
5121 error ("unknown value %qs for -mcpu", str
);
/* Parse the TUNE string.  */

static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
	{
	  selected_tune = cpu;
	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
  return;
}
/* Implement TARGET_OPTION_OVERRIDE.  */

static void
aarch64_override_options (void)
{
  /* march wins over mcpu, so when march is defined, mcpu takes the same value,
     otherwise march remains undefined.  mtune can be used with either march
     or mcpu.  */

  if (aarch64_arch_string)
    {
      aarch64_parse_arch ();
      aarch64_cpu_string = NULL;
    }

  if (aarch64_cpu_string)
    {
      aarch64_parse_cpu ();
      selected_arch = NULL;
    }

  if (aarch64_tune_string)
    aarch64_parse_tune ();

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "generic".  */
  if (!selected_cpu)
    {
      selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
      aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
    }

  gcc_assert (selected_cpu);

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!selected_tune)
    selected_tune = &all_cores[selected_cpu->core];

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  aarch64_tune_params = selected_tune->tune;

  aarch64_override_options_after_change ();
}
/* Implement targetm.override_options_after_change.  */

static void
aarch64_override_options_after_change (void)
{
  faked_omit_frame_pointer = false;

  /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
     that aarch64_frame_pointer_required will be called.  We need to remember
     whether flag_omit_frame_pointer was turned on normally or just faked.  */

  if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
    {
      flag_omit_frame_pointer = true;
      faked_omit_frame_pointer = true;
    }
}
static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */

static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
	{
	case AARCH64_CMODEL_TINY:
	  aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
	  break;
	case AARCH64_CMODEL_SMALL:
	  aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
	  break;
	case AARCH64_CMODEL_LARGE:
	  sorry ("code model %qs with -f%s", "large",
		 flag_pic > 1 ? "PIC" : "pic");
	default:
	  gcc_unreachable ();
	}
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	  : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x,
			 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_LARGE:
	  return SYMBOL_FORCE_TO_MEM;

	case AARCH64_CMODEL_TINY_PIC:
	case AARCH64_CMODEL_TINY:
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	case AARCH64_CMODEL_SMALL:
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE
	  || CONSTANT_POOL_ADDRESS_P (x))
	return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
	return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_TINY_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_TINY_GOT;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_SMALL_GOT;
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
5381 aarch64_constant_address_p (rtx x
)
5383 return (CONSTANT_P (x
) && memory_address_p (DImode
, x
));
5387 aarch64_legitimate_pic_operand_p (rtx x
)
5389 if (GET_CODE (x
) == SYMBOL_REF
5390 || (GET_CODE (x
) == CONST
5391 && GET_CODE (XEXP (x
, 0)) == PLUS
5392 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
5398 /* Return true if X holds either a quarter-precision or
5399 floating-point +0.0 constant. */
5401 aarch64_valid_floating_const (enum machine_mode mode
, rtx x
)
5403 if (!CONST_DOUBLE_P (x
))
5406 /* TODO: We could handle moving 0.0 to a TFmode register,
5407 but first we would like to refactor the movtf_aarch64
5408 to be more amicable to split moves properly and
5409 correctly gate on TARGET_SIMD. For now - reject all
5410 constants which are not to SFmode or DFmode registers. */
5411 if (!(mode
== SFmode
|| mode
== DFmode
))
5414 if (aarch64_float_const_zero_rtx_p (x
))
5416 return aarch64_float_const_representable_p (x
);
5420 aarch64_legitimate_constant_p (enum machine_mode mode
, rtx x
)
5422 /* Do not allow vector struct mode constants. We could support
5423 0 and -1 easily, but they need support in aarch64-simd.md. */
5424 if (TARGET_SIMD
&& aarch64_vect_struct_mode_p (mode
))
5427 /* This could probably go away because
5428 we now decompose CONST_INTs according to expand_mov_immediate. */
5429 if ((GET_CODE (x
) == CONST_VECTOR
5430 && aarch64_simd_valid_immediate (x
, mode
, false, NULL
))
5431 || CONST_INT_P (x
) || aarch64_valid_floating_const (mode
, x
))
5432 return !targetm
.cannot_force_const_mem (mode
, x
);
5434 if (GET_CODE (x
) == HIGH
5435 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
5438 return aarch64_constant_address_p (x
);
5442 aarch64_load_tp (rtx target
)
5445 || GET_MODE (target
) != Pmode
5446 || !register_operand (target
, Pmode
))
5447 target
= gen_reg_rtx (Pmode
);
5449 /* Can return in any reg. */
5450 emit_insn (gen_aarch64_load_tp_hard (target
));
5454 /* On AAPCS systems, this is the "struct __va_list". */
5455 static GTY(()) tree va_list_type
;
/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
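   (layout per AAPCS64; the field names match the FIELD_DECLs created below)

     struct __va_list
     {
       void *__stack;		-- next stacked argument
       void *__gr_top;		-- end of GP argument register save area
       void *__vr_top;		-- end of FP/SIMD argument register save area
       int   __gr_offs;		-- offset from __gr_top to next GP argument
       int   __vr_offs;		-- offset from __vr_top to next FP/SIMD argument
     };  */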
5472 aarch64_build_builtin_va_list (void)
5475 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
5477 /* Create the type. */
5478 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
5479 /* Give it the required name. */
5480 va_list_name
= build_decl (BUILTINS_LOCATION
,
5482 get_identifier ("__va_list"),
5484 DECL_ARTIFICIAL (va_list_name
) = 1;
5485 TYPE_NAME (va_list_type
) = va_list_name
;
5486 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
5488 /* Create the fields. */
5489 f_stack
= build_decl (BUILTINS_LOCATION
,
5490 FIELD_DECL
, get_identifier ("__stack"),
5492 f_grtop
= build_decl (BUILTINS_LOCATION
,
5493 FIELD_DECL
, get_identifier ("__gr_top"),
5495 f_vrtop
= build_decl (BUILTINS_LOCATION
,
5496 FIELD_DECL
, get_identifier ("__vr_top"),
5498 f_groff
= build_decl (BUILTINS_LOCATION
,
5499 FIELD_DECL
, get_identifier ("__gr_offs"),
5501 f_vroff
= build_decl (BUILTINS_LOCATION
,
5502 FIELD_DECL
, get_identifier ("__vr_offs"),
5505 DECL_ARTIFICIAL (f_stack
) = 1;
5506 DECL_ARTIFICIAL (f_grtop
) = 1;
5507 DECL_ARTIFICIAL (f_vrtop
) = 1;
5508 DECL_ARTIFICIAL (f_groff
) = 1;
5509 DECL_ARTIFICIAL (f_vroff
) = 1;
5511 DECL_FIELD_CONTEXT (f_stack
) = va_list_type
;
5512 DECL_FIELD_CONTEXT (f_grtop
) = va_list_type
;
5513 DECL_FIELD_CONTEXT (f_vrtop
) = va_list_type
;
5514 DECL_FIELD_CONTEXT (f_groff
) = va_list_type
;
5515 DECL_FIELD_CONTEXT (f_vroff
) = va_list_type
;
5517 TYPE_FIELDS (va_list_type
) = f_stack
;
5518 DECL_CHAIN (f_stack
) = f_grtop
;
5519 DECL_CHAIN (f_grtop
) = f_vrtop
;
5520 DECL_CHAIN (f_vrtop
) = f_groff
;
5521 DECL_CHAIN (f_groff
) = f_vroff
;
5523 /* Compute its layout. */
5524 layout_type (va_list_type
);
5526 return va_list_type
;
5529 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5531 aarch64_expand_builtin_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
5533 const CUMULATIVE_ARGS
*cum
;
5534 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
5535 tree stack
, grtop
, vrtop
, groff
, vroff
;
5537 int gr_save_area_size
;
5538 int vr_save_area_size
;
5541 cum
= &crtl
->args
.info
;
5543 = (NUM_ARG_REGS
- cum
->aapcs_ncrn
) * UNITS_PER_WORD
;
5545 = (NUM_FP_ARG_REGS
- cum
->aapcs_nvrn
) * UNITS_PER_VREG
;
5547 if (TARGET_GENERAL_REGS_ONLY
)
5549 if (cum
->aapcs_nvrn
> 0)
5550 sorry ("%qs and floating point or vector arguments",
5551 "-mgeneral-regs-only");
5552 vr_save_area_size
= 0;
5555 f_stack
= TYPE_FIELDS (va_list_type_node
);
5556 f_grtop
= DECL_CHAIN (f_stack
);
5557 f_vrtop
= DECL_CHAIN (f_grtop
);
5558 f_groff
= DECL_CHAIN (f_vrtop
);
5559 f_vroff
= DECL_CHAIN (f_groff
);
5561 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), valist
, f_stack
,
5563 grtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
), valist
, f_grtop
,
5565 vrtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
), valist
, f_vrtop
,
5567 groff
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
), valist
, f_groff
,
5569 vroff
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
), valist
, f_vroff
,
5572 /* Emit code to initialize STACK, which points to the next varargs stack
5573 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5574 by named arguments. STACK is 8-byte aligned. */
5575 t
= make_tree (TREE_TYPE (stack
), virtual_incoming_args_rtx
);
5576 if (cum
->aapcs_stack_size
> 0)
5577 t
= fold_build_pointer_plus_hwi (t
, cum
->aapcs_stack_size
* UNITS_PER_WORD
);
5578 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), stack
, t
);
5579 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5581 /* Emit code to initialize GRTOP, the top of the GR save area.
5582 virtual_incoming_args_rtx should have been 16 byte aligned. */
5583 t
= make_tree (TREE_TYPE (grtop
), virtual_incoming_args_rtx
);
5584 t
= build2 (MODIFY_EXPR
, TREE_TYPE (grtop
), grtop
, t
);
5585 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5587 /* Emit code to initialize VRTOP, the top of the VR save area.
5588 This address is gr_save_area_bytes below GRTOP, rounded
5589 down to the next 16-byte boundary. */
5590 t
= make_tree (TREE_TYPE (vrtop
), virtual_incoming_args_rtx
);
5591 vr_offset
= AARCH64_ROUND_UP (gr_save_area_size
,
5592 STACK_BOUNDARY
/ BITS_PER_UNIT
);
5595 t
= fold_build_pointer_plus_hwi (t
, -vr_offset
);
5596 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vrtop
), vrtop
, t
);
5597 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5599 /* Emit code to initialize GROFF, the offset from GRTOP of the
5600 next GPR argument. */
5601 t
= build2 (MODIFY_EXPR
, TREE_TYPE (groff
), groff
,
5602 build_int_cst (TREE_TYPE (groff
), -gr_save_area_size
));
5603 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5605 /* Likewise emit code to initialize VROFF, the offset from FTOP
5606 of the next VR argument. */
5607 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vroff
), vroff
,
5608 build_int_cst (TREE_TYPE (vroff
), -vr_save_area_size
));
5609 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
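/* Illustrative sketch only (not part of the build): the RTL emitted above
   initializes the va_list roughly as the C below would, where the STACK
   parameter stands for virtual_incoming_args_rtx and the helper name is
   made up for this sketch.  */
#if 0
static void
aarch64_va_start_sketch (struct __va_list *ap, char *stack,
			 int named_stack_bytes,
			 int gr_save_area_size, int vr_save_area_size)
{
  ap->__stack = stack + named_stack_bytes;
  ap->__gr_top = stack;
  ap->__vr_top = stack - AARCH64_ROUND_UP (gr_save_area_size, 16);
  ap->__gr_offs = -gr_save_area_size;
  ap->__vr_offs = -vr_save_area_size;
}
#endif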
5612 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5615 aarch64_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
5616 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
5620 bool is_ha
; /* is HFA or HVA. */
5621 bool dw_align
; /* double-word align. */
5622 enum machine_mode ag_mode
= VOIDmode
;
5624 enum machine_mode mode
;
5626 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
5627 tree stack
, f_top
, f_off
, off
, arg
, roundup
, on_stack
;
5628 HOST_WIDE_INT size
, rsize
, adjust
, align
;
5629 tree t
, u
, cond1
, cond2
;
5631 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
5633 type
= build_pointer_type (type
);
5635 mode
= TYPE_MODE (type
);
5637 f_stack
= TYPE_FIELDS (va_list_type_node
);
5638 f_grtop
= DECL_CHAIN (f_stack
);
5639 f_vrtop
= DECL_CHAIN (f_grtop
);
5640 f_groff
= DECL_CHAIN (f_vrtop
);
5641 f_vroff
= DECL_CHAIN (f_groff
);
5643 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), unshare_expr (valist
),
5644 f_stack
, NULL_TREE
);
5645 size
= int_size_in_bytes (type
);
5646 align
= aarch64_function_arg_alignment (mode
, type
) / BITS_PER_UNIT
;
5650 if (aarch64_vfp_is_call_or_return_candidate (mode
,
5656 /* TYPE passed in fp/simd registers. */
5657 if (TARGET_GENERAL_REGS_ONLY
)
5658 sorry ("%qs and floating point or vector arguments",
5659 "-mgeneral-regs-only");
5661 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
),
5662 unshare_expr (valist
), f_vrtop
, NULL_TREE
);
5663 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
),
5664 unshare_expr (valist
), f_vroff
, NULL_TREE
);
5666 rsize
= nregs
* UNITS_PER_VREG
;
5670 if (BYTES_BIG_ENDIAN
&& GET_MODE_SIZE (ag_mode
) < UNITS_PER_VREG
)
5671 adjust
= UNITS_PER_VREG
- GET_MODE_SIZE (ag_mode
);
5673 else if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
5674 && size
< UNITS_PER_VREG
)
5676 adjust
= UNITS_PER_VREG
- size
;
5681 /* TYPE passed in general registers. */
5682 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
),
5683 unshare_expr (valist
), f_grtop
, NULL_TREE
);
5684 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
),
5685 unshare_expr (valist
), f_groff
, NULL_TREE
);
5686 rsize
= (size
+ UNITS_PER_WORD
- 1) & -UNITS_PER_WORD
;
5687 nregs
= rsize
/ UNITS_PER_WORD
;
5692 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
5693 && size
< UNITS_PER_WORD
)
5695 adjust
= UNITS_PER_WORD
- size
;
5699 /* Get a local temporary for the field value. */
5700 off
= get_initialized_tmp_var (f_off
, pre_p
, NULL
);
5702 /* Emit code to branch if off >= 0. */
5703 t
= build2 (GE_EXPR
, boolean_type_node
, off
,
5704 build_int_cst (TREE_TYPE (off
), 0));
5705 cond1
= build3 (COND_EXPR
, ptr_type_node
, t
, NULL_TREE
, NULL_TREE
);
5709 /* Emit: offs = (offs + 15) & -16. */
5710 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
5711 build_int_cst (TREE_TYPE (off
), 15));
5712 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (off
), t
,
5713 build_int_cst (TREE_TYPE (off
), -16));
5714 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (off
), off
, t
);
5719 /* Update ap.__[g|v]r_offs */
5720 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
5721 build_int_cst (TREE_TYPE (off
), rsize
));
5722 t
= build2 (MODIFY_EXPR
, TREE_TYPE (f_off
), unshare_expr (f_off
), t
);
5726 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
5728 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5729 u
= build2 (GT_EXPR
, boolean_type_node
, unshare_expr (f_off
),
5730 build_int_cst (TREE_TYPE (f_off
), 0));
5731 cond2
= build3 (COND_EXPR
, ptr_type_node
, u
, NULL_TREE
, NULL_TREE
);
5733 /* String up: make sure the assignment happens before the use. */
5734 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (cond2
), t
, cond2
);
5735 COND_EXPR_ELSE (cond1
) = t
;
5737 /* Prepare the trees handling the argument that is passed on the stack;
5738 the top level node will store in ON_STACK. */
5739 arg
= get_initialized_tmp_var (stack
, pre_p
, NULL
);
5742 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5743 t
= fold_convert (intDI_type_node
, arg
);
5744 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
5745 build_int_cst (TREE_TYPE (t
), 15));
5746 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
5747 build_int_cst (TREE_TYPE (t
), -16));
5748 t
= fold_convert (TREE_TYPE (arg
), t
);
5749 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (arg
), arg
, t
);
5753 /* Advance ap.__stack */
5754 t
= fold_convert (intDI_type_node
, arg
);
5755 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
5756 build_int_cst (TREE_TYPE (t
), size
+ 7));
5757 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
5758 build_int_cst (TREE_TYPE (t
), -8));
5759 t
= fold_convert (TREE_TYPE (arg
), t
);
5760 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), unshare_expr (stack
), t
);
5761 /* String up roundup and advance. */
5763 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
5764 /* String up with arg */
5765 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), t
, arg
);
5766 /* Big-endianness related address adjustment. */
5767 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
5768 && size
< UNITS_PER_WORD
)
5770 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (arg
), arg
,
5771 size_int (UNITS_PER_WORD
- size
));
5772 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), on_stack
, t
);
5775 COND_EXPR_THEN (cond1
) = unshare_expr (on_stack
);
5776 COND_EXPR_THEN (cond2
) = unshare_expr (on_stack
);
5778 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5781 t
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (off
), off
,
5782 build_int_cst (TREE_TYPE (off
), adjust
));
5784 t
= fold_convert (sizetype
, t
);
5785 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (f_top
), f_top
, t
);
5789 /* type ha; // treat as "struct {ftype field[n];}"
5790 ... [computing offs]
5791 for (i = 0; i <nregs; ++i, offs += 16)
5792 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5795 tree tmp_ha
, field_t
, field_ptr_t
;
5797 /* Declare a local variable. */
5798 tmp_ha
= create_tmp_var_raw (type
, "ha");
5799 gimple_add_tmp_var (tmp_ha
);
5801 /* Establish the base type. */
5805 field_t
= float_type_node
;
5806 field_ptr_t
= float_ptr_type_node
;
5809 field_t
= double_type_node
;
5810 field_ptr_t
= double_ptr_type_node
;
5813 field_t
= long_double_type_node
;
5814 field_ptr_t
= long_double_ptr_type_node
;
5816 /* The half precision and quad precision are not fully supported yet. Enable
5817 the following code after the support is complete. Need to find the correct
5818 type node for __fp16 *. */
5821 field_t
= float_type_node
;
5822 field_ptr_t
= float_ptr_type_node
;
5828 tree innertype
= make_signed_type (GET_MODE_PRECISION (SImode
));
5829 field_t
= build_vector_type_for_mode (innertype
, ag_mode
);
5830 field_ptr_t
= build_pointer_type (field_t
);
5837 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
5838 tmp_ha
= build1 (ADDR_EXPR
, field_ptr_t
, tmp_ha
);
5840 t
= fold_convert (field_ptr_t
, addr
);
5841 t
= build2 (MODIFY_EXPR
, field_t
,
5842 build1 (INDIRECT_REF
, field_t
, tmp_ha
),
5843 build1 (INDIRECT_REF
, field_t
, t
));
5845 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5846 for (i
= 1; i
< nregs
; ++i
)
5848 addr
= fold_build_pointer_plus_hwi (addr
, UNITS_PER_VREG
);
5849 u
= fold_convert (field_ptr_t
, addr
);
5850 u
= build2 (MODIFY_EXPR
, field_t
,
5851 build2 (MEM_REF
, field_t
, tmp_ha
,
5852 build_int_cst (field_ptr_t
,
5854 int_size_in_bytes (field_t
)))),
5855 build1 (INDIRECT_REF
, field_t
, u
));
5856 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), t
, u
);
5859 u
= fold_convert (TREE_TYPE (f_top
), tmp_ha
);
5860 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (f_top
), t
, u
);
5863 COND_EXPR_ELSE (cond2
) = t
;
5864 addr
= fold_convert (build_pointer_type (type
), cond1
);
5865 addr
= build_va_arg_indirect_ref (addr
);
5868 addr
= build_va_arg_indirect_ref (addr
);
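/* Illustrative sketch only (not part of the build): for a small integer
   argument, the trees built above implement roughly the AAPCS64 va_arg
   algorithm below.  The FP/SIMD path is analogous but uses __vr_top,
   __vr_offs and 16-byte register units; over-aligned arguments, big-endian
   padding and HFA handling are omitted, and the helper name is made up for
   this sketch.  */
#if 0
static void *
aarch64_va_arg_gr_sketch (struct __va_list *ap, int size)
{
  int off = ap->__gr_offs;

  if (off < 0)
    {
      /* Try the GP register save area first.  */
      ap->__gr_offs = off + ((size + 7) & -8);
      if (ap->__gr_offs <= 0)
	return (char *) ap->__gr_top + off;
    }

  /* Otherwise the argument lives on the stack; advance __stack past it.  */
  {
    void *addr = ap->__stack;
    ap->__stack = (char *) ap->__stack + ((size + 7) & -8);
    return addr;
  }
}
#endif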
5873 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5876 aarch64_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
5877 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
5880 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
5881 CUMULATIVE_ARGS local_cum
;
5882 int gr_saved
, vr_saved
;
5884 /* The caller has advanced CUM up to, but not beyond, the last named
5885 argument. Advance a local copy of CUM past the last "real" named
5886 argument, to find out how many registers are left over. */
5888 aarch64_function_arg_advance (pack_cumulative_args(&local_cum
), mode
, type
, true);
5890 /* Found out how many registers we need to save. */
5891 gr_saved
= NUM_ARG_REGS
- local_cum
.aapcs_ncrn
;
5892 vr_saved
= NUM_FP_ARG_REGS
- local_cum
.aapcs_nvrn
;
5894 if (TARGET_GENERAL_REGS_ONLY
)
5896 if (local_cum
.aapcs_nvrn
> 0)
5897 sorry ("%qs and floating point or vector arguments",
5898 "-mgeneral-regs-only");
5908 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5909 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
,
5910 - gr_saved
* UNITS_PER_WORD
);
5911 mem
= gen_frame_mem (BLKmode
, ptr
);
5912 set_mem_alias_set (mem
, get_varargs_alias_set ());
5914 move_block_from_reg (local_cum
.aapcs_ncrn
+ R0_REGNUM
,
5919 /* We can't use move_block_from_reg, because it will use
5920 the wrong mode, storing D regs only. */
5921 enum machine_mode mode
= TImode
;
5924 /* Set OFF to the offset from virtual_incoming_args_rtx of
5925 the first vector register. The VR save area lies below
5926 the GR one, and is aligned to 16 bytes. */
5927 off
= -AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
5928 STACK_BOUNDARY
/ BITS_PER_UNIT
);
5929 off
-= vr_saved
* UNITS_PER_VREG
;
5931 for (i
= local_cum
.aapcs_nvrn
; i
< NUM_FP_ARG_REGS
; ++i
)
5935 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
, off
);
5936 mem
= gen_frame_mem (mode
, ptr
);
5937 set_mem_alias_set (mem
, get_varargs_alias_set ());
5938 aarch64_emit_move (mem
, gen_rtx_REG (mode
, V0_REGNUM
+ i
));
5939 off
+= UNITS_PER_VREG
;
5944 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5945 any complication of having crtl->args.pretend_args_size changed. */
5946 cfun
->machine
->saved_varargs_size
5947 = (AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
5948 STACK_BOUNDARY
/ BITS_PER_UNIT
)
5949 + vr_saved
* UNITS_PER_VREG
);
5953 aarch64_conditional_register_usage (void)
5958 for (i
= V0_REGNUM
; i
<= V31_REGNUM
; i
++)
5961 call_used_regs
[i
] = 1;
5966 /* Walk down the type tree of TYPE counting consecutive base elements.
5967 If *MODEP is VOIDmode, then set it to the first valid floating point
5968 type. If a non-floating point type is found, or if a floating point
5969 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5970 otherwise return the count in the sub-tree. */
5972 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
5974 enum machine_mode mode
;
5977 switch (TREE_CODE (type
))
5980 mode
= TYPE_MODE (type
);
5981 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
5984 if (*modep
== VOIDmode
)
5993 mode
= TYPE_MODE (TREE_TYPE (type
));
5994 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
5997 if (*modep
== VOIDmode
)
6006 /* Use V2SImode and V4SImode as representatives of all 64-bit
6007 and 128-bit vector types. */
6008 size
= int_size_in_bytes (type
);
6021 if (*modep
== VOIDmode
)
6024 /* Vector modes are considered to be opaque: two vectors are
6025 equivalent for the purposes of being homogeneous aggregates
6026 if they are the same size. */
6035 tree index
= TYPE_DOMAIN (type
);
6037 /* Can't handle incomplete types. */
6038 if (!COMPLETE_TYPE_P (type
))
6041 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
6044 || !TYPE_MAX_VALUE (index
)
6045 || !host_integerp (TYPE_MAX_VALUE (index
), 1)
6046 || !TYPE_MIN_VALUE (index
)
6047 || !host_integerp (TYPE_MIN_VALUE (index
), 1)
6051 count
*= (1 + tree_low_cst (TYPE_MAX_VALUE (index
), 1)
6052 - tree_low_cst (TYPE_MIN_VALUE (index
), 1));
6054 /* There must be no padding. */
6055 if (!host_integerp (TYPE_SIZE (type
), 1)
6056 || (tree_low_cst (TYPE_SIZE (type
), 1)
6057 != count
* GET_MODE_BITSIZE (*modep
)))
6069 /* Can't handle incomplete types. */
6070 if (!COMPLETE_TYPE_P (type
))
6073 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
6075 if (TREE_CODE (field
) != FIELD_DECL
)
6078 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
6084 /* There must be no padding. */
6085 if (!host_integerp (TYPE_SIZE (type
), 1)
6086 || (tree_low_cst (TYPE_SIZE (type
), 1)
6087 != count
* GET_MODE_BITSIZE (*modep
)))
6094 case QUAL_UNION_TYPE
:
6096 /* These aren't very interesting except in a degenerate case. */
6101 /* Can't handle incomplete types. */
6102 if (!COMPLETE_TYPE_P (type
))
6105 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
6107 if (TREE_CODE (field
) != FIELD_DECL
)
6110 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
6113 count
= count
> sub_count
? count
: sub_count
;
6116 /* There must be no padding. */
6117 if (!host_integerp (TYPE_SIZE (type
), 1)
6118 || (tree_low_cst (TYPE_SIZE (type
), 1)
6119 != count
* GET_MODE_BITSIZE (*modep
)))
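/* Examples of the walk above: struct { float x, y, z; } gives a count of 3
   with *MODEP == SFmode (a homogeneous floating-point aggregate);
   double d[4] gives 4 with *MODEP == DFmode; struct { float f; int i; }
   returns -1 because the integer field does not match the floating-point
   base mode.  */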
/* Return true if we use LRA instead of reload pass.  */
static bool
aarch64_lra_p (void)
{
  return aarch64_lra_flag;
}
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */

static bool
aarch64_composite_type_p (const_tree type,
			  enum machine_mode mode)
{
  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}
/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

static bool
aarch64_short_vector_p (const_tree type,
			enum machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (!aarch64_composite_type_p (type, mode)
	   && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16) ? true : false;
}
6192 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6193 shall be passed or returned in simd/fp register(s) (providing these
6194 parameter passing registers are available).
6196 Upon successful return, *COUNT returns the number of needed registers,
6197 *BASE_MODE returns the mode of the individual register and when IS_HAF
6198 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6199 floating-point aggregate or a homogeneous short-vector aggregate. */
6202 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode
,
6204 enum machine_mode
*base_mode
,
6208 enum machine_mode new_mode
= VOIDmode
;
6209 bool composite_p
= aarch64_composite_type_p (type
, mode
);
6211 if (is_ha
!= NULL
) *is_ha
= false;
6213 if ((!composite_p
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6214 || aarch64_short_vector_p (type
, mode
))
6219 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
6221 if (is_ha
!= NULL
) *is_ha
= true;
6223 new_mode
= GET_MODE_INNER (mode
);
6225 else if (type
&& composite_p
)
6227 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
6229 if (ag_count
> 0 && ag_count
<= HA_MAX_NUM_FLDS
)
6231 if (is_ha
!= NULL
) *is_ha
= true;
6240 *base_mode
= new_mode
;
6244 /* Implement TARGET_STRUCT_VALUE_RTX. */
6247 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED
,
6248 int incoming ATTRIBUTE_UNUSED
)
6250 return gen_rtx_REG (Pmode
, AARCH64_STRUCT_VALUE_REGNUM
);
6253 /* Implements target hook vector_mode_supported_p. */
6255 aarch64_vector_mode_supported_p (enum machine_mode mode
)
6258 && (mode
== V4SImode
|| mode
== V8HImode
6259 || mode
== V16QImode
|| mode
== V2DImode
6260 || mode
== V2SImode
|| mode
== V4HImode
6261 || mode
== V8QImode
|| mode
== V2SFmode
6262 || mode
== V4SFmode
|| mode
== V2DFmode
))
6268 /* Return appropriate SIMD container
6269 for MODE within a vector of WIDTH bits. */
6270 static enum machine_mode
6271 aarch64_simd_container_mode (enum machine_mode mode
, unsigned width
)
6273 gcc_assert (width
== 64 || width
== 128);
6312 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6313 static enum machine_mode
6314 aarch64_preferred_simd_mode (enum machine_mode mode
)
6316 return aarch64_simd_container_mode (mode
, 128);
6319 /* Return the bitmask of possible vector sizes for the vectorizer
6322 aarch64_autovectorize_vector_sizes (void)
6327 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6328 vector types in order to conform to the AAPCS64 (see "Procedure
6329 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6330 qualify for emission with the mangled names defined in that document,
6331 a vector type must not only be of the correct mode but also be
6332 composed of AdvSIMD vector element types (e.g.
6333 _builtin_aarch64_simd_qi); these types are registered by
6334 aarch64_init_simd_builtins (). In other words, vector types defined
6335 in other ways e.g. via vector_size attribute will get default
6339 enum machine_mode mode
;
6340 const char *element_type_name
;
6341 const char *mangled_name
;
6342 } aarch64_simd_mangle_map_entry
;
6344 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map
[] = {
6345 /* 64-bit containerized types. */
6346 { V8QImode
, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6347 { V8QImode
, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6348 { V4HImode
, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6349 { V4HImode
, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6350 { V2SImode
, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6351 { V2SImode
, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6352 { V2SFmode
, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6353 { V8QImode
, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6354 { V4HImode
, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6355 /* 128-bit containerized types. */
6356 { V16QImode
, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6357 { V16QImode
, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6358 { V8HImode
, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6359 { V8HImode
, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6360 { V4SImode
, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6361 { V4SImode
, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6362 { V2DImode
, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6363 { V2DImode
, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6364 { V4SFmode
, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6365 { V2DFmode
, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6366 { V16QImode
, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6367 { V8HImode
, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6368 { VOIDmode
, NULL
, NULL
}
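/* Example: the arm_neon.h type int8x8_t has mode V8QImode and element type
   __builtin_aarch64_simd_qi, so it matches the first entry above; a
   declaration such as "void f (int8x8_t);" therefore mangles as
   _Z1f10__Int8x8_t instead of using the default vector mangling.  */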
6371 /* Implement TARGET_MANGLE_TYPE. */
6374 aarch64_mangle_type (const_tree type
)
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
6378 if (lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
6379 return "St9__va_list";
6381 /* Check the mode of the vector type, and the name of the vector
6382 element type, against the table. */
6383 if (TREE_CODE (type
) == VECTOR_TYPE
)
6385 aarch64_simd_mangle_map_entry
*pos
= aarch64_simd_mangle_map
;
6387 while (pos
->mode
!= VOIDmode
)
6389 tree elt_type
= TREE_TYPE (type
);
6391 if (pos
->mode
== TYPE_MODE (type
)
6392 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
6393 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
6394 pos
->element_type_name
))
6395 return pos
->mangled_name
;
6401 /* Use the default mangling. */
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
6419 /* Return true iff x is a uniform vector of floating-point
6420 constants, and the constant can be represented in
6421 quarter-precision form. Note, as aarch64_float_const_representable
6422 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6424 aarch64_vect_float_const_representable_p (rtx x
)
6427 REAL_VALUE_TYPE r0
, ri
;
6430 if (GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_FLOAT
)
6433 x0
= CONST_VECTOR_ELT (x
, 0);
6434 if (!CONST_DOUBLE_P (x0
))
6437 REAL_VALUE_FROM_CONST_DOUBLE (r0
, x0
);
6439 for (i
= 1; i
< CONST_VECTOR_NUNITS (x
); i
++)
6441 xi
= CONST_VECTOR_ELT (x
, i
);
6442 if (!CONST_DOUBLE_P (xi
))
6445 REAL_VALUE_FROM_CONST_DOUBLE (ri
, xi
);
6446 if (!REAL_VALUES_EQUAL (r0
, ri
))
6450 return aarch64_float_const_representable_p (x0
);
6453 /* Return true for valid and false for invalid. */
6455 aarch64_simd_valid_immediate (rtx op
, enum machine_mode mode
, bool inverse
,
6456 struct simd_immediate_info
*info
)
6458 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6460 for (i = 0; i < idx; i += (STRIDE)) \
6465 immtype = (CLASS); \
6466 elsize = (ELSIZE); \
6472 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
6473 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
6474 unsigned char bytes
[16];
6475 int immtype
= -1, matches
;
6476 unsigned int invmask
= inverse
? 0xff : 0;
6479 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
6481 if (! (aarch64_simd_imm_zero_p (op
, mode
)
6482 || aarch64_vect_float_const_representable_p (op
)))
6487 info
->value
= CONST_VECTOR_ELT (op
, 0);
6488 info
->element_width
= GET_MODE_BITSIZE (GET_MODE (info
->value
));
6496 /* Splat vector constant out into a byte vector. */
6497 for (i
= 0; i
< n_elts
; i
++)
6499 rtx el
= CONST_VECTOR_ELT (op
, i
);
6500 unsigned HOST_WIDE_INT elpart
;
6501 unsigned int part
, parts
;
6503 if (GET_CODE (el
) == CONST_INT
)
6505 elpart
= INTVAL (el
);
6508 else if (GET_CODE (el
) == CONST_DOUBLE
)
6510 elpart
= CONST_DOUBLE_LOW (el
);
6516 for (part
= 0; part
< parts
; part
++)
6519 for (byte
= 0; byte
< innersize
; byte
++)
6521 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
6522 elpart
>>= BITS_PER_UNIT
;
6524 if (GET_CODE (el
) == CONST_DOUBLE
)
6525 elpart
= CONST_DOUBLE_HIGH (el
);
6530 gcc_assert (idx
== GET_MODE_SIZE (mode
));
6534 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
6535 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 0, 0);
6537 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
6538 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
6540 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
6541 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
6543 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
6544 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3], 24, 0);
6546 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0, 0, 0);
6548 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1], 8, 0);
6550 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
6551 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 0, 1);
6553 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
6554 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
6556 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
6557 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
6559 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
6560 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3], 24, 1);
6562 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff, 0, 1);
6564 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1], 8, 1);
6566 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
6567 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
6569 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
6570 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
6572 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
6573 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
6575 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
6576 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
6578 CHECK (1, 8, 16, bytes
[i
] == bytes
[0], 0, 0);
6580 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
6581 && bytes
[i
] == bytes
[(i
+ 8) % idx
], 0, 0);
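/* Example: a V4SImode constant with every element equal to 0x47 splats to
   the byte pattern 47 00 00 00 repeated four times, so the first CHECK
   above (stride 4, element size 32, shift 0) matches and the constant can
   be generated with a single MOVI Vd.4S, #0x47.  */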
6590 info
->element_width
= elsize
;
6591 info
->mvn
= emvn
!= 0;
6592 info
->shift
= eshift
;
6594 unsigned HOST_WIDE_INT imm
= 0;
6596 if (immtype
>= 12 && immtype
<= 15)
6599 /* Un-invert bytes of recognized vector, if necessary. */
6601 for (i
= 0; i
< idx
; i
++)
6602 bytes
[i
] ^= invmask
;
6606 /* FIXME: Broken on 32-bit H_W_I hosts. */
6607 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
6609 for (i
= 0; i
< 8; i
++)
6610 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
6611 << (i
* BITS_PER_UNIT
);
6614 info
->value
= GEN_INT (imm
);
6618 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
6619 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
6621 /* Construct 'abcdefgh' because the assembler cannot handle
6622 generic constants. */
6625 imm
= (imm
>> info
->shift
) & 0xff;
6626 info
->value
= GEN_INT (imm
);
6635 aarch64_const_vec_all_same_int_p (rtx x
,
6636 HOST_WIDE_INT minval
,
6637 HOST_WIDE_INT maxval
)
6639 HOST_WIDE_INT firstval
;
6642 if (GET_CODE (x
) != CONST_VECTOR
6643 || GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_INT
)
6646 firstval
= INTVAL (CONST_VECTOR_ELT (x
, 0));
6647 if (firstval
< minval
|| firstval
> maxval
)
6650 count
= CONST_VECTOR_NUNITS (x
);
6651 for (i
= 1; i
< count
; i
++)
6652 if (INTVAL (CONST_VECTOR_ELT (x
, i
)) != firstval
)
6658 /* Check of immediate shift constants are within range. */
6660 aarch64_simd_shift_imm_p (rtx x
, enum machine_mode mode
, bool left
)
6662 int bit_width
= GET_MODE_UNIT_SIZE (mode
) * BITS_PER_UNIT
;
6664 return aarch64_const_vec_all_same_int_p (x
, 0, bit_width
- 1);
6666 return aarch64_const_vec_all_same_int_p (x
, 1, bit_width
);
6669 /* Return true if X is a uniform vector where all elements
6670 are either the floating-point constant 0.0 or the
6671 integer constant 0. */
6673 aarch64_simd_imm_zero_p (rtx x
, enum machine_mode mode
)
6675 return x
== CONST0_RTX (mode
);
bool
aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT imm = INTVAL (x);
  int i;

  for (i = 0; i < 8; i++)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0xff && byte != 0)
	return false;
      imm >>= 8;
    }

  return true;
}
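/* Example: 0xff00ff0000ff00ff consists solely of 0x00 and 0xff bytes, so it
   is accepted (it can be generated with a 64-bit MOVI byte-mask immediate),
   whereas 0x0123456789abcdef is rejected.  */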
6696 aarch64_mov_operand_p (rtx x
,
6697 enum aarch64_symbol_context context
,
6698 enum machine_mode mode
)
6700 if (GET_CODE (x
) == HIGH
6701 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
6704 if (CONST_INT_P (x
) && aarch64_move_imm (INTVAL (x
), mode
))
6707 if (GET_CODE (x
) == SYMBOL_REF
&& mode
== DImode
&& CONSTANT_ADDRESS_P (x
))
6710 return aarch64_classify_symbolic_expression (x
, context
)
6711 == SYMBOL_TINY_ABSOLUTE
;
6714 /* Return a const_int vector of VAL. */
6716 aarch64_simd_gen_const_vector_dup (enum machine_mode mode
, int val
)
6718 int nunits
= GET_MODE_NUNITS (mode
);
6719 rtvec v
= rtvec_alloc (nunits
);
6722 for (i
=0; i
< nunits
; i
++)
6723 RTVEC_ELT (v
, i
) = GEN_INT (val
);
6725 return gen_rtx_CONST_VECTOR (mode
, v
);
6728 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6731 aarch64_simd_scalar_immediate_valid_for_move (rtx op
, enum machine_mode mode
)
6733 enum machine_mode vmode
;
6735 gcc_assert (!VECTOR_MODE_P (mode
));
6736 vmode
= aarch64_preferred_simd_mode (mode
);
6737 rtx op_v
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (op
));
6738 return aarch64_simd_valid_immediate (op_v
, vmode
, false, NULL
);
6741 /* Construct and return a PARALLEL RTX vector. */
6743 aarch64_simd_vect_par_cnst_half (enum machine_mode mode
, bool high
)
6745 int nunits
= GET_MODE_NUNITS (mode
);
6746 rtvec v
= rtvec_alloc (nunits
/ 2);
6747 int base
= high
? nunits
/ 2 : 0;
6751 for (i
=0; i
< nunits
/ 2; i
++)
6752 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
6754 t1
= gen_rtx_PARALLEL (mode
, v
);
6758 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6759 HIGH (exclusive). */
6761 aarch64_simd_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
6764 gcc_assert (GET_CODE (operand
) == CONST_INT
);
6765 lane
= INTVAL (operand
);
6767 if (lane
< low
|| lane
>= high
)
6768 error ("lane out of range");
6772 aarch64_simd_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
6774 gcc_assert (GET_CODE (operand
) == CONST_INT
);
6775 HOST_WIDE_INT lane
= INTVAL (operand
);
6777 if (lane
< low
|| lane
>= high
)
6778 error ("constant out of range");
6781 /* Emit code to reinterpret one AdvSIMD type as another,
6782 without altering bits. */
6784 aarch64_simd_reinterpret (rtx dest
, rtx src
)
6786 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
6789 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
6792 aarch64_simd_emit_pair_result_insn (enum machine_mode mode
,
6793 rtx (*intfn
) (rtx
, rtx
, rtx
), rtx destaddr
,
6796 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
6797 rtx tmp1
= gen_reg_rtx (mode
);
6798 rtx tmp2
= gen_reg_rtx (mode
);
6800 emit_insn (intfn (tmp1
, op1
, tmp2
));
6802 emit_move_insn (mem
, tmp1
);
6803 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
6804 emit_move_insn (mem
, tmp2
);
6807 /* Return TRUE if OP is a valid vector addressing mode. */
6809 aarch64_simd_mem_operand_p (rtx op
)
6811 return MEM_P (op
) && (GET_CODE (XEXP (op
, 0)) == POST_INC
6812 || GET_CODE (XEXP (op
, 0)) == REG
);
6815 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6816 not to early-clobber SRC registers in the process.
6818 We assume that the operands described by SRC and DEST represent a
6819 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6820 number of components into which the copy has been decomposed. */
6822 aarch64_simd_disambiguate_copy (rtx
*operands
, rtx
*dest
,
6823 rtx
*src
, unsigned int count
)
6827 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
6828 || REGNO (operands
[0]) < REGNO (operands
[1]))
6830 for (i
= 0; i
< count
; i
++)
6832 operands
[2 * i
] = dest
[i
];
6833 operands
[2 * i
+ 1] = src
[i
];
6838 for (i
= 0; i
< count
; i
++)
6840 operands
[2 * i
] = dest
[count
- i
- 1];
6841 operands
[2 * i
+ 1] = src
[count
- i
- 1];
6846 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6847 one of VSTRUCT modes: OI, CI or XI. */
6849 aarch64_simd_attr_length_move (rtx insn
)
6851 enum machine_mode mode
;
6853 extract_insn_cached (insn
);
6855 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
6857 mode
= GET_MODE (recog_data
.operand
[0]);
6873 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6874 alignment of a vector to 128 bits. */
6875 static HOST_WIDE_INT
6876 aarch64_simd_vector_alignment (const_tree type
)
6878 HOST_WIDE_INT align
= tree_low_cst (TYPE_SIZE (type
), 0);
6879 return MIN (align
, 128);
6882 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6884 aarch64_simd_vector_alignment_reachable (const_tree type
, bool is_packed
)
6889 /* We guarantee alignment for vectors up to 128-bits. */
6890 if (tree_int_cst_compare (TYPE_SIZE (type
),
6891 bitsize_int (BIGGEST_ALIGNMENT
)) > 0)
6894 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6898 /* If VALS is a vector constant that can be loaded into a register
6899 using DUP, generate instructions to do so and return an RTX to
6900 assign to the register. Otherwise return NULL_RTX. */
6902 aarch64_simd_dup_constant (rtx vals
)
6904 enum machine_mode mode
= GET_MODE (vals
);
6905 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
6906 int n_elts
= GET_MODE_NUNITS (mode
);
6907 bool all_same
= true;
6911 if (GET_CODE (vals
) != CONST_VECTOR
)
6914 for (i
= 1; i
< n_elts
; ++i
)
6916 x
= CONST_VECTOR_ELT (vals
, i
);
6917 if (!rtx_equal_p (x
, CONST_VECTOR_ELT (vals
, 0)))
6924 /* We can load this constant by using DUP and a constant in a
6925 single ARM register. This will be cheaper than a vector
6927 x
= copy_to_mode_reg (inner_mode
, CONST_VECTOR_ELT (vals
, 0));
6928 return gen_rtx_VEC_DUPLICATE (mode
, x
);
6932 /* Generate code to load VALS, which is a PARALLEL containing only
6933 constants (for vec_init) or CONST_VECTOR, efficiently into a
6934 register. Returns an RTX to copy into the register, or NULL_RTX
6935 for a PARALLEL that can not be converted into a CONST_VECTOR. */
6937 aarch64_simd_make_constant (rtx vals
)
6939 enum machine_mode mode
= GET_MODE (vals
);
6941 rtx const_vec
= NULL_RTX
;
6942 int n_elts
= GET_MODE_NUNITS (mode
);
6946 if (GET_CODE (vals
) == CONST_VECTOR
)
6948 else if (GET_CODE (vals
) == PARALLEL
)
6950 /* A CONST_VECTOR must contain only CONST_INTs and
6951 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6952 Only store valid constants in a CONST_VECTOR. */
6953 for (i
= 0; i
< n_elts
; ++i
)
6955 rtx x
= XVECEXP (vals
, 0, i
);
6956 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
6959 if (n_const
== n_elts
)
6960 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
6965 if (const_vec
!= NULL_RTX
6966 && aarch64_simd_valid_immediate (const_vec
, mode
, false, NULL
))
6967 /* Load using MOVI/MVNI. */
6969 else if ((const_dup
= aarch64_simd_dup_constant (vals
)) != NULL_RTX
)
6970 /* Loaded using DUP. */
6972 else if (const_vec
!= NULL_RTX
)
6973 /* Load from constant pool. We can not take advantage of single-cycle
6974 LD1 because we need a PC-relative addressing mode. */
6977 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6978 We can not construct an initializer. */
void
aarch64_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  x = XVECEXP (vals, 0, 0);
  if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
    n_var = 1, one_var = 0;

  for (i = 1; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
        ++n_var, one_var = i;

      if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = aarch64_simd_make_constant (vals);
      if (constant != NULL_RTX)
        {
          emit_move_insn (target, constant);
          return;
        }
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);
      enum insn_code icode;

      /* Load constant part of vector, substitute neighboring value for
         varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
      aarch64_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      icode = optab_handler (vec_set_optab, mode);
      gcc_assert (icode != CODE_FOR_nothing);
      emit_insn (GEN_FCN (icode) (target, x, index));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
                                       i * GET_MODE_SIZE (inner_mode)),
                    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
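/* Worked example (illustrative, not from the original source): for a
   V4SImode initializer {1, 2, x, 4} with one variable element, the code
   above first builds the constant vector {1, 2, 4, 4} (the neighbouring
   constant 4 substitutes for lane 2) and then uses the vec_set pattern
   to insert x into lane 2, avoiding a trip through the stack.  */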
static unsigned HOST_WIDE_INT
aarch64_shift_truncation_mask (enum machine_mode mode)
{
  return
    (aarch64_vector_mode_supported_p (mode)
     || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
}
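/* For example (illustrative): for scalar SImode this hook returns 31, so
   the middle end may omit an explicit "& 31" applied to a shift count;
   for Advanced SIMD vector and structure modes it returns 0, meaning no
   truncation of the shift amount may be assumed.  */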
#ifndef TLS_SECTION_ASM_FLAG
#define TLS_SECTION_ASM_FLAG 'T'
#endif

void
aarch64_elf_asm_named_section (const char *name, unsigned int flags,
                               tree decl ATTRIBUTE_UNUSED)
{
  char flagchars[10], *f = flagchars;

  /* If we have already declared this section, we can use an
     abbreviated form to switch back to it -- unless this section is
     part of a COMDAT group, in which case GAS requires the full
     declaration every time.  */
  if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
      && (flags & SECTION_DECLARED))
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  if (!(flags & SECTION_DEBUG))
    *f++ = 'a';
  if (flags & SECTION_WRITE)
    *f++ = 'w';
  if (flags & SECTION_CODE)
    *f++ = 'x';
  if (flags & SECTION_SMALL)
    *f++ = 's';
  if (flags & SECTION_MERGE)
    *f++ = 'M';
  if (flags & SECTION_STRINGS)
    *f++ = 'S';
  if (flags & SECTION_TLS)
    *f++ = TLS_SECTION_ASM_FLAG;
  if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
    *f++ = 'G';
  *f = '\0';

  fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);

  if (!(flags & SECTION_NOTYPE))
    {
      const char *type;
      const char *format;

      if (flags & SECTION_BSS)
        type = "nobits";
      else
        type = "progbits";

#ifdef TYPE_OPERAND_FMT
      format = "," TYPE_OPERAND_FMT;
#else
      format = ",@%s";
#endif

      fprintf (asm_out_file, format, type);

      if (flags & SECTION_ENTSIZE)
        fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
        {
          if (TREE_CODE (decl) == IDENTIFIER_NODE)
            fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
          else
            fprintf (asm_out_file, ",%s,comdat",
                     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
        }
    }

  putc ('\n', asm_out_file);
}
/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
  int type;

  switch (aarch64_cmodel)
    {
    case AARCH64_CMODEL_TINY:
    case AARCH64_CMODEL_TINY_PIC:
    case AARCH64_CMODEL_SMALL:
    case AARCH64_CMODEL_SMALL_PIC:
      /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
         for everything.  */
      type = DW_EH_PE_sdata4;
      break;
    default:
      /* No assumptions here.  8-byte relocs required.  */
      type = DW_EH_PE_sdata8;
      break;
    }
  return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}
/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
                             rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, model_rtx));
}
/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
                              rtx rval, rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem, model_rtx));
}
/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
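/* REG_BR_PROB_BASE is 10000, so VERY_UNLIKELY above is 99, i.e. the note
   records roughly a 1% probability of the branch being taken; the retry
   and failure edges of the exclusive-access loops below therefore stay
   out of the hot path.  */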
/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode, cmp_mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For short modes, we're going to perform the comparison in SImode,
         so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case SImode:
    case DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
        oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
/* Split a compare and swap pattern.  */

void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  scratch = operands[7];
  mode = GET_MODE (mem);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                                gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
    }

  emit_label (label2);
}
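/* Roughly, a strong SImode compare-and-swap split this way yields a loop
   of the following shape (register names illustrative; the exact load and
   store mnemonics depend on the memory model operands):

     retry:  ldxr    w0, [x1]        // load exclusive
             cmp     w0, w2          // compare with expected value
             bne     done            // mismatch: fail
             stxr    w3, w4, [x1]    // try to store the new value
             cbnz    w3, retry       // reservation lost: retry
     done:                                                          */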
/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
                         rtx value, rtx model_rtx, rtx cond)
{
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
        {
          value = GEN_INT (-INTVAL (value));
          code = PLUS;
        }
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
                                gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
}
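/* As an illustration (not from the original source), an SImode atomic
   fetch-and-add splits into a loop of roughly this shape:

     retry:  ldxr    w0, [x2]        // old value
             add     w1, w0, w3      // apply the operation
             stxr    w4, w1, [x2]    // attempt the store exclusive
             cbnz    w4, retry       // lost the reservation: retry  */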
static void
aarch64_print_extension (void)
{
  const struct aarch64_option_extension *opt = NULL;

  for (opt = all_extensions; opt->name != NULL; opt++)
    if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
      asm_fprintf (asm_out_file, "+%s", opt->name);

  asm_fprintf (asm_out_file, "\n");
}
static void
aarch64_start_file (void)
{
  if (selected_arch)
    {
      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
      aarch64_print_extension ();
    }
  else if (selected_cpu)
    {
      asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
      aarch64_print_extension ();
    }
  default_file_start();
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  return VOIDmode;
}
/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
   by:

   (-1)^s * (n/16) * 2^r

   Where:
     's' is the sign bit.
     'n' is an integer in the range 16 <= n <= 31.
     'r' is an integer in the range -3 <= r <= 4.  */

/* Return true iff X can be represented as a quarter-precision
   floating point immediate operand.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  HOST_WIDE_INT m1, m2;
  REAL_VALUE_TYPE r, m;

  if (!CONST_DOUBLE_P (x))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  REAL_VALUE_TO_INT (&m1, &m2, m);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (m1 != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = m2;
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     elsewhere.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}
char*
aarch64_output_simd_mov_immediate (rtx const_vector,
                                   enum machine_mode mode,
                                   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  const char *shift_op;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };

  /* This will return true to show const_vector is legal for use as either
     a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (mode == SFmode || mode == DFmode)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      if (aarch64_float_const_zero_rtx_p (info.value))
        info.value = GEN_INT (0);
      else
        {
#define buf_size 20
          REAL_VALUE_TYPE r;
          REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
          char float_buf[buf_size] = {'\0'};
          real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
#undef buf_size

          if (lane_count == 1)
            snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
          else
            snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
                      lane_count, element_char, float_buf);
          return templ;
        }
    }

  mnemonic = info.mvn ? "mvni" : "movi";
  shift_op = info.msl ? "msl" : "lsl";

  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
              mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
              ", %s %d", mnemonic, lane_count, element_char,
              UINTVAL (info.value), shift_op, info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
              mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}
char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
                                          enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
                               GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
         is in the right place already.  */
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
    }
}
/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
        {
          /* Expand the argument to a V16QI mode by duplicating it.  */
          rtx pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
          emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
        }
      else
        {
          emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
        }
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
        {
          pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
          emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
        }
      else
        {
          pair = gen_reg_rtx (OImode);
          emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
          emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
        }
    }
}
void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
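/* Example of the masking above (illustrative): when OP0 and OP1 are the
   same V8QI register, MASK is a vector of 7s, so a selector lane holding
   9 is reduced to 1 before the TBL; without the AND, TBL would return 0
   for the out-of-range index instead of wrapping modulo the lane count.  */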
/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
        return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }

  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_trn2v16qi; break;
        case V8QImode: gen = gen_aarch64_trn2v8qi; break;
        case V8HImode: gen = gen_aarch64_trn2v8hi; break;
        case V4HImode: gen = gen_aarch64_trn2v4hi; break;
        case V4SImode: gen = gen_aarch64_trn2v4si; break;
        case V2SImode: gen = gen_aarch64_trn2v2si; break;
        case V2DImode: gen = gen_aarch64_trn2v2di; break;
        case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
        case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
        case V2DFmode: gen = gen_aarch64_trn2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_trn1v16qi; break;
        case V8QImode: gen = gen_aarch64_trn1v8qi; break;
        case V8HImode: gen = gen_aarch64_trn1v8hi; break;
        case V4HImode: gen = gen_aarch64_trn1v4hi; break;
        case V4SImode: gen = gen_aarch64_trn1v4si; break;
        case V2SImode: gen = gen_aarch64_trn1v2si; break;
        case V2DImode: gen = gen_aarch64_trn1v2di; break;
        case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
        case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
        case V2DFmode: gen = gen_aarch64_trn1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
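/* For V4SImode (illustrative) this routine accepts the selector
   {0, 4, 2, 6}, which maps to TRN1, and {1, 5, 3, 7}, which maps to TRN2;
   any other leading index makes it return false so the remaining handlers
   can be tried.  */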
/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
        return false;
    }

  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
        case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
        case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
        case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
        case V4SImode: gen = gen_aarch64_uzp2v4si; break;
        case V2SImode: gen = gen_aarch64_uzp2v2si; break;
        case V2DImode: gen = gen_aarch64_uzp2v2di; break;
        case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
        case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
        case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
        case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
        case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
        case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
        case V4SImode: gen = gen_aarch64_uzp1v4si; break;
        case V2SImode: gen = gen_aarch64_uzp1v2si; break;
        case V2DImode: gen = gen_aarch64_uzp1v2di; break;
        case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
        case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
        case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
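/* For V4SImode (illustrative) the accepted selectors are {0, 2, 4, 6}
   (even lanes of the concatenated inputs, UZP1) and {1, 3, 5, 7}
   (odd lanes, UZP2).  */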
/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
        return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
        return false;
    }

  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_zip2v16qi; break;
        case V8QImode: gen = gen_aarch64_zip2v8qi; break;
        case V8HImode: gen = gen_aarch64_zip2v8hi; break;
        case V4HImode: gen = gen_aarch64_zip2v4hi; break;
        case V4SImode: gen = gen_aarch64_zip2v4si; break;
        case V2SImode: gen = gen_aarch64_zip2v2si; break;
        case V2DImode: gen = gen_aarch64_zip2v2di; break;
        case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
        case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
        case V2DFmode: gen = gen_aarch64_zip2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_zip1v16qi; break;
        case V8QImode: gen = gen_aarch64_zip1v8qi; break;
        case V8HImode: gen = gen_aarch64_zip1v8hi; break;
        case V4HImode: gen = gen_aarch64_zip1v4hi; break;
        case V4SImode: gen = gen_aarch64_zip1v4si; break;
        case V2SImode: gen = gen_aarch64_zip1v2si; break;
        case V2DImode: gen = gen_aarch64_zip1v2di; break;
        case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
        case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
        case V2DFmode: gen = gen_aarch64_zip1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
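/* For V4SImode (illustrative) the accepted selectors are {0, 4, 1, 5},
   which interleaves the low halves of the two inputs (ZIP1), and
   {2, 6, 3, 7}, which interleaves the high halves (ZIP2).  */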
static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0;
  enum machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;
  rtx lane;

  /* TODO: This may not be big-endian safe.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
        return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt);

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
        d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_zip (d))
        return true;
      else if (aarch64_evpc_uzp (d))
        return true;
      else if (aarch64_evpc_trn (d))
        return true;
      else if (aarch64_evpc_dup (d))
        return true;
      return aarch64_evpc_tbl (d);
    }

  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* Fall through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}
static bool
aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P aarch64_lra_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-aarch64.h"