/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "target-def.h"
#include "targhooks.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "config/arm/aarch-cost-tables.h"
/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
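
/* For example, POINTER_BYTES is 8 under the default LP64 ABI and 4 under
   ILP32, where pointers are 32 bits wide.  */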
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};
struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};
/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif
static bool aarch64_lra_p (void);
static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
						     const_tree,
						     enum machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
					      HOST_WIDE_INT, HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
						 const unsigned char *sel);
/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;
/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif
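
/* Illustrative expansion: with designated initializers available,
   NAMED_PARAM (GP2GP, 1) becomes ".GP2GP = (1)"; otherwise it degrades to
   the positional "(1)", so the initializers below must be kept in
   field-declaration order.  */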
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};
/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost generic_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &generic_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4)
};
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8,
   &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;
#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};
/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};
/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;
/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;
/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS 5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
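
/* A "bitmask immediate" is the class of values the logical instructions
   (AND/ORR/EOR) can encode directly: a replicated, rotated run of
   contiguous set bits.  For example, 0x00ff00ff00ff00ff (a repeated 16-bit
   element pattern) is encodable, while an arbitrary value such as
   0x12345678 is not; 5334 is the number of distinct valid 64-bit values of
   this form.  */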
/* Did we set flag_omit_frame_pointer just so
   aarch64_frame_pointer_required would be called?  */
static bool faked_omit_frame_pointer;
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
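
/* The encoding pairs each condition with its inverse in bit 0, so XOR-ing
   with 1 flips the test: AARCH64_EQ (0) <-> AARCH64_NE (1),
   AARCH64_GE (10) <-> AARCH64_LT (11), and so on.  */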
/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}
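
/* Under this mapping x0-x30 get DWARF numbers 0-30, sp gets 31, and
   v0-v31 get 64-95; e.g. v3 maps to AARCH64_DWARF_V0 + 3 = 67.  */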
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}
/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}
/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
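
/* For example, an array of three V4SImode vectors is given CImode (a
   48-byte mode), which keeps the operands of the LD3/ST3 structure
   load/store patterns together as a single value.  */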
/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
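
/* For example, a 32-byte OImode value needs 2 registers in the FP/SIMD
   file (UNITS_PER_VREG == 16) but 4 registers in the general file
   (UNITS_PER_WORD == 8).  */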
/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}
/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
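
/* A worked example, in DImode: mult_imm == 8 and extract_imm == 35
   satisfy the test, since 35 = 32 + 3 describes a 32-bit value with a
   left shift of 3, and 8 == 1 << 3 is the matching multiplier.  */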
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}
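
/* Typical use (illustrative): wrap the result in a comparison against
   zero, e.g. gen_rtx_NE (VOIDmode, cc_reg, const0_rtx), to feed a
   conditional-branch or conditional-set pattern.  */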
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add   dest, x1, :lo_12:foo

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
				     bl   __tls_get_addr
				     nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp
   mrs  tp, tpidr_el0
   add  dest, dest, tp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm
   add  t0, #:tprel_lo12_nc:imm
*/
static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
	    else
	      emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
	  }

	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
	rtx tp;

	emit_insn (gen_tlsdesc_small (imm));
	tp = aarch64_load_tp (NULL);
	emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
	rtx tmp_reg = gen_reg_rtx (Pmode);
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsie_small (tmp_reg, imm));
	emit_insn (gen_rtx_SET (Pmode, dest,
				gen_rtx_PLUS (Pmode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TPREL:
      {
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsle_small (dest, tp, imm));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    default:
      gcc_unreachable ();
    }
}
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx low_dst;

  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);
  int src_regno = REGNO (src);
  int dst_regno = REGNO (dst);

  gcc_assert (dst_mode == TImode || dst_mode == TFmode);

  if (REG_P (dst) && REG_P (src))
    {
      gcc_assert (src_mode == TImode || src_mode == TFmode);

      /* Handle r -> w, w -> r.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  switch (src_mode)
	    {
	    case TImode:
	      emit_insn
		(gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
	      emit_insn
		(gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
	      return;
	    case TFmode:
	      emit_insn
		(gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
	      emit_insn
		(gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
	      return;
	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  switch (src_mode)
	    {
	    case TImode:
	      emit_insn
		(gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
	      emit_insn
		(gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
	      return;
	    case TFmode:
	      emit_insn
		(gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
	      emit_insn
		(gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
	      return;
	    default:
	      gcc_unreachable ();
	    }
	}
      /* Fall through to r -> r cases.  */
    }

  switch (dst_mode)
    {
    case TImode:
      low_dst = gen_lowpart (word_mode, dst);
      if (REG_P (low_dst)
	  && reg_overlap_mentioned_p (low_dst, src))
	{
	  aarch64_emit_move (gen_highpart (word_mode, dst),
			     gen_highpart_mode (word_mode, TImode, src));
	  aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
	}
      else
	{
	  aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
	  aarch64_emit_move (gen_highpart (word_mode, dst),
			     gen_highpart_mode (word_mode, TImode, src));
	}
      return;
    case TFmode:
      emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
		      gen_rtx_REG (DFmode, src_regno));
      emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
		      gen_rtx_REG (DFmode, src_regno + 1));
      return;
    default:
      gcc_unreachable ();
    }
}

static bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}
/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  enum machine_mode src_mode = GET_MODE (src1);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
      return;
    }
}
/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}
static rtx
aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}
static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg,
		    HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}
void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  enum machine_mode mode = GET_MODE (dest);
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match;

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
	 before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
      switch (sty)
	{
	case SYMBOL_FORCE_TO_MEM:
	  if (offset != const0_rtx
	      && targetm.cannot_force_const_mem (mode, imm))
	    {
	      gcc_assert(can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  mem = force_const_mem (ptr_mode, imm);
	  gcc_assert (mem);
	  if (mode != ptr_mode)
	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	  return;

	case SYMBOL_SMALL_TLSGD:
	case SYMBOL_SMALL_TLSDESC:
	case SYMBOL_SMALL_GOTTPREL:
	case SYMBOL_SMALL_GOT:
	case SYMBOL_TINY_GOT:
	  if (offset != const0_rtx)
	    {
	      gcc_assert(can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  /* FALLTHRU */

	case SYMBOL_SMALL_TPREL:
	case SYMBOL_SMALL_ABSOLUTE:
	case SYMBOL_TINY_ABSOLUTE:
	  aarch64_load_symref_appropriately (dest, imm, sty);
	  return;

	default:
	  gcc_unreachable ();
	}
    }

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      return;
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
	{
	  rtx mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	}

      return;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			      GEN_INT (INTVAL (imm) & 0xffff)));
      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
      return;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == 0)
	zero_match++;
      else if ((val & mask) == mask)
	one_match++;
    }

  if (one_match == 2)
    {
      mask = 0xffff;
      for (i = 0; i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					 GEN_INT ((val >> i) & 0xffff)));
	      return;
	    }
	}
      gcc_unreachable ();
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val + comp) & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val + comp) & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val - comp) | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val - comp) | ~mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val | ~mask))));
	  return;
	}
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (aarch64_bitmasks[i])));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - aarch64_bitmasks[i])));
	  return;
	}

      for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (aarch64_bitmasks[i])));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
					 GEN_INT ((val >> j) & 0xffff)));
	      return;
	    }
	}
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[i])));
		emit_insn (gen_iordi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[j])));
		return;
	      }
	}
      else if ((val & aarch64_bitmasks[i]) == val)
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[j])));
		emit_insn (gen_anddi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[i])));
		return;
	      }
	}
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
	{
	  if (first)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (val & mask)));
	      first = false;
	    }
	  else
	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
				       GEN_INT ((val >> i) & 0xffff)));
	}
    }
}
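
/* Illustration of the closing fall-back path (a hypothetical constant,
   assuming no shorter sequence was found above): 0x1234000056780000 has
   two zero 16-bit chunks, so it is built as
       movz x0, #0x5678, lsl #16
       movk x0, #0x1234, lsl #48  */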
static bool
aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Indirect calls are not currently supported.  */
  if (decl == NULL)
    return false;

  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
  if (aarch64_decl_is_long_call_p (decl))
    return false;

  return true;
}
1164 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED
,
1165 enum machine_mode mode
,
1167 bool named ATTRIBUTE_UNUSED
)
1170 enum machine_mode dummymode
;
1173 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1174 size
= (mode
== BLKmode
&& type
)
1175 ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1179 /* Arrays always passed by reference. */
1180 if (TREE_CODE (type
) == ARRAY_TYPE
)
1182 /* Other aggregates based on their size. */
1183 if (AGGREGATE_TYPE_P (type
))
1184 size
= int_size_in_bytes (type
);
1187 /* Variable sized arguments are always returned by reference. */
1191 /* Can this be a candidate to be passed in fp/simd register(s)? */
1192 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1197 /* Arguments which are variable sized or larger than 2 registers are
1198 passed by reference unless they are a homogenous floating point
1200 return size
> 2 * UNITS_PER_WORD
;
/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  enum machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
					       &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}
/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp;
  int count;
  enum machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}
/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}
/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type,
					       &ag_mode,
					       &count,
					       NULL))
    return false;

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}
static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
						  type,
						  &pcum->aapcs_vfp_rmode,
						  nregs,
						  NULL);
}
/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
	{
	  if (TYPE_MODE (type) == mode)
	    alignment = TYPE_ALIGN (type);
	  else
	    alignment = GET_MODE_ALIGNMENT (mode);
	}
      else
	alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}
/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
						 mode,
						 type,
						 &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;

	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
	   + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* C6 - C9.  though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	{
	  pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
	}
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11  */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument (we can re-use NREGS) and align the total size if
     necessary.  */
on_stack:
  pcum->aapcs_stack_words = nregs;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
					       16 / UNITS_PER_WORD) + 1;
  return;
}
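
/* Example of rule C.8 (illustrative): after one int argument (in w0,
   NGRN == 1), a 16-byte aggregate with 16-byte alignment skips x1 and is
   passed in the even-numbered pair x2/x3.  */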
/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}
void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  return;
}
static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      enum machine_mode mode,
			      const_tree type,
			      bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
		  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}
bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}
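
/* That is, x0-x7 and v0-v7: under AAPCS64 both NUM_ARG_REGS and
   NUM_FP_ARG_REGS are 8.  */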
/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral, floating-point and pointer types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
	 || POINTER_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}
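
/* For example, on a big-endian target a 3-byte structure passed on the
   stack is padded upward: its bytes occupy the lowest addresses of the
   slot and the padding sits above them.  */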
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
{
  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
			    : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
	return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}
static enum machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}
static bool
aarch64_frame_pointer_required (void)
{
  /* If the function contains dynamic stack allocations, we need to
     use the frame pointer to access the static parts of the frame.  */
  if (cfun->calls_alloca)
    return true;

  /* We may have turned flag_omit_frame_pointer on in order to have this
     function called; if we did, we also set the 'faked_omit_frame_pointer' flag
     and we'll check it here.
     If we really did set flag_omit_frame_pointer normally, then we return false
     (no frame pointer required) in all cases.  */

  if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
    return false;
  else if (flag_omit_leaf_frame_pointer)
    return !crtl->is_leaf;
  return true;
}
/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

  cfun->machine->frame.fp_lr_offset = 0;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
    }

  if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
    }

  cfun->machine->frame.padding0 =
    (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;
  cfun->machine->frame.laid_out = true;
}
/* Make the last instruction frame-related and note that it performs
   the operation described by FRAME_PATTERN.  */

static void
aarch64_set_frame_expr (rtx frame_pattern)
{
  rtx insn;

  insn = get_last_insn ();
  RTX_FRAME_RELATED_P (insn) = 1;
  RTX_FRAME_RELATED_P (frame_pattern) = 1;
  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				      frame_pattern,
				      REG_NOTES (insn));
}
static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] != -1;
}
static void
aarch64_save_or_restore_fprs (int start_offset, int increment,
			      bool restore, rtx base_rtx)
{
  unsigned regno;
  unsigned regno2;
  rtx insn;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (DFmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= V31_REGNUM
		 && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= V31_REGNUM &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (DFmode,
				  plus_constant
				  (Pmode,
				   base_rtx,
				   start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
					mem2, gen_rtx_REG (DFmode, regno2)));
		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
				       gen_rtx_REG (DFmode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno2));
		}

	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Offset from the stack pointer of where the saves and
   restores have to happen.  */
static void
aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
					       bool restore)
{
  rtx insn;
  rtx base_rtx = stack_pointer_rtx;
  HOST_WIDE_INT start_offset = offset;
  HOST_WIDE_INT increment = UNITS_PER_WORD;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
  unsigned limit = (frame_pointer_needed)? R28_REGNUM : R30_REGNUM;
  unsigned regno;
  unsigned regno2;

  for (regno = R0_REGNUM; regno <= limit; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (Pmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= limit
		 && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= limit &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (Pmode,
				  plus_constant
				  (Pmode,
				   base_rtx,
				   start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
					mem2, gen_rtx_REG (DImode, regno2)));
		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
				       gen_rtx_REG (DImode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno2));
		}

	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
					    1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
}
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+ <-- arg_pointer_rtx
	|                               |
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|                               |
	|  local variables              |
	|                               |
	+-------------------------------+ <-- frame_pointer_rtx
	|                               |
	|  callee-saved registers       |
	|                               |
	+-------------------------------+
	|  LR'                          |
	+-------------------------------+
	|  FP'                          |
      P +-------------------------------+ <-- hard_frame_pointer_rtx
	|  dynamic allocation           |
	+-------------------------------+
	|                               |
	|  outgoing stack arguments     |
	|                               |
	+-------------------------------+ <-- stack_pointer_rtx

   Dynamic stack allocations such as alloca insert data at point P.
   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
   hard_frame_pointer_rtx unchanged.  */
/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

void
aarch64_expand_prologue (void)
{
  /* sub sp, sp, #<frame_size>
     stp {fp, lr}, [sp, #<frame_size> - 16]
     add fp, sp, #<frame_size> - hardfp_offset
     stp {cs_reg}, [fp, #-16] etc.

     sub sp, sp, <final_adjustment_if_any>
  */
  HOST_WIDE_INT original_frame_size;	/* local variables + vararg save */
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;		/* FP offset from SP */
  rtx insn;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
	      && (cfun->stdarg || !cfun->machine->saved_varargs_size));
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;

  fp_offset = (offset
	       - original_frame_size
	       - cfun->machine->frame.saved_regs_size);

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      /* When the frame has a large size, an initial decrease is done on
	 the stack pointer to jump over the callee-allocated save area for
	 register varargs, the local variable area and/or the callee-saved
	 register area.  This will allow the pre-index write-back
	 store pair instructions to be used for setting up the stack frame
	 efficiently.  */
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;

      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (-frame_size));
	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, stack_pointer_rtx,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  -frame_size)));
	}
      else if (frame_size > 0)
	{
	  if ((frame_size & 0xfff) != frame_size)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & ~(HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if ((frame_size & 0xfff) != 0)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & (HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
    }
  else
    frame_size = -1;

  if (offset > 0)
    {
      /* Save the frame pointer and lr if the frame pointer is needed
	 first.  Make the frame pointer point to the location of the
	 old frame pointer on the stack.  */
      if (frame_pointer_needed)
	{
	  rtx mem_fp, mem_lr;

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (-offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      aarch64_set_frame_expr (gen_rtx_SET
				      (Pmode, stack_pointer_rtx,
				       gen_rtx_MINUS (Pmode,
						      stack_pointer_rtx,
						      GEN_INT (offset))));
	      mem_fp = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset));
	      mem_lr = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset
						     + UNITS_PER_WORD));
	      insn = emit_insn (gen_store_pairdi (mem_fp,
						  hard_frame_pointer_rtx,
						  mem_lr,
						  gen_rtx_REG (DImode,
							       LR_REGNUM)));
	    }
	  else
	    {
	      insn = emit_insn (gen_storewb_pairdi_di
				(stack_pointer_rtx, stack_pointer_rtx,
				 hard_frame_pointer_rtx,
				 gen_rtx_REG (DImode, LR_REGNUM),
				 GEN_INT (-offset),
				 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
	    }

	  /* The first part of a frame-related parallel insn is always
	     assumed to be relevant to the frame calculations;
	     subsequent parts are only frame-related if explicitly
	     marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Set up frame pointer to point to the location of the
	     previous frame pointer on the stack.  */
	  insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
					   stack_pointer_rtx,
					   GEN_INT (fp_offset)));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, hard_frame_pointer_rtx,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  fp_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					   hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (-offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      aarch64_save_or_restore_callee_save_registers
	(fp_offset + cfun->machine->frame.hardfp_offset, 0);
    }

  /* when offset >= 512,
     sub sp, sp, #<outgoing_args_size> */
  if (frame_size > -1)
    {
      if (crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (- crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Generate the epilogue instructions for returning from a function.  */
void
aarch64_expand_epilogue (bool for_sibcall)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset;
  HOST_WIDE_INT fp_offset;
  rtx insn;
  rtx cfa_reg;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);

  fp_offset = (offset
	       - original_frame_size
	       - cfun->machine->frame.saved_regs_size);

  cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;
      if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  else
    frame_size = -1;

  /* If there were outgoing arguments or we've done dynamic stack
     allocation, then restore the stack pointer from the frame
     pointer.  This is at most one insn and more efficient than using
     GCC's internal mechanism.  */
  if (frame_pointer_needed
      && (crtl->outgoing_args_size || cfun->calls_alloca))
    {
      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
				       hard_frame_pointer_rtx,
				       GEN_INT (- fp_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;
      /* As SP is set to (FP - fp_offset), according to the rules in
	 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
	 from the value of SP from now on.  */
      cfa_reg = stack_pointer_rtx;
    }

  aarch64_save_or_restore_callee_save_registers
    (fp_offset + cfun->machine->frame.hardfp_offset, 1);

  /* Restore the frame pointer and lr if the frame pointer is needed.  */
  if (offset > 0)
    {
      if (frame_pointer_needed)
	{
	  rtx mem_fp, mem_lr;

	  if (fp_offset)
	    {
	      mem_fp = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset));
	      mem_lr = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset
						     + UNITS_PER_WORD));
	      insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
						 mem_fp,
						 gen_rtx_REG (DImode,
							      LR_REGNUM),
						 mem_lr));
	    }
	  else
	    {
	      insn = emit_insn (gen_loadwb_pairdi_di
				(stack_pointer_rtx,
				 stack_pointer_rtx,
				 hard_frame_pointer_rtx,
				 gen_rtx_REG (DImode, LR_REGNUM),
				 GEN_INT (offset),
				 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
	      add_reg_note (insn, REG_CFA_ADJUST_CFA,
			    (gen_rtx_SET (Pmode, stack_pointer_rtx,
					  plus_constant (Pmode, cfa_reg,
							 offset))));
	    }

	  /* The first part of a frame-related parallel insn
	     is always assumed to be relevant to the frame
	     calculations; subsequent parts are only
	     frame-related if explicitly marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	  add_reg_note (insn, REG_CFA_RESTORE,
			gen_rtx_REG (DImode, LR_REGNUM));

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
    {
      /* We need to unwind the stack by the offset computed by
	 EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
	 based on SP.  Ideally we would update the SP and define the
	 CFA along the lines of:

	 SP = SP + EH_RETURN_STACKADJ_RTX
	 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)

	 However the dwarf emitter only understands a constant
	 register offset.

	 The solution chosen here is to use the otherwise unused IP0
	 as a temporary register to hold the current SP value.  The
	 CFA is described using IP0 then SP is modified.  */

      rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);

      insn = emit_move_insn (ip0, stack_pointer_rtx);
      add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
      RTX_FRAME_RELATED_P (insn) = 1;

      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));

      /* Ensure the assignment to IP0 does not get optimized away.  */
      emit_use (ip0);
    }

  if (frame_size > -1)
    {
      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (frame_size));
	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, stack_pointer_rtx,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  frame_size)));
	}
      else if (frame_size > 0)
	{
	  if ((frame_size & 0xfff) != 0)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT ((frame_size
					   & (HOST_WIDE_INT) 0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if ((frame_size & 0xfff) != frame_size)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT ((frame_size
					   & ~ (HOST_WIDE_INT) 0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

      aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  offset)));
    }

  emit_use (gen_rtx_REG (DImode, LR_REGNUM));
  if (!for_sibcall)
    emit_jump_insn (ret_rtx);
}
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
rtx
aarch64_final_eh_return_addr (void)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
                + crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
                                          STACK_BOUNDARY / BITS_PER_UNIT);
  fp_offset = (offset
               - original_frame_size
               - cfun->machine->frame.saved_regs_size);

  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
    return gen_rtx_REG (DImode, LR_REGNUM);

  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we note 2 cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return a SP-relative location since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return a FP-relative location.  */

  if (frame_pointer_needed)
    {
      if (fp_offset)
        return gen_frame_mem (DImode,
                              plus_constant (Pmode,
                                             hard_frame_pointer_rtx,
                                             UNITS_PER_WORD));
      else
        return gen_frame_mem (DImode,
                              plus_constant (Pmode,
                                             stack_pointer_rtx,
                                             UNITS_PER_WORD));
    }

  /* If FP is not needed, we calculate the location of LR, which would be
     at the top of the saved registers block.  */

  return gen_frame_mem (DImode,
                        plus_constant (Pmode,
                                       stack_pointer_rtx,
                                       fp_offset
                                       + cfun->machine->frame.saved_regs_size
                                       - 2 * UNITS_PER_WORD));
}
/* Output code to build up a constant in a register.  */
static void
aarch64_build_constant (int regnum, HOST_WIDE_INT val)
{
  if (aarch64_bitmask_imm (val, DImode))
    emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
  else
    {
      int i;
      int ncount = 0;
      int zcount = 0;
      HOST_WIDE_INT valp = val >> 16;
      HOST_WIDE_INT valm;
      HOST_WIDE_INT tval;

      for (i = 16; i < 64; i += 16)
        {
          valm = (valp & 0xffff);

          if (valm != 0)
            ++ zcount;

          if (valm != 0xffff)
            ++ ncount;

          valp >>= 16;
        }

      /* zcount contains the number of additional MOVK instructions
         required if the constant is built up with an initial MOVZ
         instruction, while ncount is the number of MOVK instructions
         required if starting with a MOVN instruction.  Choose the
         sequence that yields the fewest instructions, preferring MOVZ
         when the two counts are equal.  */
      if (ncount < zcount)
        {
          emit_move_insn (gen_rtx_REG (Pmode, regnum),
                          GEN_INT ((~val) & 0xffff));
          tval = 0xffff;
        }
      else
        {
          emit_move_insn (gen_rtx_REG (Pmode, regnum),
                          GEN_INT (val & 0xffff));
          tval = 0;
        }

      val >>= 16;

      for (i = 16; i < 64; i += 16)
        {
          if ((val & 0xffff) != tval)
            emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
                                       GEN_INT (i), GEN_INT (val & 0xffff)));
          val >>= 16;
        }
    }
}
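
/* Worked example (illustrative): for val == 0xffffffffffff1234 every
   16-bit chunk above the bottom one is 0xffff, so ncount == 0 and
   zcount == 3.  The code above therefore starts from the complemented
   low chunk, emitting the single instruction
       movn  xN, #0xedcb        // (~val) & 0xffff
   with no trailing MOVKs, whereas a MOVZ-based sequence would need
   three MOVKs for the 0xffff chunks.  */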
static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      aarch64_build_constant (scratchreg, delta);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
        {
          emit_insn (gen_rtx_SET (Pmode, scratch_rtx,
                                  GEN_INT (mdelta / 4096)));
          rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
          if (delta < 0)
            emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                    gen_rtx_MINUS (Pmode, this_rtx, shift)));
          else
            emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                    gen_rtx_PLUS (Pmode, this_rtx, shift)));
        }
      if (mdelta % 4096 != 0)
        {
          scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
          emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                  gen_rtx_PLUS (Pmode, this_rtx,
                                                scratch_rtx)));
        }
    }
}
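
/* Worked example (illustrative): for delta == 0x5678, mdelta / 4096 == 5
   and mdelta % 4096 == 0x678, so the code above emits roughly
       mov  xS, #5
       add  xR, xR, xS, lsl #12
       add  xR, xR, #0x678
   splitting the addend into a shifted quotient and a low 12-bit part.  */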
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta,
                         HOST_WIDE_INT vcall_offset,
                         tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
  int this_regno = R0_REGNUM;
  rtx this_rtx, temp0, temp1, addr, insn, funexp;

  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  if (vcall_offset == 0)
    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
  else
    {
      gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);

      this_rtx = gen_rtx_REG (Pmode, this_regno);
      temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);

      addr = this_rtx;
      if (delta != 0)
        {
          if (delta >= -256 && delta < 256)
            addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
                                       plus_constant (Pmode, this_rtx,
                                                      delta));
          else
            aarch64_add_constant (this_regno, IP1_REGNUM, delta);
        }

      if (Pmode == ptr_mode)
        aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
      else
        aarch64_emit_move (temp0,
                           gen_rtx_ZERO_EXTEND (Pmode,
                                                gen_rtx_MEM (ptr_mode,
                                                             addr)));

      if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
        addr = plus_constant (Pmode, temp0, vcall_offset);
      else
        {
          aarch64_build_constant (IP1_REGNUM, vcall_offset);
          addr = gen_rtx_PLUS (Pmode, temp0, temp1);
        }

      if (Pmode == ptr_mode)
        aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
      else
        aarch64_emit_move (temp1,
                           gen_rtx_SIGN_EXTEND (Pmode,
                                                gen_rtx_MEM (ptr_mode,
                                                             addr)));

      emit_insn (gen_add2_insn (this_rtx, temp1));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}
static int
aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
}
static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

  if (*imm1 < *imm2)
    return -1;
  if (*imm1 > *imm2)
    return +1;
  return 0;
}
static void
aarch64_build_bitmask_table (void)
{
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
    {
      e = 1 << log_e;
      if (e == 64)
        mask = ~(HOST_WIDE_INT) 0;
      else
        mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
        {
          for (r = 0; r < e; r++)
            {
              /* Set s consecutive bits to 1 (s < 64).  */
              imm = ((unsigned HOST_WIDE_INT) 1 << s) - 1;
              /* Rotate right by r.  */
              if (r != 0)
                imm = ((imm >> r) | (imm << (e - r))) & mask;
              /* Replicate the constant depending on SIMD size
                 (the cases deliberately fall through).  */
              switch (log_e)
                {
                case 1: imm |= (imm <<  2);
                case 2: imm |= (imm <<  4);
                case 3: imm |= (imm <<  8);
                case 4: imm |= (imm << 16);
                case 5: imm |= (imm << 32);
                case 6:
                  break;
                default:
                  gcc_unreachable ();
                }
              gcc_assert (nimms < AARCH64_NUM_BITMASKS);
              aarch64_bitmasks[nimms++] = imm;
            }
        }
    }

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
         aarch64_bitmasks_cmp);
}
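
/* Illustrative note: each (log_e, s, r) triple yields one table entry;
   element size e contributes e * (e - 1) patterns, for a total of
   2 + 12 + 56 + 240 + 992 + 4032 == 5334 entries, which is what
   AARCH64_NUM_BITMASKS is expected to equal.  For example e == 16,
   s == 8, r == 0 produces 0x00ff00ff00ff00ff after replication.  */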
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
}

/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
        return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}
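
/* For instance, 0xffff0000 satisfies the second disjunct above
   ((val & (0xffff << 16)) == val) and can be loaded with a single
       movz  wN, #0xffff, lsl #16
   while wider values such as 0xffff000000000000 match the
   48-bit-shifted case checked for modes larger than 4 bytes.  */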
/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
                  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
}

/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
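
/* Example: aarch64_move_imm returns true for 0xffffffffffff0fff in
   DImode via the complemented-MOVZ (i.e. MOVN) path, since ~val is
   0xf000, and for 0x00ff00ff00ff00ff via the bitmask-immediate
   binary search.  */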
static bool
aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    {
      if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
          != SYMBOL_FORCE_TO_MEM)
        return true;
      else
        /* Avoid generating a 64-bit relocation in ILP32; leave
           to aarch64_expand_mov_immediate to handle it properly.  */
        return mode != ptr_mode;
    }

  return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
          || regno == SP_REGNUM
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
/* Return true if address offset is a valid index.  If it is, fill in INFO
   appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_classify_index (struct aarch64_address_info *info, rtx x,
                        enum machine_mode mode, bool strict_p)
{
  enum aarch64_address_type type;
  rtx index;
  int shift;

  /* (reg:P) */
  if ((REG_P (x) || GET_CODE (x) == SUBREG)
      && GET_MODE (x) == Pmode)
    {
      type = ADDRESS_REG_REG;
      index = x;
      shift = 0;
    }
  /* (sign_extend:DI (reg:SI)) */
  else if ((GET_CODE (x) == SIGN_EXTEND
            || GET_CODE (x) == ZERO_EXTEND)
           && GET_MODE (x) == DImode
           && GET_MODE (XEXP (x, 0)) == SImode)
    {
      type = (GET_CODE (x) == SIGN_EXTEND)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (x, 0);
      shift = 0;
    }
  /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
  else if (GET_CODE (x) == MULT
           && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
               || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
           && GET_MODE (XEXP (x, 0)) == DImode
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
           && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
               || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
           && GET_MODE (XEXP (x, 0)) == DImode
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (x, 1));
    }
  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
            || GET_CODE (x) == ZERO_EXTRACT)
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == MULT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
          || INTVAL (XEXP (x, 2)) != 0)
        return false;
    }
  /* (and:DI (mult:DI (reg:DI) (const_int scale))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == MULT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT) 0xffffffff << shift)
        return false;
    }
  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
            || GET_CODE (x) == ZERO_EXTRACT)
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == ASHIFT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
          || INTVAL (XEXP (x, 2)) != 0)
        return false;
    }
  /* (and:DI (ashift:DI (reg:DI) (const_int shift))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == ASHIFT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT) 0xffffffff << shift)
        return false;
    }
  /* (mult:P (reg:P) (const_int scale)) */
  else if (GET_CODE (x) == MULT
           && GET_MODE (x) == Pmode
           && GET_MODE (XEXP (x, 0)) == Pmode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:P (reg:P) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
           && GET_MODE (x) == Pmode
           && GET_MODE (XEXP (x, 0)) == Pmode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = INTVAL (XEXP (x, 1));
    }
  else
    return false;

  if (GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

  if ((shift == 0 ||
       (shift > 0 && shift <= 3
        && (1 << shift) == GET_MODE_SIZE (mode)))
      && REG_P (index)
      && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
    {
      info->type = type;
      info->offset = index;
      info->shift = shift;
      return true;
    }

  return false;
}
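
/* Illustrative examples of index forms accepted above (assuming a
   4-byte access, so the shift must be 0 or 2):
     (reg:DI x1)                                       -> [x0, x1]
     (mult:DI (sign_extend:DI (reg:SI w1)) (const_int 4))
                                                       -> [x0, w1, sxtw #2]
     (ashift:DI (reg:DI x1) (const_int 2))             -> [x0, x1, lsl #2]  */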
static inline bool
offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
          && offset < 64 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
          && offset < 4096 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}
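
/* For a DImode (8-byte) access the three predicates above accept:
     7-bit signed scaled:    -512 .. 504 in steps of 8 (LDP/STP range)
     9-bit signed unscaled:  -256 .. 255              (LDUR/STUR range)
     12-bit unsigned scaled: 0 .. 32760 in steps of 8 (LDR/STR range)  */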
/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  OUTER_CODE is PARALLEL for a load/store pair.  */

static bool
aarch64_classify_address (struct aarch64_address_info *info,
                          rtx x, enum machine_mode mode,
                          RTX_CODE outer_code, bool strict_p)
{
  enum rtx_code code = GET_CODE (x);
  rtx op0, op1;
  bool allow_reg_index_p =
    outer_code != PARALLEL && GET_MODE_SIZE (mode) != 16;

  /* Don't support anything other than POST_INC or REG addressing for
     AdvSIMD.  */
  if (aarch64_vector_mode_p (mode)
      && (code != POST_INC && code != REG))
    return false;

  switch (code)
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG_IMM;
      info->base = x;
      info->offset = const0_rtx;
      return aarch64_base_register_rtx_p (x, strict_p);

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      if (GET_MODE_SIZE (mode) != 0
          && CONST_INT_P (op1)
          && aarch64_base_register_rtx_p (op0, strict_p))
        {
          HOST_WIDE_INT offset = INTVAL (op1);

          info->type = ADDRESS_REG_IMM;
          info->base = op0;
          info->offset = op1;

          /* TImode and TFmode values are allowed in both pairs of X
             registers and individual Q registers.  The available
             address modes are:
             X,X: 7-bit signed scaled offset
             Q:   9-bit signed offset
             We conservatively require an offset representable in
             either mode.  */
          if (mode == TImode || mode == TFmode)
            return (offset_7bit_signed_scaled_p (mode, offset)
                    && offset_9bit_signed_unscaled_p (mode, offset));

          if (outer_code == PARALLEL)
            return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
                    && offset_7bit_signed_scaled_p (mode, offset));
          else
            return (offset_9bit_signed_unscaled_p (mode, offset)
                    || offset_12bit_unsigned_scaled_p (mode, offset));
        }

      if (allow_reg_index_p)
        {
          /* Look for base + (scaled/extended) index register.  */
          if (aarch64_base_register_rtx_p (op0, strict_p)
              && aarch64_classify_index (info, op1, mode, strict_p))
            {
              info->base = op0;
              return true;
            }
          if (aarch64_base_register_rtx_p (op1, strict_p)
              && aarch64_classify_index (info, op0, mode, strict_p))
            {
              info->base = op1;
              return true;
            }
        }

      return false;

    case POST_INC:
    case POST_DEC:
    case PRE_INC:
    case PRE_DEC:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      info->offset = NULL_RTX;
      return aarch64_base_register_rtx_p (info->base, strict_p);

    case POST_MODIFY:
    case PRE_MODIFY:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      if (GET_CODE (XEXP (x, 1)) == PLUS
          && CONST_INT_P (XEXP (XEXP (x, 1), 1))
          && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
          && aarch64_base_register_rtx_p (info->base, strict_p))
        {
          HOST_WIDE_INT offset;
          info->offset = XEXP (XEXP (x, 1), 1);
          offset = INTVAL (info->offset);

          /* TImode and TFmode values are allowed in both pairs of X
             registers and individual Q registers.  The available
             address modes are:
             X,X: 7-bit signed scaled offset
             Q:   9-bit signed offset
             We conservatively require an offset representable in
             either mode.  */
          if (mode == TImode || mode == TFmode)
            return (offset_7bit_signed_scaled_p (mode, offset)
                    && offset_9bit_signed_unscaled_p (mode, offset));

          if (outer_code == PARALLEL)
            return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
                    && offset_7bit_signed_scaled_p (mode, offset));
          else
            return offset_9bit_signed_unscaled_p (mode, offset);
        }
      return false;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      /* load literal: pc-relative constant pool entry.  Only supported
         for SI mode or larger.  */
      info->type = ADDRESS_SYMBOLIC;
      if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
        {
          rtx sym, addend;

          split_const (x, &sym, &addend);
          return (GET_CODE (sym) == LABEL_REF
                  || (GET_CODE (sym) == SYMBOL_REF
                      && CONSTANT_POOL_ADDRESS_P (sym)));
        }
      return false;

    case LO_SUM:
      info->type = ADDRESS_LO_SUM;
      info->base = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      if (allow_reg_index_p
          && aarch64_base_register_rtx_p (info->base, strict_p))
        {
          rtx sym, offs;

          split_const (info->offset, &sym, &offs);
          if (GET_CODE (sym) == SYMBOL_REF
              && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
                  == SYMBOL_SMALL_ABSOLUTE))
            {
              /* The symbol and offset must be aligned to the access
                 size.  */
              unsigned int align;
              unsigned int ref_size;

              if (CONSTANT_POOL_ADDRESS_P (sym))
                align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
              else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
                {
                  tree exp = SYMBOL_REF_DECL (sym);
                  align = TYPE_ALIGN (TREE_TYPE (exp));
                  align = CONSTANT_ALIGNMENT (exp, align);
                }
              else if (SYMBOL_REF_DECL (sym))
                align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
              else
                align = BITS_PER_UNIT;

              ref_size = GET_MODE_SIZE (mode);
              if (ref_size == 0)
                ref_size = GET_MODE_SIZE (DImode);

              return ((INTVAL (offs) & (ref_size - 1)) == 0
                      && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
            }
        }
      return false;

    default:
      return false;
    }
}
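
/* Example: for a DImode load-pair (OUTER_CODE == PARALLEL) the address
   (plus (reg:DI x0) (const_int 504)) classifies as ADDRESS_REG_IMM,
   while an offset of 512 is rejected because it falls outside the
   7-bit signed scaled range checked above.  */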
bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}

/* Classify the base of symbolic expression X, given that X appears in
   context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x,
                                      enum aarch64_symbol_context context)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, context);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x,
                                   bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
                              RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}
/* Return TRUE if rtx X is immediate constant 0.0  */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return REAL_VALUES_EQUAL (r, dconst0);
}
/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
enum machine_mode
aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (code)
        {
        case EQ:
        case NE:
        case UNORDERED:
        case ORDERED:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
        case UNEQ:
        case LTGT:
          return CCFPmode;

        case LT:
        case LE:
        case GT:
        case GE:
          return CCFPEmode;

        default:
          gcc_unreachable ();
        }
    }

  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && y == const0_rtx
      && (code == EQ || code == NE || code == LT || code == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
          || GET_CODE (x) == NEG))
    return CC_NZmode;

  /* A compare with a shifted or negated operand.  Because of
     canonicalization, the comparison will have to be swapped when we
     emit the assembly code.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == LSHIFTRT
          || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND
          || GET_CODE (x) == NEG))
    return CC_SWPmode;

  /* A compare of a mode narrower than SI mode against zero can be done
     by extending the value in the comparison.  */
  if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
      && y == const0_rtx)
    /* Only use sign-extension if we really need it.  */
    return ((code == GT || code == GE || code == LE || code == LT)
            ? CC_SESWPmode : CC_ZESWPmode);

  /* For everything else, return CCmode.  */
  return CCmode;
}
int
aarch64_get_condition_code (rtx x)
{
  enum machine_mode mode = GET_MODE (XEXP (x, 0));
  enum rtx_code comp_code = GET_CODE (x);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));

  switch (mode)
    {
    case CCFPmode:
    case CCFPEmode:
      switch (comp_code)
        {
        case GE: return AARCH64_GE;
        case GT: return AARCH64_GT;
        case LE: return AARCH64_LS;
        case LT: return AARCH64_MI;
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case ORDERED: return AARCH64_VC;
        case UNORDERED: return AARCH64_VS;
        case UNLT: return AARCH64_LT;
        case UNLE: return AARCH64_LE;
        case UNGT: return AARCH64_HI;
        case UNGE: return AARCH64_PL;
        default: gcc_unreachable ();
        }
      break;

    case CCmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case GE: return AARCH64_GE;
        case GT: return AARCH64_GT;
        case LE: return AARCH64_LE;
        case LT: return AARCH64_LT;
        case GEU: return AARCH64_CS;
        case GTU: return AARCH64_HI;
        case LEU: return AARCH64_LS;
        case LTU: return AARCH64_CC;
        default: gcc_unreachable ();
        }
      break;

    case CC_SWPmode:
    case CC_ZESWPmode:
    case CC_SESWPmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case GE: return AARCH64_LE;
        case GT: return AARCH64_LT;
        case LE: return AARCH64_GE;
        case LT: return AARCH64_GT;
        case GEU: return AARCH64_LS;
        case GTU: return AARCH64_CC;
        case LEU: return AARCH64_CS;
        case LTU: return AARCH64_HI;
        default: gcc_unreachable ();
        }
      break;

    case CC_NZmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case GE: return AARCH64_PL;
        case LT: return AARCH64_MI;
        default: gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
      break;
    }
}
static unsigned
bit_count (unsigned HOST_WIDE_INT value)
{
  unsigned count = 0;

  while (value)
    {
      count++;
      value &= value - 1;
    }

  return count;
}
void
aarch64_print_operand (FILE *f, rtx x, char code)
{
  switch (code)
    {
    case 'c':
      /* An integer or symbol address without a preceding # sign.  */
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case SYMBOL_REF:
          output_addr_const (f, x);
          break;

        case CONST:
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
            {
              output_addr_const (f, x);
              break;
            }
          /* Fall through.  */

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      break;

    case 'e':
      /* Print the sign/zero-extend size as a character 8->b, 16->h,
         32->w.  */
      {
        int n;

        if (GET_CODE (x) != CONST_INT
            || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        switch (n)
          {
          case 3:
            fputc ('b', f);
            break;
          case 4:
            fputc ('h', f);
            break;
          case 5:
            fputc ('w', f);
            break;
          default:
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }
      }
      break;

    case 'p':
      {
        int n;

        /* Print N such that 2^N == X.  */
        if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        asm_fprintf (f, "%d", n);
      }
      break;

    case 'P':
      /* Print the number of non-zero bits in X (a const_int).  */
      if (GET_CODE (x) != CONST_INT)
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      asm_fprintf (f, "%u", bit_count (INTVAL (x)));
      break;

    case 'H':
      /* Print the higher numbered register of a pair (TImode) of regs.  */
      if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
      break;

    case 'm':
      /* Print a condition (eq, ne, etc).  */

      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
        return;

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
      break;

    case 'M':
      /* Print the inverse of a condition (eq <-> ne, etc).  */

      /* CONST_TRUE_RTX means never -- that's the default.  */
      if (x == const_true_rtx)
        {
          fputs ("nv", f);
          return;
        }

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
                                     (aarch64_get_condition_code (x))], f);
      break;

    case 'b':
    case 'h':
    case 's':
    case 'd':
    case 'q':
      /* Print a scalar FP/SIMD register name.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
        {
          output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
      break;

    case 'S':
    case 'T':
    case 'U':
    case 'V':
      /* Print the first FP/SIMD register name in a list.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
        {
          output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
      break;

    case 'X':
      /* Print bottom 16 bits of integer constant in hex.  */
      if (GET_CODE (x) != CONST_INT)
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
      break;

    case 'w':
    case 'x':
      /* Print a general register name or the zero register (32-bit or
         64-bit).  */
      if (x == const0_rtx
          || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
        {
          asm_fprintf (f, "%czr", code);
          break;
        }

      if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
        {
          asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
          break;
        }

      if (REG_P (x) && REGNO (x) == SP_REGNUM)
        {
          asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
          break;
        }

      /* Fall through */

    case 0:
      /* Print a normal operand, if it's a general register, then we
         assume DImode.  */
      if (x == NULL)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (f, "%s", reg_names [REGNO (x)]);
          break;

        case MEM:
          aarch64_memory_reference_mode = GET_MODE (x);
          output_address (XEXP (x, 0));
          break;

        case LABEL_REF:
        case SYMBOL_REF:
          output_addr_const (asm_out_file, x);
          break;

        case CONST_INT:
          asm_fprintf (f, "%wd", INTVAL (x));
          break;

        case CONST_VECTOR:
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
            {
              gcc_assert (aarch64_const_vec_all_same_int_p (x,
                                                            HOST_WIDE_INT_MIN,
                                                            HOST_WIDE_INT_MAX));
              asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
            }
          else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
            fputc ('0', f);
          else
            gcc_unreachable ();
          break;

        case CONST_DOUBLE:
          /* CONST_DOUBLE can represent a double-width integer.
             In this case, the mode of x is VOIDmode.  */
          if (GET_MODE (x) == VOIDmode)
            ; /* Do nothing.  */
          else if (aarch64_float_const_zero_rtx_p (x))
            {
              fputc ('0', f);
              break;
            }
          else if (aarch64_float_const_representable_p (x))
            {
#define buf_size 20
              char float_buf[buf_size] = {'\0'};
              REAL_VALUE_TYPE r;
              REAL_VALUE_FROM_CONST_DOUBLE (r, x);
              real_to_decimal_for_mode (float_buf, &r,
                                        buf_size, buf_size,
                                        1, GET_MODE (x));
              asm_fprintf (asm_out_file, "%s", float_buf);
              break;
#undef buf_size
            }
          output_operand_lossage ("invalid constant");
          return;

        default:
          output_operand_lossage ("invalid operand");
          return;
        }
      break;

    case 'A':
      if (GET_CODE (x) == HIGH)
        x = XEXP (x, 0);

      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
        case SYMBOL_SMALL_GOT:
          asm_fprintf (asm_out_file, ":got:");
          break;

        case SYMBOL_SMALL_TLSGD:
          asm_fprintf (asm_out_file, ":tlsgd:");
          break;

        case SYMBOL_SMALL_TLSDESC:
          asm_fprintf (asm_out_file, ":tlsdesc:");
          break;

        case SYMBOL_SMALL_GOTTPREL:
          asm_fprintf (asm_out_file, ":gottprel:");
          break;

        case SYMBOL_SMALL_TPREL:
          asm_fprintf (asm_out_file, ":tprel:");
          break;

        case SYMBOL_TINY_GOT:
          gcc_unreachable ();
          break;

        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    case 'L':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
        case SYMBOL_SMALL_GOT:
          asm_fprintf (asm_out_file, ":lo12:");
          break;

        case SYMBOL_SMALL_TLSGD:
          asm_fprintf (asm_out_file, ":tlsgd_lo12:");
          break;

        case SYMBOL_SMALL_TLSDESC:
          asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
          break;

        case SYMBOL_SMALL_GOTTPREL:
          asm_fprintf (asm_out_file, ":gottprel_lo12:");
          break;

        case SYMBOL_SMALL_TPREL:
          asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
          break;

        case SYMBOL_TINY_GOT:
          asm_fprintf (asm_out_file, ":got:");
          break;

        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    case 'G':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
        case SYMBOL_SMALL_TPREL:
          asm_fprintf (asm_out_file, ":tprel_hi12:");
          break;

        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    default:
      output_operand_lossage ("invalid operand prefix '%%%c'", code);
      return;
    }
}
void
aarch64_print_operand_address (FILE *f, rtx x)
{
  struct aarch64_address_info addr;

  if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
                                MEM, true))
    switch (addr.type)
      {
      case ADDRESS_REG_IMM:
        if (addr.offset == const0_rtx)
          asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
        else
          asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
                       INTVAL (addr.offset));
        return;

      case ADDRESS_REG_REG:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
                       reg_names [REGNO (addr.offset)]);
        else
          asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
                       reg_names [REGNO (addr.offset)], addr.shift);
        return;

      case ADDRESS_REG_UXTW:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM);
        else
          asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM, addr.shift);
        return;

      case ADDRESS_REG_SXTW:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM);
        else
          asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM, addr.shift);
        return;

      case ADDRESS_REG_WB:
        switch (GET_CODE (x))
          {
          case PRE_INC:
            asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case POST_INC:
            asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case PRE_DEC:
            asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case POST_DEC:
            asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case PRE_MODIFY:
            asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
                         INTVAL (addr.offset));
            return;
          case POST_MODIFY:
            asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
                         INTVAL (addr.offset));
            return;
          default:
            break;
          }
        break;

      case ADDRESS_LO_SUM:
        asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
        output_addr_const (f, addr.offset);
        asm_fprintf (f, "]");
        return;

      case ADDRESS_SYMBOLIC:
        break;
      }

  output_addr_const (f, x);
}
bool
aarch64_label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return true;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return false;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
              return true;
        }
      else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
        return true;
    }

  return false;
}
/* Implement REGNO_REG_CLASS.  */

enum reg_class
aarch64_regno_regclass (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return CORE_REGS;

  if (regno == SP_REGNUM)
    return STACK_REG;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return POINTER_REGS;

  if (FP_REGNUM_P (regno))
    return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;

  return NO_REGS;
}
/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
aarch64_legitimize_reload_address (rtx *x_p,
                                   enum machine_mode mode,
                                   int opnum, int type,
                                   int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  /* Do not allow mem (plus (reg, const)) if vector mode.  */
  if (aarch64_vector_mode_p (mode)
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    {
      rtx orig_rtx = x;

      x = copy_rtx (x);
      push_reload (orig_rtx, NULL_RTX, x_p, NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by splitting
     the addend across an add and the mem insn.  This can cut the number of
     extra insns needed from 3 to 1.  It is only useful for load/store of a
     single register with 12 bit offset field.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && HARD_REGISTER_P (XEXP (x, 0))
      && mode != TImode
      && mode != TFmode
      && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = val & 0xfff;
      HOST_WIDE_INT high = val - low;
      HOST_WIDE_INT offs;
      rtx cst;
      enum machine_mode xmode = GET_MODE (x);

      /* In ILP32, xmode can be either DImode or SImode.  */
      gcc_assert (xmode == DImode || xmode == SImode);

      /* Reload non-zero BLKmode offsets.  This is because we cannot
         ascertain BLKmode alignment.  */
      if (GET_MODE_SIZE (mode) == 0)
        return NULL_RTX;

      offs = low % GET_MODE_SIZE (mode);

      /* Align misaligned offset by adjusting high part to compensate.  */
      if (offs != 0)
        {
          if (aarch64_uimm12_shift (high + offs))
            {
              /* Align down.  */
              low = low - offs;
              high = high + offs;
            }
          else
            {
              /* Align up.  */
              offs = GET_MODE_SIZE (mode) - offs;
              low = low + offs;
              high = high + (low & 0x1000) - offs;
              low &= 0xfff;
            }
        }

      /* Check for overflow.  */
      if (high + low != val)
        return NULL_RTX;

      cst = GEN_INT (high);
      if (!aarch64_uimm12_shift (high))
        cst = force_const_mem (xmode, cst);

      /* Reload high part into base reg, leaving the low part
         in the mem instruction.
         Note that replacing this gen_rtx_PLUS with plus_constant is
         wrong in this case because we rely on the
         (plus (plus reg c1) c2) structure being preserved so that
         XEXP (*p, 0) in push_reload below uses the correct term.  */
      x = gen_rtx_PLUS (xmode,
                        gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
                        GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}
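
/* Worked example (illustrative): for an address (plus (reg:DI x1)
   (const_int 0x3458)) with a DImode access, val == 0x3458 splits into
   low == 0x458 and high == 0x3000; low is already a multiple of 8, so
   no realignment is needed, and since 0x3000 passes
   aarch64_uimm12_shift the reload ends up as
       add  xT, x1, #0x3000
       ldr  x0, [xT, #0x458]
   with only the high part reloaded into the base register.  */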
static reg_class_t
aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
                          reg_class_t rclass,
                          enum machine_mode mode,
                          secondary_reload_info *sri)
{
  /* Without the TARGET_SIMD instructions we cannot move a Q register
     to a Q register directly.  We need a scratch.  */
  if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
      && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
      && reg_class_subset_p (rclass, FP_REGS))
    {
      if (mode == TFmode)
        sri->icode = CODE_FOR_aarch64_reload_movtf;
      else if (mode == TImode)
        sri->icode = CODE_FOR_aarch64_reload_movti;
      return NO_REGS;
    }

  /* A TFmode or TImode memory access should be handled via an FP_REGS
     because AArch64 has richer addressing modes for LDR/STR instructions
     than LDP/STP instructions.  */
  if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
      && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
    return FP_REGS;

  if (rclass == FP_REGS && (mode == TImode || mode == TFmode)
      && CONSTANT_P (x))
    return CORE_REGS;

  return NO_REGS;
}
static bool
aarch64_can_eliminate (const int from, const int to)
{
  /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
     HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */

  if (frame_pointer_needed)
    {
      if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
        return true;
      if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
        return false;
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          && !cfun->calls_alloca)
        return true;
      if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
        return true;
      return false;
    }
  else
    {
      /* If we decided that we didn't need a leaf frame pointer but then used
         LR in the function, then we'll want a frame pointer after all, so
         prevent this elimination to ensure a frame pointer is used.

         NOTE: the original value of flag_omit_frame_pointer gets trashed
         IFF flag_omit_leaf_frame_pointer is true, so we check the value
         of faked_omit_frame_pointer here (which is true when we always
         wish to keep non-leaf frame pointers but only wish to keep leaf
         frame pointers when LR is clobbered).  */
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          && df_regs_ever_live_p (LR_REGNUM)
          && faked_omit_frame_pointer)
        return false;
    }

  return true;
}
HOST_WIDE_INT
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
  HOST_WIDE_INT frame_size;
  HOST_WIDE_INT offset;

  aarch64_layout_frame ();
  frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
                + crtl->outgoing_args_size
                + cfun->machine->saved_varargs_size);

  frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
  offset = frame_size;

  if (to == HARD_FRAME_POINTER_REGNUM)
    {
      if (from == ARG_POINTER_REGNUM)
        return offset - crtl->outgoing_args_size;

      if (from == FRAME_POINTER_REGNUM)
        return cfun->machine->frame.saved_regs_size + get_frame_size ();
    }

  if (to == STACK_POINTER_REGNUM)
    {
      if (from == FRAME_POINTER_REGNUM)
        {
          HOST_WIDE_INT elim = crtl->outgoing_args_size
                               + cfun->machine->frame.saved_regs_size
                               + get_frame_size ()
                               - cfun->machine->frame.fp_lr_offset;
          elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
          return elim;
        }
    }

  return offset;
}
/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
static void
aarch64_asm_trampoline_template (FILE *f)
{
  if (TARGET_ILP32)
    {
      asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
      asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
    }
  else
    {
      asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
      asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
    }
  asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
  assemble_aligned_integer (4, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
}
static void
aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;
  const int tramp_code_sz = 16;

  /* Don't need to copy the trailing D-words, we fill those in below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  if (GET_MODE (fnaddr) != ptr_mode)
    fnaddr = convert_memory_address (ptr_mode, fnaddr);
  emit_move_insn (mem, fnaddr);

  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
  emit_move_insn (mem, chain_value);

  /* XXX We should really define a "clear_cache" pattern and use
     gen_clear_cache().  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
                     LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
                     plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
                     ptr_mode);
}
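
/* The resulting trampoline layout is therefore (illustrative):
     bytes 0..15:  the code emitted by aarch64_asm_trampoline_template
     next word:    the target function address (at tramp_code_sz)
     next word:    the static chain value
   with __clear_cache invoked over the whole block afterwards.  */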
static unsigned char
aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
{
  switch (regclass)
    {
    case CORE_REGS:
    case POINTER_REGS:
    case GENERAL_REGS:
    case ALL_REGS:
    case FP_REGS:
    case FP_LO_REGS:
      return
        aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
                                       (GET_MODE_SIZE (mode) + 7) / 8;
    case STACK_REG:
      return 1;

    case NO_REGS:
      return 0;

    default:
      break;
    }
  gcc_unreachable ();
}
static reg_class_t
aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
{
  if (regclass == POINTER_REGS)
    return GENERAL_REGS;

  if (regclass == STACK_REG)
    {
      if (REG_P (x)
          && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
        return regclass;

      return NO_REGS;
    }

  /* If it's an integer immediate that MOVI can't handle, then
     FP_REGS is not an option, so we return NO_REGS instead.  */
  if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
      && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
    return NO_REGS;

  /* Register elimination can result in a request for
     SP+constant->FP_REGS.  We cannot support such operations, which
     use SP as source and an FP_REG as destination, so reject them
     here.  */
  if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
    {
      rtx lhs = XEXP (x, 0);

      /* Look through a possible SUBREG introduced by ILP32.  */
      if (GET_CODE (lhs) == SUBREG)
        lhs = SUBREG_REG (lhs);

      gcc_assert (REG_P (lhs));
      gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
                                      POINTER_REGS));
      return NO_REGS;
    }

  return regclass;
}
static void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];

      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}

static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];

      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}
const char *
aarch64_output_casesi (rtx *operands)
{
  char buf[100];
  char label[100];
  rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
  int index;
  static const char *const patterns[4][2] =
  {
    {
      "ldrb\t%w3, [%0,%w1,uxtw]",
      "add\t%3, %4, %w3, sxtb #2"
    },
    {
      "ldrh\t%w3, [%0,%w1,uxtw #1]",
      "add\t%3, %4, %w3, sxth #2"
    },
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    },
    /* We assume that DImode is only generated when not optimizing and
       that we don't really need 64-bit address offsets.  That would
       imply an object file with 8GB of code in a single function!  */
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    }
  };

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));

  gcc_assert (index >= 0 && index <= 3);

  /* Need to implement table size reduction, by changing the code below.  */
  output_asm_insn (patterns[index][0], operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  snprintf (buf, sizeof (buf),
            "adr\t%%4, %s", targetm.strip_name_encoding (label));
  output_asm_insn (buf, operands);
  output_asm_insn (patterns[index][1], operands);
  output_asm_insn ("br\t%3", operands);
  assemble_label (asm_out_file, label);
  return "";
}
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;

      for (size = 8; size <= 32; size *= 2)
        {
          HOST_WIDE_INT bits = ((HOST_WIDE_INT) 1U << size) - 1;
          if (mask == bits << shift)
            return size;
        }
    }
  return 0;
}
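
/* Example: aarch64_uxt_size (1, 0x1fe) returns 8, because
   0x1fe == 0xff << 1, i.e. a byte-sized value scaled by 2, which the
   extended-register add/sub forms express as "uxtb #1".  */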
static bool
aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                                   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
                            rtx x ATTRIBUTE_UNUSED,
                            unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}
/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}

/* Helper function for rtx cost calculation.  Strip a shift or extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */
static rtx
aarch64_strip_shift_or_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && aarch64_is_extend_from_extract (GET_MODE (op),
                                         XEXP (XEXP (op, 0), 1),
                                         XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
                           INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);

  if (op != x)
    return op;

  return aarch64_strip_shift (x);
}
/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
                   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
  rtx op0, op1;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;

  switch (code)
    {
    case SET:
      op0 = SET_DEST (x);
      op1 = SET_SRC (x);

      switch (GET_CODE (op0))
        {
        case MEM:
          if (speed)
            *cost += extra_cost->ldst.store;

          if (op1 != const0_rtx)
            *cost += rtx_cost (op1, SET, 1, speed);
          return true;

        case SUBREG:
          if (! REG_P (SUBREG_REG (op0)))
            *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
          /* Fall through.  */
        case REG:
          /* Cost is just the cost of the RHS of the set.  */
          *cost += rtx_cost (op1, SET, 1, true);
          return true;

        case ZERO_EXTRACT:  /* Bit-field insertion.  */
        case SIGN_EXTRACT:
          /* Strip any redundant widening of the RHS to meet the width of
             the target.  */
          if (GET_CODE (op1) == SUBREG)
            op1 = SUBREG_REG (op1);
          if ((GET_CODE (op1) == ZERO_EXTEND
               || GET_CODE (op1) == SIGN_EXTEND)
              && GET_CODE (XEXP (op0, 1)) == CONST_INT
              && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
                  >= INTVAL (XEXP (op0, 1))))
            op1 = XEXP (op1, 0);
          *cost += rtx_cost (op1, SET, 1, speed);
          return true;

        default:
          break;
        }
      return false;

    case MEM:
      if (speed)
        *cost += extra_cost->ldst.load;
      return true;

    case NEG:
      op0 = CONST0_RTX (GET_MODE (x));
      op1 = XEXP (x, 0);
      goto cost_minus;

    case COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (op1 == const0_rtx
          && GET_CODE (op0) == AND)
        {
          x = op0;
          goto cost_logic;
        }

      /* Comparisons can work if the order is swapped.
         Canonicalization puts the more complex operation first, but
         we want it in op1.  */
      if (! (REG_P (op0)
             || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
        {
          op0 = XEXP (x, 1);
          op1 = XEXP (x, 0);
        }
      goto cost_minus;

    case MINUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

    cost_minus:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
          || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
              && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
        {
          if (op0 != const0_rtx)
            *cost += rtx_cost (op0, MINUS, 0, speed);

          if (CONST_INT_P (op1))
            {
              if (!aarch64_uimm12_shift (INTVAL (op1)))
                *cost += rtx_cost (op1, MINUS, 1, speed);
            }
          else
            {
              op1 = aarch64_strip_shift_or_extend (op1);
              *cost += rtx_cost (op1, MINUS, 1, speed);
            }
          return true;
        }
      return false;

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
            {
              *cost += rtx_cost (op0, PLUS, 0, speed);
            }
          else
            {
              rtx new_op0 = aarch64_strip_shift_or_extend (op0);

              if (new_op0 == op0
                  && GET_CODE (op0) == MULT)
                {
                  if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
                       && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
                      || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
                          && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
                    {
                      *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
                                          speed)
                                + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
                                            speed)
                                + rtx_cost (op1, PLUS, 1, speed));
                      if (speed)
                        *cost +=
                          extra_cost->mult[GET_MODE (x) == DImode].extend_add;
                      return true;
                    }

                  *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
                            + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
                            + rtx_cost (op1, PLUS, 1, speed));

                  if (speed)
                    *cost += extra_cost->mult[GET_MODE (x) == DImode].add;

                  return true;
                }

              *cost += (rtx_cost (new_op0, PLUS, 0, speed)
                        + rtx_cost (op1, PLUS, 1, speed));
            }
          return true;
        }
      return false;

    case IOR:
    case XOR:
    case AND:
    cost_logic:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          if (CONST_INT_P (op1)
              && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
            {
              *cost += rtx_cost (op0, AND, 0, speed);
            }
          else
            {
              if (GET_CODE (op0) == NOT)
                op0 = XEXP (op0, 0);
              op0 = aarch64_strip_shift (op0);
              *cost += (rtx_cost (op0, AND, 0, speed)
                        + rtx_cost (op1, AND, 1, speed));
            }
          return true;
        }
      return false;

    case ZERO_EXTEND:
      if ((GET_MODE (x) == DImode
           && GET_MODE (XEXP (x, 0)) == SImode)
          || GET_CODE (XEXP (x, 0)) == MEM)
        {
          *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
          return true;
        }
      return false;

    case SIGN_EXTEND:
      if (GET_CODE (XEXP (x, 0)) == MEM)
        {
          *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
          return true;
        }
      return false;

    case ROTATE:
      if (!CONST_INT_P (XEXP (x, 1)))
        *cost += COSTS_N_INSNS (2);
      /* Fall through.  */
    case ROTATERT:
    case LSHIFTRT:
    case ASHIFT:
    case ASHIFTRT:
      /* Shifting by a register often takes an extra cycle.  */
      if (speed && !CONST_INT_P (XEXP (x, 1)))
        *cost += extra_cost->alu.arith_shift_reg;

      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
      return true;

    case HIGH:
      if (!CONSTANT_P (XEXP (x, 0)))
        *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
      return true;

    case LO_SUM:
      if (!CONSTANT_P (XEXP (x, 1)))
        *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
      *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
      return true;

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
      return true;

    case MULT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      *cost = COSTS_N_INSNS (1);
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          if (CONST_INT_P (op1)
              && exact_log2 (INTVAL (op1)) > 0)
            {
              *cost += rtx_cost (op0, ASHIFT, 0, speed);
              return true;
            }

          if ((GET_CODE (op0) == ZERO_EXTEND
               && GET_CODE (op1) == ZERO_EXTEND)
              || (GET_CODE (op0) == SIGN_EXTEND
                  && GET_CODE (op1) == SIGN_EXTEND))
            {
              *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
                        + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
              if (speed)
                *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
              return true;
            }

          if (speed)
            *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
        }
      else if (speed)
        {
          if (GET_MODE (x) == DFmode)
            *cost += extra_cost->fp[1].mult;
          else if (GET_MODE (x) == SFmode)
            *cost += extra_cost->fp[0].mult;
        }

      return false; /* All arguments need to be in registers.  */

    case MOD:
    case UMOD:
      *cost = COSTS_N_INSNS (2);
      if (speed)
        {
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
                      + extra_cost->mult[GET_MODE (x) == DImode].idiv);
          else if (GET_MODE (x) == DFmode)
            *cost += (extra_cost->fp[1].mult
                      + extra_cost->fp[1].div);
          else if (GET_MODE (x) == SFmode)
            *cost += (extra_cost->fp[0].mult
                      + extra_cost->fp[0].div);
        }
      return false; /* All arguments need to be in registers.  */

    case DIV:
    case UDIV:
      *cost = COSTS_N_INSNS (1);
      if (speed)
        {
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
          else if (GET_MODE (x) == DFmode)
            *cost += extra_cost->fp[1].div;
          else if (GET_MODE (x) == SFmode)
            *cost += extra_cost->fp[0].div;
        }
      return false; /* All arguments need to be in registers.  */

    default:
      break;
    }
  return false;
}
static int
aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
		      enum machine_mode mode ATTRIBUTE_UNUSED,
		      addr_space_t as ATTRIBUTE_UNUSED,
		      bool speed ATTRIBUTE_UNUSED)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;

  if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
    return addr_cost->pre_modify;

  if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
    return addr_cost->post_modify;

  if (c == PLUS)
    {
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	return addr_cost->imm_offset;
      else if (GET_CODE (XEXP (x, 0)) == MULT
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	       || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	return addr_cost->register_extend;

      return addr_cost->register_offset;
    }
  else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return addr_cost->imm_offset;

  return 0;
}
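
/* Illustrative mapping (not from the per-CPU cost tables, which supply
   the actual numbers): an access such as "ldr x0, [x1, #16]" falls in the
   imm_offset bucket above, "ldr x0, [x1, x2, lsl #3]" in register_offset,
   "ldr w0, [x1, w2, sxtw #2]" in register_extend, and a writeback form
   like "ldr x0, [x1, #8]!" in pre_modify.  */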
static int
aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			    reg_class_t from, reg_class_t to)
{
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  /* When AdvSIMD instructions are disabled it is not possible to move
     a 128-bit value directly between Q registers.  This is handled in
     secondary reload.  A general register is used as a scratch to move
     the upper DI value and the lower DI value is moved directly,
     hence the cost is the sum of three moves.  */

  if (! TARGET_SIMD
      && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
    return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

  return regmove_cost->FP2FP;
}

static int
aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			  reg_class_t rclass ATTRIBUTE_UNUSED,
			  bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				    tree vectype,
				    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return aarch64_tune_params->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return aarch64_tune_params->vec_costs->scalar_load_cost;

    case scalar_store:
      return aarch64_tune_params->vec_costs->scalar_store_cost;

    case vector_stmt:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vector_load:
      return aarch64_tune_params->vec_costs->vec_align_load_cost;

    case vector_store:
      return aarch64_tune_params->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return aarch64_tune_params->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return aarch64_tune_params->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return aarch64_tune_params->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return aarch64_tune_params->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return aarch64_tune_params->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
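
/* For example (illustrative): constructing a V4SI vector element by
   element is costed as TYPE_VECTOR_SUBPARTS / 2 + 1 = 4 / 2 + 1 = 3 by
   the vec_construct case above, independently of the tuning tables.  */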
/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		       struct _stmt_vec_info *stmt_info, int misalign,
		       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
	    aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 a function (linear for now) of the loop nest level.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	{
	  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
	  unsigned nest_level = loop_depth (loop);

	  count *= nest_level;
	}

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
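
/* Worked example (illustrative): a vect_body statement at loop nest
   depth 2 with a base cost of 1 and a COUNT of 4 contributes
   4 * 2 * 1 = 8 to the vect_body bucket, so inner-loop statements
   weigh linearly more than those in the outermost vectorized loop.  */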
static void initialize_aarch64_code_model (void);

/* Parse the architecture extension string.  */

static void
aarch64_parse_extension (char *str)
{
  /* The extension string is parsed left to right.  */
  const struct aarch64_option_extension *opt = NULL;

  /* Flag to say whether we are adding or removing an extension.  */
  int adding_ext = -1;

  while (str != NULL && *str != 0)
    {
      char *ext;
      size_t len;

      str++;
      ext = strchr (str, '+');

      if (ext != NULL)
	len = ext - str;
      else
	len = strlen (str);

      if (len >= 2 && strncmp (str, "no", 2) == 0)
	{
	  adding_ext = 0;
	  len -= 2;
	  str += 2;
	}
      else if (len > 0)
	adding_ext = 1;

      if (len == 0)
	{
	  error ("missing feature modifier after %qs", "+no");
	  return;
	}

      /* Scan over the extensions table trying to find an exact match.  */
      for (opt = all_extensions; opt->name != NULL; opt++)
	{
	  if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
	    {
	      /* Add or remove the extension.  */
	      if (adding_ext)
		aarch64_isa_flags |= opt->flags_on;
	      else
		aarch64_isa_flags &= ~(opt->flags_off);
	      break;
	    }
	}

      if (opt->name == NULL)
	{
	  /* Extension not found in list.  */
	  error ("unknown feature modifier %qs", str);
	  return;
	}

      str = ext;
    }

  return;
}
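
/* For example (illustrative): given "-march=armv8-a+crypto+nofp" this
   parser walks "+crypto+nofp" left to right, first ORing in the
   flags_on mask of the "crypto" table entry, then clearing the
   flags_off mask of the "fp" entry because of its "no" prefix.  */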
/* Parse the ARCH string.  */

static void
aarch64_parse_arch (void)
{
  char *ext;
  const struct processor *arch;
  char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
  size_t len;

  strcpy (str, aarch64_arch_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing arch name in -march=%qs", str);
      return;
    }

  /* Loop through the list of supported ARCHs to find a match.  */
  for (arch = all_architectures; arch->name != NULL; arch++)
    {
      if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
	{
	  selected_arch = arch;
	  aarch64_isa_flags = selected_arch->flags;
	  selected_cpu = &all_cores[selected_arch->core];

	  if (ext != NULL)
	    {
	      /* ARCH string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  return;
	}
    }

  /* ARCH name not found in list.  */
  error ("unknown value %qs for -march", str);
  return;
}

/* Parse the CPU string.  */

static void
aarch64_parse_cpu (void)
{
  char *ext;
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
  size_t len;

  strcpy (str, aarch64_cpu_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing cpu name in -mcpu=%qs", str);
      return;
    }

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
	{
	  selected_cpu = cpu;
	  aarch64_isa_flags = selected_cpu->flags;

	  if (ext != NULL)
	    {
	      /* CPU string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mcpu", str);
  return;
}

/* Parse the TUNE string.  */

static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
	{
	  selected_tune = cpu;
	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
  return;
}
/* Implement TARGET_OPTION_OVERRIDE.  */

static void
aarch64_override_options (void)
{
  /* march wins over mcpu, so when march is defined, mcpu takes the same value,
     otherwise march remains undefined.  mtune can be used with either march or
     mcpu.  */

  if (aarch64_arch_string)
    {
      aarch64_parse_arch ();
      aarch64_cpu_string = NULL;
    }

  if (aarch64_cpu_string)
    {
      aarch64_parse_cpu ();
      selected_arch = NULL;
    }

  if (aarch64_tune_string)
    {
      aarch64_parse_tune ();
    }

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "cortex-a53".  */
  if (!selected_cpu)
    {
      selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
      aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
    }

  gcc_assert (selected_cpu);

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!selected_tune)
    selected_tune = &all_cores[selected_cpu->core];

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  aarch64_tune_params = selected_tune->tune;

  aarch64_override_options_after_change ();
}

/* Implement targetm.override_options_after_change.  */

static void
aarch64_override_options_after_change (void)
{
  faked_omit_frame_pointer = false;

  /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
     that aarch64_frame_pointer_required will be called.  We need to remember
     whether flag_omit_frame_pointer was turned on normally or just faked.  */

  if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
    {
      flag_omit_frame_pointer = true;
      faked_omit_frame_pointer = true;
    }
}

static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */

static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
	{
	case AARCH64_CMODEL_TINY:
	  aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
	  break;
	case AARCH64_CMODEL_SMALL:
	  aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
	  break;
	case AARCH64_CMODEL_LARGE:
	  sorry ("code model %qs with -f%s", "large",
		 flag_pic > 1 ? "PIC" : "pic");
	default:
	  gcc_unreachable ();
	}
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}
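
/* For example (illustrative): compiling with "-mcmodel=small -fpic"
   leaves aarch64_cmodel_var at AARCH64_CMODEL_SMALL and the switch
   above promotes the effective model to AARCH64_CMODEL_SMALL_PIC;
   without -fpic the user's choice is taken unchanged.  */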
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	  : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x,
			 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_LARGE:
	  return SYMBOL_FORCE_TO_MEM;

	case AARCH64_CMODEL_TINY_PIC:
	case AARCH64_CMODEL_TINY:
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	case AARCH64_CMODEL_SMALL:
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE
	  || CONSTANT_POOL_ADDRESS_P (x))
	return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
	return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_TINY_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_TINY_GOT;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_SMALL_GOT;
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}

/* Return true if X holds either a quarter-precision or
     floating-point +0.0 constant.  */
static bool
aarch64_valid_floating_const (enum machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  /* TODO: We could handle moving 0.0 to a TFmode register,
     but first we would like to refactor the movtf_aarch64
     to be more amicable to split moves properly and
     correctly gate on TARGET_SIMD.  For now - reject all
     constants which are not to SFmode or DFmode registers.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;
  return aarch64_float_const_representable_p (x);
}

static bool
aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
    return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}

rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:

     struct  __va_list
     {
       void *__stack;
       void *__gr_top;
       void *__vr_top;
       int   __gr_offs;
       int   __vr_offs;
     };  */

static tree
aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;

  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;

  /* Create the fields.  */
  f_stack = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__stack"),
			ptr_type_node);
  f_grtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_top"),
			ptr_type_node);
  f_vrtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_top"),
			ptr_type_node);
  f_groff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_offs"),
			integer_type_node);
  f_vroff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_offs"),
			integer_type_node);

  DECL_ARTIFICIAL (f_stack) = 1;
  DECL_ARTIFICIAL (f_grtop) = 1;
  DECL_ARTIFICIAL (f_vrtop) = 1;
  DECL_ARTIFICIAL (f_groff) = 1;
  DECL_ARTIFICIAL (f_vroff) = 1;

  DECL_FIELD_CONTEXT (f_stack) = va_list_type;
  DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_groff) = va_list_type;
  DECL_FIELD_CONTEXT (f_vroff) = va_list_type;

  TYPE_FIELDS (va_list_type) = f_stack;
  DECL_CHAIN (f_stack) = f_grtop;
  DECL_CHAIN (f_grtop) = f_vrtop;
  DECL_CHAIN (f_vrtop) = f_groff;
  DECL_CHAIN (f_groff) = f_vroff;

  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  const CUMULATIVE_ARGS *cum;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, grtop, vrtop, groff, vroff;
  tree t;
  int gr_save_area_size;
  int vr_save_area_size;
  int vr_offset;

  cum = &crtl->args.info;
  gr_save_area_size
    = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
  vr_save_area_size
    = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (cum->aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_save_area_size = 0;
    }

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
		  NULL_TREE);
  grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
		  NULL_TREE);
  vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
		  NULL_TREE);
  groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
		  NULL_TREE);
  vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
		  NULL_TREE);

  /* Emit code to initialize STACK, which points to the next varargs stack
     argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
     by named arguments.  STACK is 8-byte aligned.  */
  t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
  if (cum->aapcs_stack_size > 0)
    t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GRTOP, the top of the GR save area.
     virtual_incoming_args_rtx should have been 16 byte aligned.  */
  t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
  t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize VRTOP, the top of the VR save area.
     This address is gr_save_area_bytes below GRTOP, rounded
     down to the next 16-byte boundary.  */
  t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
  vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
				STACK_BOUNDARY / BITS_PER_UNIT);

  if (vr_offset)
    t = fold_build_pointer_plus_hwi (t, -vr_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GROFF, the offset from GRTOP of the
     next GPR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
	      build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Likewise emit code to initialize VROFF, the offset from FTOP
     of the next VR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
	      build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
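
/* Worked example (illustrative): for a variadic callee that has consumed
   two of the eight GP argument registers and none of the eight VRs,
   gr_save_area_size = (8 - 2) * 8 = 48 and vr_save_area_size
   = 8 * 16 = 128, so __gr_offs is initialized to -48 and __vr_offs to
   -128; va_arg then walks these offsets up towards zero.  */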
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */

static tree
aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree addr;
  bool indirect_p;
  bool is_ha;		/* is HFA or HVA.  */
  bool dw_align;	/* double-word align.  */
  enum machine_mode ag_mode = VOIDmode;
  int nregs;
  enum machine_mode mode;

  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, f_top, f_off, off, arg, roundup, on_stack;
  HOST_WIDE_INT size, rsize, adjust, align;
  tree t, u, cond1, cond2;

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);

  mode = TYPE_MODE (type);

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
		  f_stack, NULL_TREE);
  size = int_size_in_bytes (type);
  align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;

  dw_align = false;
  adjust = 0;
  if (aarch64_vfp_is_call_or_return_candidate (mode,
					       type,
					       &ag_mode,
					       &nregs,
					       &is_ha))
    {
      /* TYPE passed in fp/simd registers.  */
      if (TARGET_GENERAL_REGS_ONLY)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");

      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
		      unshare_expr (valist), f_vrtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
		      unshare_expr (valist), f_vroff, NULL_TREE);

      rsize = nregs * UNITS_PER_VREG;

      if (is_ha)
	{
	  if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
	    adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
	}
      else if (BLOCK_REG_PADDING (mode, type, 1) == downward
	       && size < UNITS_PER_VREG)
	{
	  adjust = UNITS_PER_VREG - size;
	}
    }
  else
    {
      /* TYPE passed in general registers.  */
      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
		      unshare_expr (valist), f_grtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
		      unshare_expr (valist), f_groff, NULL_TREE);
      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
      nregs = rsize / UNITS_PER_WORD;

      if (align > 8)
	dw_align = true;

      if (BLOCK_REG_PADDING (mode, type, 1) == downward
	  && size < UNITS_PER_WORD)
	{
	  adjust = UNITS_PER_WORD - size;
	}
    }

  /* Get a local temporary for the field value.  */
  off = get_initialized_tmp_var (f_off, pre_p, NULL);

  /* Emit code to branch if off >= 0.  */
  t = build2 (GE_EXPR, boolean_type_node, off,
	      build_int_cst (TREE_TYPE (off), 0));
  cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);

  if (dw_align)
    {
      /* Emit: offs = (offs + 15) & -16.  */
      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
		  build_int_cst (TREE_TYPE (off), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
		  build_int_cst (TREE_TYPE (off), -16));
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
    }
  else
    roundup = NULL;

  /* Update ap.__[g|v]r_offs  */
  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
	      build_int_cst (TREE_TYPE (off), rsize));
  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);

  /* String up.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);

  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
	      build_int_cst (TREE_TYPE (f_off), 0));
  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);

  /* String up: make sure the assignment happens before the use.  */
  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
  COND_EXPR_ELSE (cond1) = t;

  /* Prepare the trees handling the argument that is passed on the stack;
     the top level node will store in ON_STACK.  */
  arg = get_initialized_tmp_var (stack, pre_p, NULL);
  if (align > 8)
    {
      /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
      t = fold_convert (intDI_type_node, arg);
      t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -16));
      t = fold_convert (TREE_TYPE (arg), t);
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
    }
  else
    roundup = NULL;
  /* Advance ap.__stack  */
  t = fold_convert (intDI_type_node, arg);
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), size + 7));
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), -8));
  t = fold_convert (TREE_TYPE (arg), t);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
  /* String up roundup and advance.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
  /* String up with arg  */
  on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
  /* Big-endianness related address adjustment.  */
  if (BLOCK_REG_PADDING (mode, type, 1) == downward
      && size < UNITS_PER_WORD)
    {
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
		  size_int (UNITS_PER_WORD - size));
      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
    }

  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);

  /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
  t = off;
  if (adjust)
    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
		build_int_cst (TREE_TYPE (off), adjust));

  t = fold_convert (sizetype, t);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);

  if (is_ha)
    {
      /* type ha; // treat as "struct {ftype field[n];}"
	 ... [computing offs]
	 for (i = 0; i <nregs; ++i, offs += 16)
	   ha.field[i] = *((ftype *)(ap.__vr_top + offs));
	 return ha;  */
      int i;
      tree tmp_ha, field_t, field_ptr_t;

      /* Declare a local variable.  */
      tmp_ha = create_tmp_var_raw (type, "ha");
      gimple_add_tmp_var (tmp_ha);

      /* Establish the base type.  */
      switch (ag_mode)
	{
	case SFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
	case DFmode:
	  field_t = double_type_node;
	  field_ptr_t = double_ptr_type_node;
	  break;
	case TFmode:
	  field_t = long_double_type_node;
	  field_ptr_t = long_double_ptr_type_node;
	  break;
/* The half precision and quad precision are not fully supported yet.  Enable
   the following code after the support is complete.  Need to find the correct
   type node for __fp16 *.  */
#if 0
	case HFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
#endif
	case V2SImode:
	case V4SImode:
	    {
	      tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
	      field_t = build_vector_type_for_mode (innertype, ag_mode);
	      field_ptr_t = build_pointer_type (field_t);
	    }
	  break;
	default:
	  gcc_assert (0);
	}

      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
      tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
      addr = t;
      t = fold_convert (field_ptr_t, addr);
      t = build2 (MODIFY_EXPR, field_t,
		  build1 (INDIRECT_REF, field_t, tmp_ha),
		  build1 (INDIRECT_REF, field_t, t));

      /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
      for (i = 1; i < nregs; ++i)
	{
	  addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
	  u = fold_convert (field_ptr_t, addr);
	  u = build2 (MODIFY_EXPR, field_t,
		      build2 (MEM_REF, field_t, tmp_ha,
			      build_int_cst (field_ptr_t,
					     (i *
					      int_size_in_bytes (field_t)))),
		      build1 (INDIRECT_REF, field_t, u));
	  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
	}

      u = fold_convert (TREE_TYPE (f_top), tmp_ha);
      t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
    }

  COND_EXPR_ELSE (cond2) = t;
  addr = fold_convert (build_pointer_type (type), cond1);
  addr = build_va_arg_indirect_ref (addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);

  return addr;
}
/* Implement TARGET_SETUP_INCOMING_VARARGS.  */

static void
aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
				tree type, int *pretend_size ATTRIBUTE_UNUSED,
				int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS local_cum;
  int gr_saved, vr_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *cum;
  aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);

  /* Found out how many registers we need to save.  */
  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (local_cum.aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_saved = 0;
    }

  if (!no_rtl)
    {
      if (gr_saved > 0)
	{
	  rtx ptr, mem;

	  /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
	  ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
			       - gr_saved * UNITS_PER_WORD);
	  mem = gen_frame_mem (BLKmode, ptr);
	  set_mem_alias_set (mem, get_varargs_alias_set ());

	  move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
			       mem, gr_saved);
	}
      if (vr_saved > 0)
	{
	  /* We can't use move_block_from_reg, because it will use
	     the wrong mode, storing D regs only.  */
	  enum machine_mode mode = TImode;
	  int off, i;

	  /* Set OFF to the offset from virtual_incoming_args_rtx of
	     the first vector register.  The VR save area lies below
	     the GR one, and is aligned to 16 bytes.  */
	  off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
				   STACK_BOUNDARY / BITS_PER_UNIT);
	  off -= vr_saved * UNITS_PER_VREG;

	  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
	    {
	      rtx ptr, mem;

	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
	      mem = gen_frame_mem (mode, ptr);
	      set_mem_alias_set (mem, get_varargs_alias_set ());
	      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
	      off += UNITS_PER_VREG;
	    }
	}
    }

  /* We don't save the size into *PRETEND_SIZE because we want to avoid
     any complication of having crtl->args.pretend_args_size changed.  */
  cfun->machine->saved_varargs_size
    = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
			 STACK_BOUNDARY / BITS_PER_UNIT)
       + vr_saved * UNITS_PER_VREG);
}
static void
aarch64_conditional_register_usage (void)
{
  int i;
  if (!TARGET_SIMD)
    {
      for (i = V0_REGNUM; i <= V31_REGNUM; i++)
	{
	  fixed_regs[i] = 1;
	  call_used_regs[i] = 1;
	}
    }
}

/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */

static int
aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
  enum machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
6129 aarch64_lra_p (void)
6131 return aarch64_lra_flag
;
6134 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6135 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6136 array types. The C99 floating-point complex types are also considered
6137 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6138 types, which are GCC extensions and out of the scope of AAPCS64, are
6139 treated as composite types here as well.
6141 Note that MODE itself is not sufficient in determining whether a type
6142 is such a composite type or not. This is because
6143 stor-layout.c:compute_record_mode may have already changed the MODE
6144 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6145 structure with only one field may have its MODE set to the mode of the
6146 field. Also an integer mode whose size matches the size of the
6147 RECORD_TYPE type may be used to substitute the original mode
6148 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6149 solely relied on. */
6152 aarch64_composite_type_p (const_tree type
,
6153 enum machine_mode mode
)
6155 if (type
&& (AGGREGATE_TYPE_P (type
) || TREE_CODE (type
) == COMPLEX_TYPE
))
6159 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
6160 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
6166 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6167 type as described in AAPCS64 \S 4.1.2.
6169 See the comment above aarch64_composite_type_p for the notes on MODE. */
6172 aarch64_short_vector_p (const_tree type
,
6173 enum machine_mode mode
)
6175 HOST_WIDE_INT size
= -1;
6177 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
6178 size
= int_size_in_bytes (type
);
6179 else if (!aarch64_composite_type_p (type
, mode
)
6180 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
6181 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
))
6182 size
= GET_MODE_SIZE (mode
);
6184 return (size
== 8 || size
== 16) ? true : false;
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */

static bool
aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
					 const_tree type,
					 enum machine_mode *base_mode,
					 int *count,
					 bool *is_ha)
{
  enum machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      *count = 2;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	{
	  if (is_ha != NULL) *is_ha = true;
	  *count = ag_count;
	}
      else
	return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}

/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
aarch64_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SIMD
      && (mode == V4SImode || mode == V8HImode
	  || mode == V16QImode || mode == V2DImode
	  || mode == V2SImode || mode == V4HImode
	  || mode == V8QImode || mode == V2SFmode
	  || mode == V4SFmode || mode == V2DFmode))
    return true;

  return false;
}

/* Return appropriate SIMD container
   for MODE within a vector of WIDTH bits.  */
static enum machine_mode
aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
{
  gcc_assert (width == 64 || width == 128);
  if (TARGET_SIMD)
    {
      if (width == 128)
	switch (mode)
	  {
	  case DFmode:
	    return V2DFmode;
	  case SFmode:
	    return V4SFmode;
	  case SImode:
	    return V4SImode;
	  case HImode:
	    return V8HImode;
	  case QImode:
	    return V16QImode;
	  case DImode:
	    return V2DImode;
	  default:
	    break;
	  }
      else
	switch (mode)
	  {
	  case SFmode:
	    return V2SFmode;
	  case SImode:
	    return V2SImode;
	  case HImode:
	    return V4HImode;
	  case QImode:
	    return V8QImode;
	  default:
	    break;
	  }
    }
  return word_mode;
}

/* Return 128-bit container as the preferred SIMD mode for MODE.  */
static enum machine_mode
aarch64_preferred_simd_mode (enum machine_mode mode)
{
  return aarch64_simd_container_mode (mode, 128);
}

/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  return (16 | 8);
}
/* A table to help perform AArch64-specific name mangling for AdvSIMD
   vector types in order to conform to the AAPCS64 (see "Procedure
   Call Standard for the ARM 64-bit Architecture", Appendix A).  To
   qualify for emission with the mangled names defined in that document,
   a vector type must not only be of the correct mode but also be
   composed of AdvSIMD vector element types (e.g.
   __builtin_aarch64_simd_qi); these types are registered by
   aarch64_init_simd_builtins ().  In other words, vector types defined
   in other ways e.g. via vector_size attribute will get default
   mangled names.  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *mangled_name;
} aarch64_simd_mangle_map_entry;

static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
  { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
  { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
  { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
  { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
  { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
  { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
  { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
  { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
  { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
  { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
  { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
  { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
  { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
  { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
  { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
  { VOIDmode, NULL, NULL }
};

/* Implement TARGET_MANGLE_TYPE.  */

static const char *
aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;

      while (pos->mode != VOIDmode)
	{
	  tree elt_type = TREE_TYPE (type);

	  if (pos->mode == TYPE_MODE (type)
	      && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	      && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
			  pos->element_type_name))
	    return pos->mangled_name;

	  pos++;
	}
    }

  /* Use the default mangling.  */
  return NULL;
}
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}

/* Return true iff x is a uniform vector of floating-point
   constants, and the constant can be represented in
   quarter-precision form.  Note, as aarch64_float_const_representable
   rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
static bool
aarch64_vect_float_const_representable_p (rtx x)
{
  int i = 0;
  REAL_VALUE_TYPE r0, ri;
  rtx x0, xi;

  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
    return false;

  x0 = CONST_VECTOR_ELT (x, 0);
  if (!CONST_DOUBLE_P (x0))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);

  for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
    {
      xi = CONST_VECTOR_ELT (x, i);
      if (!CONST_DOUBLE_P (xi))
	return false;

      REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
      if (!REAL_VALUES_EQUAL (r0, ri))
	return false;
    }

  return aarch64_float_const_representable_p (x0);
}
/* Return true for valid and false for invalid.  */
bool
aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
			      struct simd_immediate_info *info)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
  matches = 1;						\
  for (i = 0; i < idx; i += (STRIDE))			\
    if (!(TEST))					\
      matches = 0;					\
  if (matches)						\
    {							\
      immtype = (CLASS);				\
      elsize = (ELSIZE);				\
      eshift = (SHIFT);					\
      emvn = (NEG);					\
      break;						\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  int eshift, emvn;

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      if (! (aarch64_simd_imm_zero_p (op, mode)
	     || aarch64_vect_float_const_representable_p (op)))
	return false;

      if (info)
	{
	  info->value = CONST_VECTOR_ELT (op, 0);
	  info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
	  info->mvn = false;
	  info->shift = 0;
	}

      return true;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (GET_CODE (el) == CONST_INT)
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (GET_CODE (el) == CONST_DOUBLE)
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (GET_CODE (el) == CONST_DOUBLE)
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
    }
  while (0);

  if (immtype == -1)
    return false;

  if (info)
    {
      info->element_width = elsize;
      info->mvn = emvn != 0;
      info->shift = eshift;

      unsigned HOST_WIDE_INT imm = 0;

      if (immtype >= 12 && immtype <= 15)
	info->msl = true;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
	      << (i * BITS_PER_UNIT);

	  info->value = GEN_INT (imm);
	}
      else
	{
	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  /* Construct 'abcdefgh' because the assembler cannot handle
	     generic constants.  */
	  if (info->mvn)
	    imm = ~imm;
	  imm = (imm >> info->shift) & 0xff;
	  info->value = GEN_INT (imm);
	}
    }

  return true;
#undef CHECK
}
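
/* For example (illustrative): a V4SI splat of 0x0000ab00 has the byte
   pattern { 0x00, 0xab, 0x00, 0x00 } repeated, which matches the
   CHECK (4, 32, 1, ...) case above, giving element_width 32, shift 8
   and mvn false, i.e. a single "movi v0.4s, #0xab, lsl #8".  */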
static bool
aarch64_const_vec_all_same_int_p (rtx x,
				  HOST_WIDE_INT minval,
				  HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}

/* Check if immediate shift constants are within range.  */
bool
aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
}

/* Return true if X is a uniform vector where all elements
   are either the floating-point constant 0.0 or the
   integer constant 0.  */
bool
aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
{
  return x == CONST0_RTX (mode);
}
bool
aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT imm = INTVAL (x);
  int i;

  for (i = 0; i < 8; i++)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0xff && byte != 0)
	return false;
      imm >>= 8;
    }

  return true;
}
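
/* For example (illustrative): 0x00ff00ff00ff00ff passes the byte test
   above, since every byte is 0x00 or 0xff, and so qualifies for a
   single scalar MOVI; 0x0012000000000000 fails on its 0x12 byte.  */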
bool
aarch64_mov_operand_p (rtx x,
		       enum aarch64_symbol_context context,
		       enum machine_mode mode)
{
  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
    return true;

  if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
    return true;

  return aarch64_classify_symbolic_expression (x, context)
    == SYMBOL_TINY_ABSOLUTE;
}

/* Return a const_int vector of VAL.  */
rtx
aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits);
  int i;

  for (i=0; i < nunits; i++)
    RTVEC_ELT (v, i) = GEN_INT (val);

  return gen_rtx_CONST_VECTOR (mode, v);
}
/* Check OP is a legal scalar immediate for the MOVI instruction.  */

bool
aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_preferred_simd_mode (mode);
  rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}

/* Construct and return a PARALLEL RTX vector.  */
rtx
aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int base = high ? nunits / 2 : 0;
  rtx t1;
  int i;

  for (i=0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
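
/* For example (illustrative): for V4SImode with HIGH true this builds
   the PARALLEL [(const_int 2) (const_int 3)], the lane indices of the
   upper half of the vector; with HIGH false it builds
   [(const_int 0) (const_int 1)] for the lower half.  */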
/* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
   HIGH (exclusive).  */
void
aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT lane;
  gcc_assert (GET_CODE (operand) == CONST_INT);
  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("lane out of range");
}

void
aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  gcc_assert (GET_CODE (operand) == CONST_INT);
  HOST_WIDE_INT lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("constant out of range");
}

/* Emit code to reinterpret one AdvSIMD type as another,
   without altering bits.  */
void
aarch64_simd_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}

/* Emit code to place a AdvSIMD pair result in memory locations (with equal
   registers).  */
void
aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
				    rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
				    rtx op1)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}

/* Return TRUE if OP is a valid vector addressing mode.  */
bool
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
			|| GET_CODE (XEXP (op, 0)) == REG);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
				rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
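
/* For example (illustrative): copying the register pair {v1,v2} to
   {v2,v3} overlaps with the destination numbered above the source, so
   the else branch above emits the component moves in reverse order,
   v2 -> v3 first and then v1 -> v2, to avoid clobbering v2 before it
   has been read.  */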
/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
   one of VSTRUCT modes: OI, CI or XI.  */
int
aarch64_simd_attr_length_move (rtx insn)
{
  enum machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }
  return 4;
}

/* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
   alignment of a vector to 128 bits.  */
static HOST_WIDE_INT
aarch64_simd_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
  return MIN (align, 128);
}

/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
static bool
aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
{
  if (is_packed)
    return false;

  /* We guarantee alignment for vectors up to 128-bits.  */
  if (tree_int_cst_compare (TYPE_SIZE (type),
			    bitsize_int (BIGGEST_ALIGNMENT)) > 0)
    return false;

  /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
  return true;
}
/* If VALS is a vector constant that can be loaded into a register
   using DUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */
static rtx
aarch64_simd_dup_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR)
    return NULL_RTX;

  for (i = 1; i < n_elts; ++i)
    {
      x = CONST_VECTOR_ELT (vals, i);
      if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
	all_same = false;
    }

  if (!all_same)
    return NULL_RTX;

  /* We can load this constant by using DUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */
  x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}

/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */
static rtx
aarch64_simd_make_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  rtx const_dup;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL_RTX
      && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
    /* Load using MOVI/MVNI.  */
    return const_vec;
  else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
    /* Loaded using DUP.  */
    return const_dup;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  We can not take advantage of single-cycle
       LD1 because we need a PC-relative addressing mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
void
aarch64_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  x = XVECEXP (vals, 0, 0);
  if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
    n_var = 1, one_var = 0;

  for (i = 1; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
	++n_var, one_var = i;

      if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = aarch64_simd_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);
      enum insn_code icode;

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
      aarch64_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      icode = optab_handler (vec_set_optab, mode);
      gcc_assert (icode != CODE_FOR_nothing);
      emit_insn (GEN_FCN (icode) (target, x, index));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
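/* Illustrative note (added, not part of the original source): for a
   vec_init of {x, 1, 2, 3} with a single non-constant x, the n_var == 1
   path above first materialises the constant vector {1, 1, 2, 3} (the
   neighbouring value 1 stands in for lane 0, via one_var ^ 1) and then
   overwrites lane 0 with x through the vec_set pattern, avoiding a
   round trip through the stack.  */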
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  */
static unsigned HOST_WIDE_INT
aarch64_shift_truncation_mask (enum machine_mode mode)
{
  return
    (aarch64_vector_mode_supported_p (mode)
     || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
}
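/* Note (added for illustration): returning GET_MODE_BITSIZE (mode) - 1
   (e.g. 31 for SImode) lets the middle end omit an explicit AND on
   variable shift amounts for scalar shifts, which already truncate the
   count on AArch64; returning 0 for vector modes keeps the masking,
   since the Advanced SIMD shift instructions do not truncate.  */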
#ifndef TLS_SECTION_ASM_FLAG
#define TLS_SECTION_ASM_FLAG 'T'
#endif

void
aarch64_elf_asm_named_section (const char *name, unsigned int flags,
			       tree decl ATTRIBUTE_UNUSED)
{
  char flagchars[10], *f = flagchars;

  /* If we have already declared this section, we can use an
     abbreviated form to switch back to it -- unless this section is
     part of a COMDAT group, in which case GAS requires the full
     declaration every time.  */
  if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
      && (flags & SECTION_DECLARED))
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  if (!(flags & SECTION_DEBUG))
    *f++ = 'a';
  if (flags & SECTION_WRITE)
    *f++ = 'w';
  if (flags & SECTION_CODE)
    *f++ = 'x';
  if (flags & SECTION_SMALL)
    *f++ = 's';
  if (flags & SECTION_MERGE)
    *f++ = 'M';
  if (flags & SECTION_STRINGS)
    *f++ = 'S';
  if (flags & SECTION_TLS)
    *f++ = TLS_SECTION_ASM_FLAG;
  if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
    *f++ = 'G';
  *f = '\0';

  fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);

  if (!(flags & SECTION_NOTYPE))
    {
      const char *type;
      const char *format;

      if (flags & SECTION_BSS)
	type = "nobits";
      else
	type = "progbits";

      format = ",@%s";
#ifdef TYPE_OPERAND_FMT
      format = "," TYPE_OPERAND_FMT;
#endif

      fprintf (asm_out_file, format, type);

      if (flags & SECTION_ENTSIZE)
	fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	{
	  if (TREE_CODE (decl) == IDENTIFIER_NODE)
	    fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
	  else
	    fprintf (asm_out_file, ",%s,comdat",
		     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
	}
    }

  putc ('\n', asm_out_file);
}
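/* Example output (added for illustration, not from the original source):
   for a writable, allocated TLS bss-style section the function above
   emits a directive of the form

	.section	.tbss,"awT",@nobits

   and a COMDAT group member additionally receives a
   ",<group-name>,comdat" suffix.  */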
/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
  int type;
  switch (aarch64_cmodel)
    {
    case AARCH64_CMODEL_TINY:
    case AARCH64_CMODEL_TINY_PIC:
    case AARCH64_CMODEL_SMALL:
    case AARCH64_CMODEL_SMALL_PIC:
      /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
	 for everything.  */
      type = DW_EH_PE_sdata4;
      break;
    default:
      /* No assumptions here.  8-byte relocs required.  */
      type = DW_EH_PE_sdata8;
      break;
    }
  return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}
/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
			     rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, model_rtx));
}
/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
			      rtx rval, rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem, model_rtx));
}
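/* Note (added): MODEL_RTX carries the C11 memory model for the access.
   The exclusive-access patterns in the machine description inspect it
   to choose between the plain (LDXR/STXR) and the acquire/release
   (LDAXR/STLXR) forms of the instructions.  */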
/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
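/* Note (added): REG_BR_PROB_BASE is 10000, so the note records a taken
   probability of 99/10000, just under 1%, which steers basic-block
   reordering to keep the retry path of the atomic loops out of line.  */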
/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode, cmp_mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For short modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case SImode:
    case DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
	oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
/* Split a compare and swap pattern.  */

void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  scratch = operands[7];
  mode = GET_MODE (mem);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
    }

  emit_label (label2);
}
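/* Illustrative sketch (added, not from the original source): for a
   strong SImode compare-and-swap with acquire semantics, the split
   above corresponds roughly to

	1:	ldaxr	w1, [x2]	// load exclusive
		cmp	w1, w3		// compare with expected
		b.ne	2f		// mismatch: exit
		stxr	w4, w5, [x2]	// store exclusive
		cbnz	w4, 1b		// monitor lost: retry
	2:

   where the store-exclusive status register drives the backward branch.
   A weak CAS omits the retry branch and instead publishes the
   store-exclusive result through the condition flags.  */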
/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
			 rtx value, rtx model_rtx, rtx cond)
{
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
				gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
}
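/* Illustrative sketch (added): an SImode atomic fetch-and-add therefore
   splits into the usual load-exclusive/modify/store-exclusive loop:

	1:	ldxr	w0, [x1]
		add	w2, w0, w3
		stxr	w4, w2, [x1]
		cbnz	w4, 1b

   The NOT case above emits two instructions (AND, then NOT of the
   result) because there is no single NAND instruction.  */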
static void
aarch64_print_extension (void)
{
  const struct aarch64_option_extension *opt = NULL;

  for (opt = all_extensions; opt->name != NULL; opt++)
    if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
      asm_fprintf (asm_out_file, "+%s", opt->name);

  asm_fprintf (asm_out_file, "\n");
}

static void
aarch64_start_file (void)
{
  if (selected_arch)
    {
      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
      aarch64_print_extension ();
    }
  else if (selected_cpu)
    {
      asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
      aarch64_print_extension ();
    }
  default_file_start ();
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;

  return VOIDmode;
}

/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent.  They are given
   by:

   (-1)^s * (n/16) * 2^r

   Where:
     's' is the sign bit.
     'n' is an integer in the range 16 <= n <= 31.
     'r' is an integer in the range -3 <= r <= 4.  */
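/* Worked example (added for illustration, not part of the original
   source): 0.25 is representable, since 0.25 = (16/16) * 2^-2 gives
   s = 0, n = 16, r = -2; likewise 17.0 = (17/16) * 2^4.  A value such
   as 0.1 has no exact n/16 * 2^r form and is rejected by the predicate
   below.  */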
/* Return true iff X can be represented by a quarter-precision
   floating point immediate operand X.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  HOST_WIDE_INT m1, m2;
  REAL_VALUE_TYPE r, m;

  if (!CONST_DOUBLE_P (x))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  REAL_VALUE_TO_INT (&m1, &m2, m);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (m1 != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = m2;
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     elsewhere.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}
char*
aarch64_output_simd_mov_immediate (rtx const_vector,
				   enum machine_mode mode,
				   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  const char *shift_op;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };

  /* This will return true to show const_vector is legal for use as either
     an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (mode == SFmode || mode == DFmode)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      if (aarch64_float_const_zero_rtx_p (info.value))
	info.value = GEN_INT (0);
      else
	{
#define buf_size 20
	  REAL_VALUE_TYPE r;
	  REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
	  char float_buf[buf_size] = {'\0'};
	  real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
#undef buf_size

	  if (lane_count == 1)
	    snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
	  else
	    snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
		      lane_count, element_char, float_buf);
	  return templ;
	}
    }

  mnemonic = info.mvn ? "mvni" : "movi";
  shift_op = info.msl ? "msl" : "lsl";

  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
	      ", %s %d", mnemonic, lane_count, element_char,
	      UINTVAL (info.value), shift_op, info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}
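/* Example outputs after operand substitution (added for illustration;
   the exact operands depend on the immediate):
     shifted integer splat:  "movi	v0.4s, 0x1, lsl 16"
     inverted pattern:       "mvni	v0.8h, 0xff"
     float splat:            "fmov	v0.2d, 1.0e+0"  */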
char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
					  enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  The three XORs exchange
     the two source registers without needing a scratch.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	{
	  /* Expand the argument to a V16QI mode by duplicating it.  */
	  rtx pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
	}
    }
}
void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
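/* Note (added): TBL writes zero to any result byte whose selector index
   is out of range, whereas the vec_perm optab requires indices to wrap.
   The AND with nelt - 1 (or 2 * nelt - 1 for the two-vector form) above
   implements that wrapping before the table lookup.  */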
/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn2v16qi; break;
	case V8QImode: gen = gen_aarch64_trn2v8qi; break;
	case V8HImode: gen = gen_aarch64_trn2v8hi; break;
	case V4HImode: gen = gen_aarch64_trn2v4hi; break;
	case V4SImode: gen = gen_aarch64_trn2v4si; break;
	case V2SImode: gen = gen_aarch64_trn2v2si; break;
	case V2DImode: gen = gen_aarch64_trn2v2di; break;
	case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn1v16qi; break;
	case V8QImode: gen = gen_aarch64_trn1v8qi; break;
	case V8HImode: gen = gen_aarch64_trn1v8hi; break;
	case V4HImode: gen = gen_aarch64_trn1v4hi; break;
	case V4SImode: gen = gen_aarch64_trn1v4si; break;
	case V2SImode: gen = gen_aarch64_trn1v2si; break;
	case V2DImode: gen = gen_aarch64_trn1v2di; break;
	case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
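/* Worked example (added): on V4SI, TRN1 interleaves the even lanes of
   the two inputs and matches the permute vector {0, 4, 2, 6} (odd == 0),
   while TRN2 takes the odd lanes and matches {1, 5, 3, 7} (odd == 1).  */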
/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp2v4si; break;
	case V2SImode: gen = gen_aarch64_uzp2v2si; break;
	case V2DImode: gen = gen_aarch64_uzp2v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp1v4si; break;
	case V2SImode: gen = gen_aarch64_uzp1v2si; break;
	case V2DImode: gen = gen_aarch64_uzp1v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
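/* Worked example (added): on V4SI, UZP1 keeps the even-indexed elements
   of the concatenated inputs and matches the permute vector {0, 2, 4, 6}
   (odd == 0); UZP2 keeps the odd-indexed ones, {1, 3, 5, 7}.  */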
/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    /* Do Nothing.  */
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip2v16qi; break;
	case V8QImode: gen = gen_aarch64_zip2v8qi; break;
	case V8HImode: gen = gen_aarch64_zip2v8hi; break;
	case V4HImode: gen = gen_aarch64_zip2v4hi; break;
	case V4SImode: gen = gen_aarch64_zip2v4si; break;
	case V2SImode: gen = gen_aarch64_zip2v2si; break;
	case V2DImode: gen = gen_aarch64_zip2v2di; break;
	case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip1v16qi; break;
	case V8QImode: gen = gen_aarch64_zip1v8qi; break;
	case V8HImode: gen = gen_aarch64_zip1v8hi; break;
	case V4HImode: gen = gen_aarch64_zip1v4hi; break;
	case V4SImode: gen = gen_aarch64_zip1v4si; break;
	case V2SImode: gen = gen_aarch64_zip1v2si; break;
	case V2DImode: gen = gen_aarch64_zip1v2di; break;
	case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
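/* Worked example (added): on V4SI, ZIP1 interleaves the low halves of
   the two inputs and matches the permute vector {0, 4, 1, 5}
   (high == 0); ZIP2 interleaves the high halves, {2, 6, 3, 7}.  */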
/* Recognize patterns suitable for the DUP (element) instruction.  */
static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0;
  enum machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;
  rtx lane;

  /* TODO: This may not be big-endian safe.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
	return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt);

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}
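/* Worked example (added): the splat permute {3, 3, 3, 3} on V4SI is
   matched here and becomes a single lane duplicate, e.g.
   "dup v0.4s, v1.s[3]".  */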
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_zip (d))
	return true;
      else if (aarch64_evpc_uzp (d))
	return true;
      else if (aarch64_evpc_trn (d))
	return true;
      else if (aarch64_evpc_dup (d))
	return true;
      return aarch64_evpc_tbl (d);
    }

  return false;
}
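/* Worked example (added): for V4SI with permute vector {5, 1, 7, 3},
   perm[0] >= nelt, so the swap above exchanges op0 and op1 and rewrites
   the indices to {1, 5, 3, 7}, which aarch64_evpc_trn then recognizes
   as TRN2 on the exchanged operands.  */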
/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* Fall through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}
static bool
aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  /* Generate into a throwaway sequence so that this hook, which only
     tests for support, has no side effects on the insn stream.  */
  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P aarch64_lra_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"