1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-attr.h"
43 #include "diagnostic-core.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
52 #include "target-def.h"
54 #include "langhooks.h"
61 /* Forward definitions of types. */
62 typedef struct minipool_node Mnode
;
63 typedef struct minipool_fixup Mfix
;
65 void (*arm_lang_output_object_attributes_hook
)(void);
67 /* Forward function declarations. */
68 static bool arm_needs_doubleword_align (enum machine_mode
, const_tree
);
69 static int arm_compute_static_chain_stack_bytes (void);
70 static arm_stack_offsets
*arm_get_frame_offsets (void);
71 static void arm_add_gc_roots (void);
72 static int arm_gen_constant (enum rtx_code
, enum machine_mode
, rtx
,
73 HOST_WIDE_INT
, rtx
, rtx
, int, int);
74 static unsigned bit_count (unsigned long);
75 static int arm_address_register_rtx_p (rtx
, int);
76 static int arm_legitimate_index_p (enum machine_mode
, rtx
, RTX_CODE
, int);
77 static int thumb2_legitimate_index_p (enum machine_mode
, rtx
, int);
78 static int thumb1_base_register_rtx_p (rtx
, enum machine_mode
, int);
79 static rtx
arm_legitimize_address (rtx
, rtx
, enum machine_mode
);
80 static rtx
thumb_legitimize_address (rtx
, rtx
, enum machine_mode
);
81 inline static int thumb1_index_register_rtx_p (rtx
, int);
82 static bool arm_legitimate_address_p (enum machine_mode
, rtx
, bool);
83 static int thumb_far_jump_used_p (void);
84 static bool thumb_force_lr_save (void);
85 static int const_ok_for_op (HOST_WIDE_INT
, enum rtx_code
);
86 static rtx
emit_sfm (int, int);
87 static unsigned arm_size_return_regs (void);
88 static bool arm_assemble_integer (rtx
, unsigned int, int);
89 static void arm_print_operand (FILE *, rtx
, int);
90 static void arm_print_operand_address (FILE *, rtx
);
91 static bool arm_print_operand_punct_valid_p (unsigned char code
);
92 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
93 static arm_cc
get_arm_condition_code (rtx
);
94 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
95 static rtx
is_jump_table (rtx
);
96 static const char *output_multi_immediate (rtx
*, const char *, const char *,
98 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
99 static struct machine_function
*arm_init_machine_status (void);
100 static void thumb_exit (FILE *, int);
/* NOTE(review): duplicate declaration — is_jump_table is already declared
   a few lines above with an identical prototype.  Harmless in C (identical
   redeclarations are legal) but one of the two could be removed.  */
101 static rtx
is_jump_table (rtx
);
102 static HOST_WIDE_INT
get_jump_table_size (rtx
);
103 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
104 static Mnode
*add_minipool_forward_ref (Mfix
*);
105 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
106 static Mnode
*add_minipool_backward_ref (Mfix
*);
107 static void assign_minipool_offsets (Mfix
*);
108 static void arm_print_value (FILE *, rtx
);
109 static void dump_minipool (rtx
);
110 static int arm_barrier_cost (rtx
);
111 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
112 static void push_minipool_barrier (rtx
, HOST_WIDE_INT
);
113 static void push_minipool_fix (rtx
, HOST_WIDE_INT
, rtx
*, enum machine_mode
,
115 static void arm_reorg (void);
116 static bool note_invalid_constants (rtx
, HOST_WIDE_INT
, int);
117 static unsigned long arm_compute_save_reg0_reg12_mask (void);
118 static unsigned long arm_compute_save_reg_mask (void);
119 static unsigned long arm_isr_value (tree
);
120 static unsigned long arm_compute_func_type (void);
121 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
122 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
123 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
124 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
125 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
127 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
128 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
129 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT
);
130 static int arm_comp_type_attributes (const_tree
, const_tree
);
131 static void arm_set_default_type_attributes (tree
);
132 static int arm_adjust_cost (rtx
, rtx
, rtx
, int);
133 static int count_insns_for_constant (HOST_WIDE_INT
, int);
134 static int arm_get_strip_length (int);
135 static bool arm_function_ok_for_sibcall (tree
, tree
);
136 static enum machine_mode
arm_promote_function_mode (const_tree
,
137 enum machine_mode
, int *,
139 static bool arm_return_in_memory (const_tree
, const_tree
);
140 static rtx
arm_function_value (const_tree
, const_tree
, bool);
141 static rtx
arm_libcall_value (enum machine_mode
, const_rtx
);
143 static void arm_internal_label (FILE *, const char *, unsigned long);
144 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
146 static bool arm_have_conditional_execution (void);
147 static bool arm_cannot_force_const_mem (enum machine_mode
, rtx
);
148 static bool arm_legitimate_constant_p (enum machine_mode
, rtx
);
149 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
150 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
151 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
152 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
153 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
154 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
155 static bool arm_rtx_costs (rtx
, int, int, int *, bool);
156 static int arm_address_cost (rtx
, bool);
157 static bool arm_memory_load_p (rtx
);
158 static bool arm_cirrus_insn_p (rtx
);
159 static void cirrus_reorg (rtx
);
160 static void arm_init_builtins (void);
161 static void arm_init_iwmmxt_builtins (void);
162 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
163 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
164 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
165 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
166 static tree
arm_builtin_decl (unsigned, bool);
167 static void emit_constant_insn (rtx cond
, rtx pattern
);
168 static rtx
emit_set_insn (rtx
, rtx
);
169 static int arm_arg_partial_bytes (CUMULATIVE_ARGS
*, enum machine_mode
,
171 static rtx
arm_function_arg (CUMULATIVE_ARGS
*, enum machine_mode
,
173 static void arm_function_arg_advance (CUMULATIVE_ARGS
*, enum machine_mode
,
175 static unsigned int arm_function_arg_boundary (enum machine_mode
, const_tree
);
176 static rtx
aapcs_allocate_return_reg (enum machine_mode
, const_tree
,
178 static int aapcs_select_return_coproc (const_tree
, const_tree
);
180 #ifdef OBJECT_FORMAT_ELF
181 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
182 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
185 static void arm_encode_section_info (tree
, rtx
, int);
188 static void arm_file_end (void);
189 static void arm_file_start (void);
191 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
193 static bool arm_pass_by_reference (CUMULATIVE_ARGS
*,
194 enum machine_mode
, const_tree
, bool);
195 static bool arm_promote_prototypes (const_tree
);
196 static bool arm_default_short_enums (void);
197 static bool arm_align_anon_bitfield (void);
198 static bool arm_return_in_msb (const_tree
);
199 static bool arm_must_pass_in_stack (enum machine_mode
, const_tree
);
/* NOTE(review): duplicate declaration — arm_return_in_memory is already
   declared earlier (near the function-value declarations) with an identical
   prototype.  Redundant but harmless; could be removed.  */
200 static bool arm_return_in_memory (const_tree
, const_tree
);
202 static void arm_unwind_emit (FILE *, rtx
);
203 static bool arm_output_ttype (rtx
);
204 static void arm_asm_emit_except_personality (rtx
);
205 static void arm_asm_init_sections (void);
207 static void arm_dwarf_handle_frame_unspec (const char *, rtx
, int);
208 static rtx
arm_dwarf_register_span (rtx
);
210 static tree
arm_cxx_guard_type (void);
211 static bool arm_cxx_guard_mask_bit (void);
212 static tree
arm_get_cookie_size (tree
);
213 static bool arm_cookie_has_size (void);
214 static bool arm_cxx_cdtor_returns_this (void);
215 static bool arm_cxx_key_method_may_be_inline (void);
216 static void arm_cxx_determine_class_data_visibility (tree
);
217 static bool arm_cxx_class_data_always_comdat (void);
218 static bool arm_cxx_use_aeabi_atexit (void);
219 static void arm_init_libfuncs (void);
220 static tree
arm_build_builtin_va_list (void);
221 static void arm_expand_builtin_va_start (tree
, rtx
);
222 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
223 static void arm_option_override (void);
224 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode
);
225 static bool arm_cannot_copy_insn_p (rtx
);
226 static bool arm_tls_symbol_p (rtx x
);
227 static int arm_issue_rate (void);
228 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
229 static bool arm_output_addr_const_extra (FILE *, rtx
);
230 static bool arm_allocate_stack_slots_for_args (void);
231 static const char *arm_invalid_parameter_type (const_tree t
);
232 static const char *arm_invalid_return_type (const_tree t
);
233 static tree
arm_promoted_type (const_tree t
);
234 static tree
arm_convert_to_type (tree type
, tree expr
);
235 static bool arm_scalar_mode_supported_p (enum machine_mode
);
236 static bool arm_frame_pointer_required (void);
237 static bool arm_can_eliminate (const int, const int);
238 static void arm_asm_trampoline_template (FILE *);
239 static void arm_trampoline_init (rtx
, tree
, rtx
);
240 static rtx
arm_trampoline_adjust_address (rtx
);
241 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
242 static bool cortex_a9_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
243 static bool xscale_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
244 static bool fa726te_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
245 static bool arm_array_mode_supported_p (enum machine_mode
,
246 unsigned HOST_WIDE_INT
);
247 static enum machine_mode
arm_preferred_simd_mode (enum machine_mode
);
248 static bool arm_class_likely_spilled_p (reg_class_t
);
249 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
250 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode
,
254 static void arm_conditional_register_usage (void);
255 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
256 static unsigned int arm_autovectorize_vector_sizes (void);
257 static int arm_default_branch_cost (bool, bool);
258 static int arm_cortex_a5_branch_cost (bool, bool);
261 /* Table of machine attributes. */
262 static const struct attribute_spec arm_attribute_table
[] =
264 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
265 affects_type_identity } */
266 /* Function calls made to this symbol must be done indirectly, because
267 it may lie outside of the 26 bit addressing range of a normal function
269 { "long_call", 0, 0, false, true, true, NULL
, false },
270 /* Whereas these functions are always known to reside within the 26 bit
272 { "short_call", 0, 0, false, true, true, NULL
, false },
273 /* Specify the procedure call conventions for a function. */
274 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
276 /* Interrupt Service Routines have special prologue and epilogue requirements. */
277 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
279 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
281 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
284 /* ARM/PE has three new attributes:
286 dllexport - for exporting a function/variable that will live in a dll
287 dllimport - for importing a function/variable from a dll
289 Microsoft allows multiple declspecs in one __declspec, separating
290 them with spaces. We do NOT support this. Instead, use __declspec
293 { "dllimport", 0, 0, true, false, false, NULL
, false },
294 { "dllexport", 0, 0, true, false, false, NULL
, false },
295 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
297 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
298 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
299 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
300 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
303 { NULL
, 0, 0, false, false, false, NULL
, false }
306 /* Initialize the GCC target structure. */
307 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
308 #undef TARGET_MERGE_DECL_ATTRIBUTES
309 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
312 #undef TARGET_LEGITIMIZE_ADDRESS
313 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
315 #undef TARGET_ATTRIBUTE_TABLE
316 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
318 #undef TARGET_ASM_FILE_START
319 #define TARGET_ASM_FILE_START arm_file_start
320 #undef TARGET_ASM_FILE_END
321 #define TARGET_ASM_FILE_END arm_file_end
323 #undef TARGET_ASM_ALIGNED_SI_OP
324 #define TARGET_ASM_ALIGNED_SI_OP NULL
325 #undef TARGET_ASM_INTEGER
326 #define TARGET_ASM_INTEGER arm_assemble_integer
328 #undef TARGET_PRINT_OPERAND
329 #define TARGET_PRINT_OPERAND arm_print_operand
330 #undef TARGET_PRINT_OPERAND_ADDRESS
331 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
332 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
333 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
335 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
336 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
338 #undef TARGET_ASM_FUNCTION_PROLOGUE
339 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
341 #undef TARGET_ASM_FUNCTION_EPILOGUE
342 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
344 #undef TARGET_OPTION_OVERRIDE
345 #define TARGET_OPTION_OVERRIDE arm_option_override
347 #undef TARGET_COMP_TYPE_ATTRIBUTES
348 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
350 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
351 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
353 #undef TARGET_SCHED_ADJUST_COST
354 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
356 #undef TARGET_ENCODE_SECTION_INFO
358 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
360 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
363 #undef TARGET_STRIP_NAME_ENCODING
364 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
366 #undef TARGET_ASM_INTERNAL_LABEL
367 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
369 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
370 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
372 #undef TARGET_FUNCTION_VALUE
373 #define TARGET_FUNCTION_VALUE arm_function_value
375 #undef TARGET_LIBCALL_VALUE
376 #define TARGET_LIBCALL_VALUE arm_libcall_value
378 #undef TARGET_ASM_OUTPUT_MI_THUNK
379 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
380 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
381 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
383 #undef TARGET_RTX_COSTS
384 #define TARGET_RTX_COSTS arm_rtx_costs
385 #undef TARGET_ADDRESS_COST
386 #define TARGET_ADDRESS_COST arm_address_cost
388 #undef TARGET_SHIFT_TRUNCATION_MASK
389 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
390 #undef TARGET_VECTOR_MODE_SUPPORTED_P
391 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
392 #undef TARGET_ARRAY_MODE_SUPPORTED_P
393 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
394 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
395 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
396 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
397 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
398 arm_autovectorize_vector_sizes
400 #undef TARGET_MACHINE_DEPENDENT_REORG
401 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
403 #undef TARGET_INIT_BUILTINS
404 #define TARGET_INIT_BUILTINS arm_init_builtins
405 #undef TARGET_EXPAND_BUILTIN
406 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
407 #undef TARGET_BUILTIN_DECL
408 #define TARGET_BUILTIN_DECL arm_builtin_decl
410 #undef TARGET_INIT_LIBFUNCS
411 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
413 #undef TARGET_PROMOTE_FUNCTION_MODE
414 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
415 #undef TARGET_PROMOTE_PROTOTYPES
416 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
417 #undef TARGET_PASS_BY_REFERENCE
418 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
419 #undef TARGET_ARG_PARTIAL_BYTES
420 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
421 #undef TARGET_FUNCTION_ARG
422 #define TARGET_FUNCTION_ARG arm_function_arg
423 #undef TARGET_FUNCTION_ARG_ADVANCE
424 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
425 #undef TARGET_FUNCTION_ARG_BOUNDARY
426 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
428 #undef TARGET_SETUP_INCOMING_VARARGS
429 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
431 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
432 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
434 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
435 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
436 #undef TARGET_TRAMPOLINE_INIT
437 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
438 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
439 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
441 #undef TARGET_DEFAULT_SHORT_ENUMS
442 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
444 #undef TARGET_ALIGN_ANON_BITFIELD
445 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
447 #undef TARGET_NARROW_VOLATILE_BITFIELD
448 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
450 #undef TARGET_CXX_GUARD_TYPE
451 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
453 #undef TARGET_CXX_GUARD_MASK_BIT
454 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
456 #undef TARGET_CXX_GET_COOKIE_SIZE
457 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
459 #undef TARGET_CXX_COOKIE_HAS_SIZE
460 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
462 #undef TARGET_CXX_CDTOR_RETURNS_THIS
463 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
465 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
466 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
468 #undef TARGET_CXX_USE_AEABI_ATEXIT
469 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
471 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
472 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
473 arm_cxx_determine_class_data_visibility
475 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
476 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
478 #undef TARGET_RETURN_IN_MSB
479 #define TARGET_RETURN_IN_MSB arm_return_in_msb
481 #undef TARGET_RETURN_IN_MEMORY
482 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
484 #undef TARGET_MUST_PASS_IN_STACK
485 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
488 #undef TARGET_ASM_UNWIND_EMIT
489 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
491 /* EABI unwinding tables use a different format for the typeinfo tables. */
492 #undef TARGET_ASM_TTYPE
493 #define TARGET_ASM_TTYPE arm_output_ttype
495 #undef TARGET_ARM_EABI_UNWINDER
496 #define TARGET_ARM_EABI_UNWINDER true
498 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
499 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
501 #undef TARGET_ASM_INIT_SECTIONS
502 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
503 #endif /* ARM_UNWIND_INFO */
505 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
506 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
508 #undef TARGET_DWARF_REGISTER_SPAN
509 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
511 #undef TARGET_CANNOT_COPY_INSN_P
512 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
515 #undef TARGET_HAVE_TLS
516 #define TARGET_HAVE_TLS true
519 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
520 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
522 #undef TARGET_LEGITIMATE_CONSTANT_P
523 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
525 #undef TARGET_CANNOT_FORCE_CONST_MEM
526 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
528 #undef TARGET_MAX_ANCHOR_OFFSET
529 #define TARGET_MAX_ANCHOR_OFFSET 4095
531 /* The minimum is set such that the total size of the block
532 for a particular anchor is -4088 + 1 + 4095 bytes, which is
533 divisible by eight, ensuring natural spacing of anchors. */
534 #undef TARGET_MIN_ANCHOR_OFFSET
535 #define TARGET_MIN_ANCHOR_OFFSET -4088
537 #undef TARGET_SCHED_ISSUE_RATE
538 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
540 #undef TARGET_MANGLE_TYPE
541 #define TARGET_MANGLE_TYPE arm_mangle_type
543 #undef TARGET_BUILD_BUILTIN_VA_LIST
544 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
545 #undef TARGET_EXPAND_BUILTIN_VA_START
546 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
547 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
548 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
551 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
552 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
555 #undef TARGET_LEGITIMATE_ADDRESS_P
556 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
558 #undef TARGET_INVALID_PARAMETER_TYPE
559 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
561 #undef TARGET_INVALID_RETURN_TYPE
562 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
564 #undef TARGET_PROMOTED_TYPE
565 #define TARGET_PROMOTED_TYPE arm_promoted_type
567 #undef TARGET_CONVERT_TO_TYPE
568 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
570 #undef TARGET_SCALAR_MODE_SUPPORTED_P
571 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
573 #undef TARGET_FRAME_POINTER_REQUIRED
574 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
576 #undef TARGET_CAN_ELIMINATE
577 #define TARGET_CAN_ELIMINATE arm_can_eliminate
579 #undef TARGET_CONDITIONAL_REGISTER_USAGE
580 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
582 #undef TARGET_CLASS_LIKELY_SPILLED_P
583 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
585 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
586 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
587 arm_vector_alignment_reachable
589 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
590 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
591 arm_builtin_support_vector_misalignment
593 #undef TARGET_PREFERRED_RENAME_CLASS
594 #define TARGET_PREFERRED_RENAME_CLASS \
595 arm_preferred_rename_class
/* The global target hook vector for the ARM back end.  TARGET_INITIALIZER
   expands to an aggregate initializer that picks up all of the TARGET_xxx
   macro overrides defined above (those not overridden keep their generic
   defaults).  */
597 struct gcc_target targetm
= TARGET_INITIALIZER
;
599 /* Obstack for minipool constant handling. */
600 static struct obstack minipool_obstack
;
601 static char * minipool_startobj
;
603 /* The maximum number of insns skipped which
604 will be conditionalised if possible. */
605 static int max_insns_skipped
= 5;
607 extern FILE * asm_out_file
;
609 /* True if we are currently building a constant table. */
610 int making_const_table
;
612 /* The processor for which instructions should be scheduled. */
613 enum processor_type arm_tune
= arm_none
;
615 /* The current tuning set. */
616 const struct tune_params
*current_tune
;
618 /* Which floating point hardware to schedule for. */
621 /* Which floating point hardware to use. */
622 const struct arm_fpu_desc
*arm_fpu_desc
;
624 /* Used for Thumb call_via trampolines. */
625 rtx thumb_call_via_label
[14];
626 static int thumb_call_reg_needed
;
628 /* Bit values used to identify processor capabilities. */
629 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
630 #define FL_ARCH3M (1 << 1) /* Extended multiply */
631 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
632 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
633 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
634 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
635 #define FL_THUMB (1 << 6) /* Thumb aware */
636 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
637 #define FL_STRONG (1 << 8) /* StrongARM */
638 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
639 #define FL_XSCALE (1 << 10) /* XScale */
640 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
641 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
642 media instructions. */
643 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
644 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
645 Note: ARM6 & 7 derivatives only. */
646 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
647 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
648 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
650 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
651 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
652 #define FL_NEON (1 << 20) /* Neon instructions. */
653 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
655 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
656 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
658 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
660 /* Flags that only affect tuning, not available instructions. */
661 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
664 #define FL_FOR_ARCH2 FL_NOTM
665 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
666 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
667 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
668 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
669 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
670 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
671 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
672 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
673 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
674 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
675 #define FL_FOR_ARCH6J FL_FOR_ARCH6
676 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
677 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
678 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
679 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
680 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
681 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
682 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
683 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
684 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
685 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
687 /* The bits in this mask specify which
688 instructions we are allowed to generate. */
689 static unsigned long insn_flags
= 0;
691 /* The bits in this mask specify which instruction scheduling options should
693 static unsigned long tune_flags
= 0;
695 /* The following are used in the arm.md file as equivalents to bits
696 in the above two flag variables. */
698 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
701 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
704 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
707 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
710 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
713 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
716 /* Nonzero if this chip supports the ARM 6K extensions. */
719 /* Nonzero if this chip supports the ARM 7 extensions. */
722 /* Nonzero if instructions not present in the 'M' profile can be used. */
723 int arm_arch_notm
= 0;
725 /* Nonzero if instructions present in ARMv7E-M can be used. */
728 /* Nonzero if this chip can benefit from load scheduling. */
729 int arm_ld_sched
= 0;
731 /* Nonzero if this chip is a StrongARM. */
732 int arm_tune_strongarm
= 0;
734 /* Nonzero if this chip is a Cirrus variant. */
735 int arm_arch_cirrus
= 0;
737 /* Nonzero if this chip supports Intel Wireless MMX technology. */
738 int arm_arch_iwmmxt
= 0;
740 /* Nonzero if this chip is an XScale. */
741 int arm_arch_xscale
= 0;
743 /* Nonzero if tuning for XScale. */
744 int arm_tune_xscale
= 0;
746 /* Nonzero if we want to tune for stores that access the write-buffer.
747 This typically means an ARM6 or ARM7 with MMU or MPU. */
748 int arm_tune_wbuf
= 0;
750 /* Nonzero if tuning for Cortex-A9. */
751 int arm_tune_cortex_a9
= 0;
753 /* Nonzero if generating Thumb instructions. */
756 /* Nonzero if generating Thumb-1 instructions. */
759 /* Nonzero if we should define __THUMB_INTERWORK__ in the
761 XXX This is a bit of a hack, it's intended to help work around
762 problems in GLD which doesn't understand that armv5t code is
763 interworking clean. */
764 int arm_cpp_interwork
= 0;
766 /* Nonzero if chip supports Thumb 2. */
769 /* Nonzero if chip supports integer division instruction. */
770 int arm_arch_arm_hwdiv
;	/* Divide available in ARM state (cf. FL_ARM_DIV above).  */
771 int arm_arch_thumb_hwdiv
;	/* Divide available in Thumb state (cf. FL_THUMB_DIV above).  */
773 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
774 we must report the mode of the memory reference from
775 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
776 enum machine_mode output_memory_reference_mode
;
778 /* The register number to be used for the PIC offset register. */
779 unsigned arm_pic_register
= INVALID_REGNUM
;
781 /* Set to 1 after arm_reorg has started. Reset to start at the start of
782 the next function. */
783 static int after_arm_reorg
= 0;
785 enum arm_pcs arm_pcs_default
;
787 /* For an explanation of these variables, see final_prescan_insn below. */
789 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
790 enum arm_cond_code arm_current_cc
;
793 int arm_target_label
;
794 /* The number of conditionally executed insns, including the current insn. */
795 int arm_condexec_count
= 0;
796 /* A bitmask specifying the patterns for the IT block.
797 Zero means do not output an IT block before this insn. */
798 int arm_condexec_mask
= 0;
799 /* The number of bits used in arm_condexec_mask. */
800 int arm_condexec_masklen
= 0;
802 /* The condition codes of the ARM, and the inverse function. */
803 static const char * const arm_condition_codes
[] =
805 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
806 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
809 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
810 int arm_regs_in_sequence
[] =
812 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
815 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
816 #define streq(string1, string2) (strcmp (string1, string2) == 0)
818 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
819 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
820 | (1 << PIC_OFFSET_TABLE_REGNUM)))
822 /* Initialization code. */
826 const char *const name
;
827 enum processor_type core
;
829 const unsigned long flags
;
830 const struct tune_params
*const tune
;
834 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
835 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
840 const struct tune_params arm_slowmul_tune
=
842 arm_slowmul_rtx_costs
,
844 3, /* Constant limit. */
845 5, /* Max cond insns. */
846 ARM_PREFETCH_NOT_BENEFICIAL
,
847 true, /* Prefer constant pool. */
848 arm_default_branch_cost
851 const struct tune_params arm_fastmul_tune
=
853 arm_fastmul_rtx_costs
,
855 1, /* Constant limit. */
856 5, /* Max cond insns. */
857 ARM_PREFETCH_NOT_BENEFICIAL
,
858 true, /* Prefer constant pool. */
859 arm_default_branch_cost
862 /* StrongARM has early execution of branches, so a sequence that is worth
863 skipping is shorter. Set max_insns_skipped to a lower value. */
865 const struct tune_params arm_strongarm_tune
=
867 arm_fastmul_rtx_costs
,
869 1, /* Constant limit. */
870 3, /* Max cond insns. */
871 ARM_PREFETCH_NOT_BENEFICIAL
,
872 true, /* Prefer constant pool. */
873 arm_default_branch_cost
876 const struct tune_params arm_xscale_tune
=
878 arm_xscale_rtx_costs
,
879 xscale_sched_adjust_cost
,
880 2, /* Constant limit. */
881 3, /* Max cond insns. */
882 ARM_PREFETCH_NOT_BENEFICIAL
,
883 true, /* Prefer constant pool. */
884 arm_default_branch_cost
887 const struct tune_params arm_9e_tune
=
891 1, /* Constant limit. */
892 5, /* Max cond insns. */
893 ARM_PREFETCH_NOT_BENEFICIAL
,
894 true, /* Prefer constant pool. */
895 arm_default_branch_cost
898 const struct tune_params arm_v6t2_tune
=
902 1, /* Constant limit. */
903 5, /* Max cond insns. */
904 ARM_PREFETCH_NOT_BENEFICIAL
,
905 false, /* Prefer constant pool. */
906 arm_default_branch_cost
909 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
910 const struct tune_params arm_cortex_tune
=
914 1, /* Constant limit. */
915 5, /* Max cond insns. */
916 ARM_PREFETCH_NOT_BENEFICIAL
,
917 false, /* Prefer constant pool. */
918 arm_default_branch_cost
921 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
922 less appealing. Set max_insns_skipped to a low value. */
924 const struct tune_params arm_cortex_a5_tune
=
928 1, /* Constant limit. */
929 1, /* Max cond insns. */
930 ARM_PREFETCH_NOT_BENEFICIAL
,
931 false, /* Prefer constant pool. */
932 arm_cortex_a5_branch_cost
935 const struct tune_params arm_cortex_a9_tune
=
938 cortex_a9_sched_adjust_cost
,
939 1, /* Constant limit. */
940 5, /* Max cond insns. */
941 ARM_PREFETCH_BENEFICIAL(4,32,32),
942 false, /* Prefer constant pool. */
943 arm_default_branch_cost
946 const struct tune_params arm_fa726te_tune
=
949 fa726te_sched_adjust_cost
,
950 1, /* Constant limit. */
951 5, /* Max cond insns. */
952 ARM_PREFETCH_NOT_BENEFICIAL
,
953 true, /* Prefer constant pool. */
954 arm_default_branch_cost
958 /* Not all of these give usefully different compilation alternatives,
959 but there is no simple way of generalizing them. */
960 static const struct processors all_cores
[] =
963 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
964 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
965 #include "arm-cores.def"
967 {NULL
, arm_none
, NULL
, 0, NULL
}
970 static const struct processors all_architectures
[] =
972 /* ARM Architectures */
973 /* We don't specify tuning costs here as it will be figured out
976 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
977 {NAME, CORE, #ARCH, FLAGS, NULL},
978 #include "arm-arches.def"
980 {NULL
, arm_none
, NULL
, 0 , NULL
}
984 /* These are populated as commandline arguments are processed, or NULL
986 static const struct processors
*arm_selected_arch
;
987 static const struct processors
*arm_selected_cpu
;
988 static const struct processors
*arm_selected_tune
;
990 /* The name of the preprocessor macro to define for this architecture. */
992 char arm_arch_name
[] = "__ARM_ARCH_0UNK__";
994 /* Available values for -mfpu=. */
996 static const struct arm_fpu_desc all_fpus
[] =
998 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
999 { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
1000 #include "arm-fpus.def"
1005 /* Supported TLS relocations. */
1015 /* The maximum number of insns to be used when loading a constant. */
1017 arm_constant_limit (bool size_p
)
1019 return size_p
? 1 : current_tune
->constant_limit
;
1022 /* Emit an insn that's a simple single-set. Both the operands must be known
1025 emit_set_insn (rtx x
, rtx y
)
1027 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
1030 /* Return the number of bits set in VALUE. */
1032 bit_count (unsigned long value
)
1034 unsigned long count
= 0;
1039 value
&= value
- 1; /* Clear the least-significant set bit. */
1045 /* Set up library functions unique to ARM. */
1048 arm_init_libfuncs (void)
1050 /* There are no special library functions unless we are using the
1055 /* The functions below are described in Section 4 of the "Run-Time
1056 ABI for the ARM architecture", Version 1.0. */
1058 /* Double-precision floating-point arithmetic. Table 2. */
1059 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
1060 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
1061 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
1062 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
1063 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
1065 /* Double-precision comparisons. Table 3. */
1066 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
1067 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
1068 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
1069 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
1070 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
1071 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
1072 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
1074 /* Single-precision floating-point arithmetic. Table 4. */
1075 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
1076 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
1077 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
1078 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
1079 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
1081 /* Single-precision comparisons. Table 5. */
1082 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
1083 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
1084 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
1085 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
1086 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
1087 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
1088 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
1090 /* Floating-point to integer conversions. Table 6. */
1091 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
1092 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
1093 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
1094 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
1095 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
1096 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
1097 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
1098 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
1100 /* Conversions between floating types. Table 7. */
1101 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
1102 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
1104 /* Integer to floating-point conversions. Table 8. */
1105 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
1106 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
1107 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
1108 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
1109 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
1110 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
1111 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
1112 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
1114 /* Long long. Table 9. */
1115 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
1116 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
1117 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
1118 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
1119 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
1120 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
1121 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
1122 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
1124 /* Integer (32/32->32) division. \S 4.3.1. */
1125 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
1126 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
1128 /* The divmod functions are designed so that they can be used for
1129 plain division, even though they return both the quotient and the
1130 remainder. The quotient is returned in the usual location (i.e.,
1131 r0 for SImode, {r0, r1} for DImode), just as would be expected
1132 for an ordinary division routine. Because the AAPCS calling
1133 conventions specify that all of { r0, r1, r2, r3 } are
1134 callee-saved registers, there is no need to tell the compiler
1135 explicitly that those registers are clobbered by these
1137 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
1138 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
1140 /* For SImode division the ABI provides div-without-mod routines,
1141 which are faster. */
1142 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
1143 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
1145 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1146 divmod libcalls instead. */
1147 set_optab_libfunc (smod_optab
, DImode
, NULL
);
1148 set_optab_libfunc (umod_optab
, DImode
, NULL
);
1149 set_optab_libfunc (smod_optab
, SImode
, NULL
);
1150 set_optab_libfunc (umod_optab
, SImode
, NULL
);
1152 /* Half-precision float operations. The compiler handles all operations
1153 with NULL libfuncs by converting the SFmode. */
1154 switch (arm_fp16_format
)
1156 case ARM_FP16_FORMAT_IEEE
:
1157 case ARM_FP16_FORMAT_ALTERNATIVE
:
1160 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
1161 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1163 : "__gnu_f2h_alternative"));
1164 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
1165 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1167 : "__gnu_h2f_alternative"));
1170 set_optab_libfunc (add_optab
, HFmode
, NULL
);
1171 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
1172 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
1173 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
1174 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
1177 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
1178 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
1179 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
1180 set_optab_libfunc (le_optab
, HFmode
, NULL
);
1181 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
1182 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
1183 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
1190 if (TARGET_AAPCS_BASED
)
1191 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
1194 /* On AAPCS systems, this is the "struct __va_list". */
1195 static GTY(()) tree va_list_type
;
1197 /* Return the type to use as __builtin_va_list. */
1199 arm_build_builtin_va_list (void)
1204 if (!TARGET_AAPCS_BASED
)
1205 return std_build_builtin_va_list ();
1207 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1215 The C Library ABI further reinforces this definition in \S
1218 We must follow this definition exactly. The structure tag
1219 name is visible in C++ mangled names, and thus forms a part
1220 of the ABI. The field name may be used by people who
1221 #include <stdarg.h>. */
1222 /* Create the type. */
1223 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1224 /* Give it the required name. */
1225 va_list_name
= build_decl (BUILTINS_LOCATION
,
1227 get_identifier ("__va_list"),
1229 DECL_ARTIFICIAL (va_list_name
) = 1;
1230 TYPE_NAME (va_list_type
) = va_list_name
;
1231 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
1232 /* Create the __ap field. */
1233 ap_field
= build_decl (BUILTINS_LOCATION
,
1235 get_identifier ("__ap"),
1237 DECL_ARTIFICIAL (ap_field
) = 1;
1238 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
1239 TYPE_FIELDS (va_list_type
) = ap_field
;
1240 /* Compute its layout. */
1241 layout_type (va_list_type
);
1243 return va_list_type
;
1246 /* Return an expression of type "void *" pointing to the next
1247 available argument in a variable-argument list. VALIST is the
1248 user-level va_list object, of type __builtin_va_list. */
1250 arm_extract_valist_ptr (tree valist
)
1252 if (TREE_TYPE (valist
) == error_mark_node
)
1253 return error_mark_node
;
1255 /* On an AAPCS target, the pointer is stored within "struct
1257 if (TARGET_AAPCS_BASED
)
1259 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
1260 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
1261 valist
, ap_field
, NULL_TREE
);
1267 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1269 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
1271 valist
= arm_extract_valist_ptr (valist
);
1272 std_expand_builtin_va_start (valist
, nextarg
);
1275 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1277 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
1280 valist
= arm_extract_valist_ptr (valist
);
1281 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
1284 /* Fix up any incompatible options that the user has specified. */
1286 arm_option_override (void)
1288 if (global_options_set
.x_arm_arch_option
)
1289 arm_selected_arch
= &all_architectures
[arm_arch_option
];
1291 if (global_options_set
.x_arm_cpu_option
)
1292 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
1294 if (global_options_set
.x_arm_tune_option
)
1295 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
1297 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1298 SUBTARGET_OVERRIDE_OPTIONS
;
1301 if (arm_selected_arch
)
1303 if (arm_selected_cpu
)
1305 /* Check for conflict between mcpu and march. */
1306 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
1308 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1309 arm_selected_cpu
->name
, arm_selected_arch
->name
);
1310 /* -march wins for code generation.
1311 -mcpu wins for default tuning. */
1312 if (!arm_selected_tune
)
1313 arm_selected_tune
= arm_selected_cpu
;
1315 arm_selected_cpu
= arm_selected_arch
;
1319 arm_selected_arch
= NULL
;
1322 /* Pick a CPU based on the architecture. */
1323 arm_selected_cpu
= arm_selected_arch
;
1326 /* If the user did not specify a processor, choose one for them. */
1327 if (!arm_selected_cpu
)
1329 const struct processors
* sel
;
1330 unsigned int sought
;
1332 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
1333 if (!arm_selected_cpu
->name
)
1335 #ifdef SUBTARGET_CPU_DEFAULT
1336 /* Use the subtarget default CPU if none was specified by
1338 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
1340 /* Default to ARM6. */
1341 if (!arm_selected_cpu
->name
)
1342 arm_selected_cpu
= &all_cores
[arm6
];
1345 sel
= arm_selected_cpu
;
1346 insn_flags
= sel
->flags
;
1348 /* Now check to see if the user has specified some command line
1349 switch that require certain abilities from the cpu. */
1352 if (TARGET_INTERWORK
|| TARGET_THUMB
)
1354 sought
|= (FL_THUMB
| FL_MODE32
);
1356 /* There are no ARM processors that support both APCS-26 and
1357 interworking. Therefore we force FL_MODE26 to be removed
1358 from insn_flags here (if it was set), so that the search
1359 below will always be able to find a compatible processor. */
1360 insn_flags
&= ~FL_MODE26
;
1363 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
1365 /* Try to locate a CPU type that supports all of the abilities
1366 of the default CPU, plus the extra abilities requested by
1368 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1369 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
1372 if (sel
->name
== NULL
)
1374 unsigned current_bit_count
= 0;
1375 const struct processors
* best_fit
= NULL
;
1377 /* Ideally we would like to issue an error message here
1378 saying that it was not possible to find a CPU compatible
1379 with the default CPU, but which also supports the command
1380 line options specified by the programmer, and so they
1381 ought to use the -mcpu=<name> command line option to
1382 override the default CPU type.
1384 If we cannot find a cpu that has both the
1385 characteristics of the default cpu and the given
1386 command line options we scan the array again looking
1387 for a best match. */
1388 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1389 if ((sel
->flags
& sought
) == sought
)
1393 count
= bit_count (sel
->flags
& insn_flags
);
1395 if (count
>= current_bit_count
)
1398 current_bit_count
= count
;
1402 gcc_assert (best_fit
);
1406 arm_selected_cpu
= sel
;
1410 gcc_assert (arm_selected_cpu
);
1411 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1412 if (!arm_selected_tune
)
1413 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
1415 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
1416 insn_flags
= arm_selected_cpu
->flags
;
1418 arm_tune
= arm_selected_tune
->core
;
1419 tune_flags
= arm_selected_tune
->flags
;
1420 current_tune
= arm_selected_tune
->tune
;
1422 /* Make sure that the processor choice does not conflict with any of the
1423 other command line choices. */
1424 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
1425 error ("target CPU does not support ARM mode");
1427 /* BPABI targets use linker tricks to allow interworking on cores
1428 without thumb support. */
1429 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
1431 warning (0, "target CPU does not support interworking" );
1432 target_flags
&= ~MASK_INTERWORK
;
1435 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
1437 warning (0, "target CPU does not support THUMB instructions");
1438 target_flags
&= ~MASK_THUMB
;
1441 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
1443 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1444 target_flags
&= ~MASK_APCS_FRAME
;
1447 /* Callee super interworking implies thumb interworking. Adding
1448 this to the flags here simplifies the logic elsewhere. */
1449 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
1450 target_flags
|= MASK_INTERWORK
;
1452 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1453 from here where no function is being compiled currently. */
1454 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
1455 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1457 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
1458 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1460 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
1462 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1463 target_flags
|= MASK_APCS_FRAME
;
1466 if (TARGET_POKE_FUNCTION_NAME
)
1467 target_flags
|= MASK_APCS_FRAME
;
1469 if (TARGET_APCS_REENT
&& flag_pic
)
1470 error ("-fpic and -mapcs-reent are incompatible");
1472 if (TARGET_APCS_REENT
)
1473 warning (0, "APCS reentrant code not supported. Ignored");
1475 /* If this target is normally configured to use APCS frames, warn if they
1476 are turned off and debugging is turned on. */
1478 && write_symbols
!= NO_DEBUG
1479 && !TARGET_APCS_FRAME
1480 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
1481 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1483 if (TARGET_APCS_FLOAT
)
1484 warning (0, "passing floating point arguments in fp regs not yet supported");
1486 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1487 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
1488 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
1489 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
1490 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
1491 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
1492 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
1493 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
1494 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
1495 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
1496 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
1497 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
1498 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
1499 arm_arch_cirrus
= (insn_flags
& FL_CIRRUS
) != 0;
1501 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
1502 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
1503 thumb_code
= TARGET_ARM
== 0;
1504 thumb1_code
= TARGET_THUMB1
!= 0;
1505 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
1506 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
1507 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
1508 arm_arch_thumb_hwdiv
= (insn_flags
& FL_THUMB_DIV
) != 0;
1509 arm_arch_arm_hwdiv
= (insn_flags
& FL_ARM_DIV
) != 0;
1510 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
1512 /* If we are not using the default (ARM mode) section anchor offset
1513 ranges, then set the correct ranges now. */
1516 /* Thumb-1 LDR instructions cannot have negative offsets.
1517 Permissible positive offset ranges are 5-bit (for byte loads),
1518 6-bit (for halfword loads), or 7-bit (for word loads).
1519 Empirical results suggest a 7-bit anchor range gives the best
1520 overall code size. */
1521 targetm
.min_anchor_offset
= 0;
1522 targetm
.max_anchor_offset
= 127;
1524 else if (TARGET_THUMB2
)
1526 /* The minimum is set such that the total size of the block
1527 for a particular anchor is 248 + 1 + 4095 bytes, which is
1528 divisible by eight, ensuring natural spacing of anchors. */
1529 targetm
.min_anchor_offset
= -248;
1530 targetm
.max_anchor_offset
= 4095;
1533 /* V5 code we generate is completely interworking capable, so we turn off
1534 TARGET_INTERWORK here to avoid many tests later on. */
1536 /* XXX However, we must pass the right pre-processor defines to CPP
1537 or GLD can get confused. This is a hack. */
1538 if (TARGET_INTERWORK
)
1539 arm_cpp_interwork
= 1;
1542 target_flags
&= ~MASK_INTERWORK
;
1544 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
1545 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1547 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
1548 error ("iwmmxt abi requires an iwmmxt capable cpu");
1550 if (!global_options_set
.x_arm_fpu_index
)
1552 const char *target_fpu_name
;
1555 #ifdef FPUTYPE_DEFAULT
1556 target_fpu_name
= FPUTYPE_DEFAULT
;
1558 if (arm_arch_cirrus
)
1559 target_fpu_name
= "maverick";
1561 target_fpu_name
= "fpe2";
1564 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
1569 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
1571 switch (arm_fpu_desc
->model
)
1573 case ARM_FP_MODEL_FPA
:
1574 if (arm_fpu_desc
->rev
== 2)
1575 arm_fpu_attr
= FPU_FPE2
;
1576 else if (arm_fpu_desc
->rev
== 3)
1577 arm_fpu_attr
= FPU_FPE3
;
1579 arm_fpu_attr
= FPU_FPA
;
1582 case ARM_FP_MODEL_MAVERICK
:
1583 arm_fpu_attr
= FPU_MAVERICK
;
1586 case ARM_FP_MODEL_VFP
:
1587 arm_fpu_attr
= FPU_VFP
;
1594 if (TARGET_AAPCS_BASED
1595 && (arm_fpu_desc
->model
== ARM_FP_MODEL_FPA
))
1596 error ("FPA is unsupported in the AAPCS");
1598 if (TARGET_AAPCS_BASED
)
1600 if (TARGET_CALLER_INTERWORKING
)
1601 error ("AAPCS does not support -mcaller-super-interworking");
1603 if (TARGET_CALLEE_INTERWORKING
)
1604 error ("AAPCS does not support -mcallee-super-interworking");
1607 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1608 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1609 will ever exist. GCC makes no attempt to support this combination. */
1610 if (TARGET_IWMMXT
&& !TARGET_SOFT_FLOAT
)
1611 sorry ("iWMMXt and hardware floating point");
1613 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1614 if (TARGET_THUMB2
&& TARGET_IWMMXT
)
1615 sorry ("Thumb-2 iWMMXt");
1617 /* __fp16 support currently assumes the core has ldrh. */
1618 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
1619 sorry ("__fp16 and no ldrh");
1621 /* If soft-float is specified then don't use FPU. */
1622 if (TARGET_SOFT_FLOAT
)
1623 arm_fpu_attr
= FPU_NONE
;
1625 if (TARGET_AAPCS_BASED
)
1627 if (arm_abi
== ARM_ABI_IWMMXT
)
1628 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
1629 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
1630 && TARGET_HARD_FLOAT
1632 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
1634 arm_pcs_default
= ARM_PCS_AAPCS
;
1638 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
1639 sorry ("-mfloat-abi=hard and VFP");
1641 if (arm_abi
== ARM_ABI_APCS
)
1642 arm_pcs_default
= ARM_PCS_APCS
;
1644 arm_pcs_default
= ARM_PCS_ATPCS
;
1647 /* For arm2/3 there is no need to do any scheduling if there is only
1648 a floating point emulator, or we are doing software floating-point. */
1649 if ((TARGET_SOFT_FLOAT
1650 || (TARGET_FPA
&& arm_fpu_desc
->rev
))
1651 && (tune_flags
& FL_MODE32
) == 0)
1652 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
1654 /* Use the cp15 method if it is available. */
1655 if (target_thread_pointer
== TP_AUTO
)
1657 if (arm_arch6k
&& !TARGET_THUMB1
)
1658 target_thread_pointer
= TP_CP15
;
1660 target_thread_pointer
= TP_SOFT
;
1663 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
1664 error ("can not use -mtp=cp15 with 16-bit Thumb");
1666 /* Override the default structure alignment for AAPCS ABI. */
1667 if (!global_options_set
.x_arm_structure_size_boundary
)
1669 if (TARGET_AAPCS_BASED
)
1670 arm_structure_size_boundary
= 8;
1674 if (arm_structure_size_boundary
!= 8
1675 && arm_structure_size_boundary
!= 32
1676 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
1678 if (ARM_DOUBLEWORD_ALIGN
)
1680 "structure size boundary can only be set to 8, 32 or 64");
1682 warning (0, "structure size boundary can only be set to 8 or 32");
1683 arm_structure_size_boundary
1684 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
1688 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
1690 error ("RTP PIC is incompatible with Thumb");
1694 /* If stack checking is disabled, we can use r10 as the PIC register,
1695 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1696 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
1698 if (TARGET_VXWORKS_RTP
)
1699 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1700 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
1703 if (flag_pic
&& TARGET_VXWORKS_RTP
)
1704 arm_pic_register
= 9;
1706 if (arm_pic_register_string
!= NULL
)
1708 int pic_register
= decode_reg_name (arm_pic_register_string
);
1711 warning (0, "-mpic-register= is useless without -fpic");
1713 /* Prevent the user from choosing an obviously stupid PIC register. */
1714 else if (pic_register
< 0 || call_used_regs
[pic_register
]
1715 || pic_register
== HARD_FRAME_POINTER_REGNUM
1716 || pic_register
== STACK_POINTER_REGNUM
1717 || pic_register
>= PC_REGNUM
1718 || (TARGET_VXWORKS_RTP
1719 && (unsigned int) pic_register
!= arm_pic_register
))
1720 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
1722 arm_pic_register
= pic_register
;
1725 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1726 if (fix_cm3_ldrd
== 2)
1728 if (arm_selected_cpu
->core
== cortexm3
)
1734 if (TARGET_THUMB1
&& flag_schedule_insns
)
1736 /* Don't warn since it's on by default in -O2. */
1737 flag_schedule_insns
= 0;
1742 /* If optimizing for size, bump the number of instructions that we
1743 are prepared to conditionally execute (even on a StrongARM). */
1744 max_insns_skipped
= 6;
1747 max_insns_skipped
= current_tune
->max_insns_skipped
;
1749 /* Hot/Cold partitioning is not currently supported, since we can't
1750 handle literal pool placement in that case. */
1751 if (flag_reorder_blocks_and_partition
)
1753 inform (input_location
,
1754 "-freorder-blocks-and-partition not supported on this architecture");
1755 flag_reorder_blocks_and_partition
= 0;
1756 flag_reorder_blocks
= 1;
1760 /* Hoisting PIC address calculations more aggressively provides a small,
1761 but measurable, size reduction for PIC code. Therefore, we decrease
1762 the bar for unrestricted expression hoisting to the cost of PIC address
1763 calculation, which is 2 instructions. */
1764 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
1765 global_options
.x_param_values
,
1766 global_options_set
.x_param_values
);
1768 /* ARM EABI defaults to strict volatile bitfields. */
1769 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0)
1770 flag_strict_volatile_bitfields
= 1;
1772 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
1773 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
1774 if (flag_prefetch_loop_arrays
< 0
1777 && current_tune
->num_prefetch_slots
> 0)
1778 flag_prefetch_loop_arrays
= 1;
1780 /* Set up parameters to be used in prefetching algorithm. Do not override the
1781 defaults unless we are tuning for a core we have researched values for. */
1782 if (current_tune
->num_prefetch_slots
> 0)
1783 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
1784 current_tune
->num_prefetch_slots
,
1785 global_options
.x_param_values
,
1786 global_options_set
.x_param_values
);
1787 if (current_tune
->l1_cache_line_size
>= 0)
1788 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
1789 current_tune
->l1_cache_line_size
,
1790 global_options
.x_param_values
,
1791 global_options_set
.x_param_values
);
1792 if (current_tune
->l1_cache_size
>= 0)
1793 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
1794 current_tune
->l1_cache_size
,
1795 global_options
.x_param_values
,
1796 global_options_set
.x_param_values
);
1798 /* Register global variables with the garbage collector. */
1799 arm_add_gc_roots ();
1803 arm_add_gc_roots (void)
1805 gcc_obstack_init(&minipool_obstack
);
1806 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
1809 /* A table of known ARM exception types.
1810 For use with the interrupt function attribute. */
1814 const char *const arg
;
1815 const unsigned long return_value
;
1819 static const isr_attribute_arg isr_attribute_args
[] =
1821 { "IRQ", ARM_FT_ISR
},
1822 { "irq", ARM_FT_ISR
},
1823 { "FIQ", ARM_FT_FIQ
},
1824 { "fiq", ARM_FT_FIQ
},
1825 { "ABORT", ARM_FT_ISR
},
1826 { "abort", ARM_FT_ISR
},
1827 { "ABORT", ARM_FT_ISR
},
1828 { "abort", ARM_FT_ISR
},
1829 { "UNDEF", ARM_FT_EXCEPTION
},
1830 { "undef", ARM_FT_EXCEPTION
},
1831 { "SWI", ARM_FT_EXCEPTION
},
1832 { "swi", ARM_FT_EXCEPTION
},
1833 { NULL
, ARM_FT_NORMAL
}
1836 /* Returns the (interrupt) function type of the current
1837 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1839 static unsigned long
1840 arm_isr_value (tree argument
)
1842 const isr_attribute_arg
* ptr
;
1846 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
1848 /* No argument - default to IRQ. */
1849 if (argument
== NULL_TREE
)
1852 /* Get the value of the argument. */
1853 if (TREE_VALUE (argument
) == NULL_TREE
1854 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
1855 return ARM_FT_UNKNOWN
;
1857 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
1859 /* Check it against the list of known arguments. */
1860 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
1861 if (streq (arg
, ptr
->arg
))
1862 return ptr
->return_value
;
1864 /* An unrecognized interrupt type. */
1865 return ARM_FT_UNKNOWN
;
1868 /* Computes the type of the current function. */
/* NOTE(review): garbled fragment -- declarations of ATTR and A, several
   guard conditions and all braces are missing from this view.  Returns a
   mask of ARM_FT_* flags accumulated into TYPE.  */
1870 static unsigned long
1871 arm_compute_func_type (void)
1873 unsigned long type
= ARM_FT_UNKNOWN
;
/* Only meaningful when called with a function being compiled.  */
1877 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
)
;
1879 /* Decide if the current function is volatile. Such functions
1880 never return, and many memory cycles can be saved by not storing
1881 register values that will never be needed again. This optimization
1882 was added to speed up context switching in a kernel application. */
1884 && (TREE_NOTHROW (current_function_decl
)
1885 || !(flag_unwind_tables
1887 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
1888 && TREE_THIS_VOLATILE (current_function_decl
))
1889 type
|= ARM_FT_VOLATILE
;
/* A function with a static chain is a nested function.  */
1891 if (cfun
->static_chain_decl
!= NULL
)
1892 type
|= ARM_FT_NESTED
;
1894 attr
= DECL_ATTRIBUTES (current_function_decl
);
/* "naked": prologue/epilogue are user-supplied.  */
1896 a
= lookup_attribute ("naked", attr
);
1898 type
|= ARM_FT_NAKED
;
/* "isr" and "interrupt" are synonyms; "interrupt" is checked as the
   fallback (guarding condition is on a missing line).  */
1900 a
= lookup_attribute ("isr", attr
);
1902 a
= lookup_attribute ("interrupt", attr
);
/* No interrupt attribute: plain function, interworked if enabled.  */
1905 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
/* Otherwise decode the attribute argument via the table lookup.  */
1907 type
|= arm_isr_value (TREE_VALUE (a
));
1912 /* Returns the type of the current function. */
/* Lazily computes and caches the ARM_FT_* mask in cfun->machine->func_type;
   ARM_FT_UNKNOWN marks the cache as not-yet-computed.  */
1915 arm_current_func_type (void)
1917 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
1918 cfun
->machine
->func_type
= arm_compute_func_type ();
1920 return cfun
->machine
->func_type
;
/* Target hook: whether to allocate stack slots for incoming arguments.
   Naked functions have no compiler-generated prologue, so no slots.  */
1924 arm_allocate_stack_slots_for_args (void)
1926 /* Naked functions should not allocate stack slots for arguments. */
1927 return !IS_NAKED (arm_current_func_type ());
1931 /* Output assembler code for a block containing the constant parts
1932 of a trampoline, leaving space for the variable parts.
1934 On the ARM, (if r8 is the static chain regnum, and remembering that
1935 referencing pc adds an offset of 8) the trampoline looks like:
1938 .word static chain value
1939 .word function's address
1940 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
/* NOTE(review): garbled fragment -- the TARGET_ARM/else branch structure and
   braces are missing from this view; the first pair of ldr's appears to be
   the ARM-mode variant, the ldr.w pair the Thumb-2 variant, and the
   push/pop sequence the 16-bit Thumb variant.  */
1943 arm_asm_trampoline_template (FILE *f
)
/* ARM mode: load static chain then branch via pc-relative loads.  */
1947 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
1948 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
1950 else if (TARGET_THUMB2
)
1952 /* The Thumb-2 trampoline is similar to the arm implementation.
1953 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
1954 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
1955 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
1956 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
/* 16-bit Thumb: no pc-relative ldr into pc, so spill r0/r1 and bounce
   through the stack to load the target address into pc.  */
1960 ASM_OUTPUT_ALIGN (f
, 2);
1961 fprintf (f
, "\t.code\t16\n");
1962 fprintf (f
, ".Ltrampoline_start:\n");
1963 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
1964 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
1965 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
1966 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
1967 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
1968 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
/* Two zero words: placeholders for the chain value and the function
   address, filled in later by arm_trampoline_init.  */
1970 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
1971 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
1974 /* Emit RTL insns to initialize the variable parts of a trampoline. */
/* M_TRAMP is the trampoline memory block, FNDECL the nested function,
   CHAIN_VALUE the static chain to install.  NOTE(review): garbled
   fragment -- braces are missing from this view.  */
1977 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
1979 rtx fnaddr
, mem
, a_tramp
;
/* Copy the constant template emitted by arm_asm_trampoline_template.  */
1981 emit_block_move (m_tramp
, assemble_trampoline_template (),
1982 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
/* Fill in the two placeholder words; offsets differ because the 16-bit
   Thumb template is one instruction pair longer.  */
1984 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
1985 emit_move_insn (mem
, chain_value
);
1987 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
1988 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
1989 emit_move_insn (mem
, fnaddr
);
/* Flush the I-cache over the freshly written trampoline so the CPU does
   not execute stale instructions.  */
1991 a_tramp
= XEXP (m_tramp
, 0);
1992 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
1993 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
1994 plus_constant (a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
1997 /* Thumb trampolines should be entered in thumb mode, so set
1998 the bottom bit of the address. */
/* NOTE(review): the TARGET_THUMB guard and the return statement are on
   lines missing from this view; the visible code ORs in bit 0.  */
2001 arm_trampoline_adjust_address (rtx addr
)
2004 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
2005 NULL
, 0, OPTAB_LIB_WIDEN
);
2009 /* Return 1 if it is possible to return using a single instruction.
2010 If SIBLING is non-null, this is a test for a return before a sibling
2011 call. SIBLING is the call insn, so we can examine its register usage. */
2014 use_return_insn (int iscond
, rtx sibling
)
2017 unsigned int func_type
;
2018 unsigned long saved_int_regs
;
2019 unsigned HOST_WIDE_INT stack_adjust
;
2020 arm_stack_offsets
*offsets
;
2022 /* Never use a return instruction before reload has run. */
2023 if (!reload_completed
)
2026 func_type
= arm_current_func_type ();
2028 /* Naked, volatile and stack alignment functions need special
2030 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
2033 /* So do interrupt functions that use the frame pointer and Thumb
2034 interrupt functions. */
2035 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
2038 offsets
= arm_get_frame_offsets ();
2039 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
2041 /* As do variadic functions. */
2042 if (crtl
->args
.pretend_args_size
2043 || cfun
->machine
->uses_anonymous_args
2044 /* Or if the function calls __builtin_eh_return () */
2045 || crtl
->calls_eh_return
2046 /* Or if the function calls alloca */
2047 || cfun
->calls_alloca
2048 /* Or if there is a stack adjustment. However, if the stack pointer
2049 is saved on the stack, we can use a pre-incrementing stack load. */
2050 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
2051 && stack_adjust
== 4)))
2054 saved_int_regs
= offsets
->saved_regs_mask
;
2056 /* Unfortunately, the insn
2058 ldmib sp, {..., sp, ...}
2060 triggers a bug on most SA-110 based devices, such that the stack
2061 pointer won't be correctly restored if the instruction takes a
2062 page fault. We work around this problem by popping r3 along with
2063 the other registers, since that is never slower than executing
2064 another instruction.
2066 We test for !arm_arch5 here, because code for any architecture
2067 less than this could potentially be run on one of the buggy
2069 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
2071 /* Validate that r3 is a call-clobbered register (always true in
2072 the default abi) ... */
2073 if (!call_used_regs
[3])
2076 /* ... that it isn't being used for a return value ... */
2077 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
2080 /* ... or for a tail-call argument ... */
2083 gcc_assert (GET_CODE (sibling
) == CALL_INSN
);
2085 if (find_regno_fusage (sibling
, USE
, 3))
2089 /* ... and that there are no call-saved registers in r0-r2
2090 (always true in the default ABI). */
2091 if (saved_int_regs
& 0x7)
2095 /* Can't be done if interworking with Thumb, and any registers have been
2097 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
2100 /* On StrongARM, conditional returns are expensive if they aren't
2101 taken and multiple registers have been stacked. */
2102 if (iscond
&& arm_tune_strongarm
)
2104 /* Conditional return when just the LR is stored is a simple
2105 conditional-load instruction, that's not expensive. */
2106 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
2110 && arm_pic_register
!= INVALID_REGNUM
2111 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
2115 /* If there are saved registers but the LR isn't saved, then we need
2116 two instructions for the return. */
2117 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
2120 /* Can't be done if any of the FPA regs are pushed,
2121 since this also requires an insn. */
2122 if (TARGET_HARD_FLOAT
&& TARGET_FPA
)
2123 for (regno
= FIRST_FPA_REGNUM
; regno
<= LAST_FPA_REGNUM
; regno
++)
2124 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
2127 /* Likewise VFP regs. */
2128 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
2129 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
2130 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
2133 if (TARGET_REALLY_IWMMXT
)
2134 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
2135 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2141 /* Return TRUE if int I is a valid immediate ARM constant. */
/* An ARM immediate is an 8-bit value rotated right by an even amount.
   NOTE(review): garbled fragment -- return statements, the Thumb-2
   handling and the declarations of lowbit/v are on missing lines.  */
2144 const_ok_for_arm (HOST_WIDE_INT i
)
2148 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2149 be all zero, or all one. */
2150 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
2151 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
2152 != ((~(unsigned HOST_WIDE_INT
) 0)
2153 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
/* Work on the low 32 bits only from here on.  */
2156 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
2158 /* Fast return for 0 and small values. We must do this for zero, since
2159 the code below can't handle that one case. */
2160 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
2163 /* Get the number of trailing zeros. */
2164 lowbit
= ffs((int) i
) - 1;
2166 /* Only even shifts are allowed in ARM mode so round down to the
2167 nearest even number. */
/* An 8-bit field starting at (even) lowbit is directly encodable.  */
2171 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
2176 /* Allow rotated constants in ARM mode. */
/* These masks are the 8-bit windows that wrap around bit 31.  */
2178 && ((i
& ~0xc000003f) == 0
2179 || (i
& ~0xf000000f) == 0
2180 || (i
& ~0xfc000003) == 0))
2187 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
/* Thumb-2 replicated-byte immediate forms (v presumably holds the
   replicated low byte -- its computation is on a missing line).  */
2190 if (i
== v
|| i
== (v
| (v
<< 8)))
2193 /* Allow repeated pattern 0xXY00XY00. */
2203 /* Return true if I is a valid constant for the operation CODE. */
/* NOTE(review): garbled fragment -- the switch header and most case labels
   (SET, PLUS, AND, IOR/XOR) are on missing lines; only the MINUS label
   survives.  Each case checks whether the constant can be synthesized by
   an alternative single instruction (movw, mvn, rsb, bic, orn).  */
2205 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
/* Directly encodable immediates work for any operation.  */
2207 if (const_ok_for_arm (i
))
2213 /* See if we can use movw. */
/* movw loads any 16-bit constant on Thumb-2 / ARMv6T2.  */
2214 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
2217 /* Otherwise, try mvn. */
2218 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
/* PLUS of -I can be done as a subtract of I.  */
2239 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
2241 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
/* AND/IOR with ~I encodable maps to bic/orn.  */
2247 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2251 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2258 /* Emit a sequence of insns to handle a large constant.
2259 CODE is the code of the operation required, it can be any of SET, PLUS,
2260 IOR, AND, XOR, MINUS;
2261 MODE is the mode in which the operation is being performed;
2262 VAL is the integer to operate on;
2263 SOURCE is the other operand (a register, or a null-pointer for SET);
2264 SUBTARGETS means it is safe to create scratch registers if that will
2265 either produce a simpler sequence, or we will want to cse the values.
2266 Return value is the number of insns emitted. */
2268 /* ??? Tweak this for thumb2. */
2270 arm_split_constant (enum rtx_code code
, enum machine_mode mode
, rtx insn
,
2271 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
2275 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
2276 cond
= COND_EXEC_TEST (PATTERN (insn
));
2280 if (subtargets
|| code
== SET
2281 || (GET_CODE (target
) == REG
&& GET_CODE (source
) == REG
2282 && REGNO (target
) != REGNO (source
)))
2284 /* After arm_reorg has been called, we can't fix up expensive
2285 constants by pushing them into memory so we must synthesize
2286 them in-line, regardless of the cost. This is only likely to
2287 be more costly on chips that have load delay slots and we are
2288 compiling without running the scheduler (so no splitting
2289 occurred before the final instruction emission).
2291 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2293 if (!after_arm_reorg
2295 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
2297 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
2302 /* Currently SET is the only monadic value for CODE, all
2303 the rest are diadic. */
2304 if (TARGET_USE_MOVT
)
2305 arm_emit_movpair (target
, GEN_INT (val
));
2307 emit_set_insn (target
, GEN_INT (val
));
2313 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
2315 if (TARGET_USE_MOVT
)
2316 arm_emit_movpair (temp
, GEN_INT (val
));
2318 emit_set_insn (temp
, GEN_INT (val
));
2320 /* For MINUS, the value is subtracted from, since we never
2321 have subtraction of a constant. */
2323 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
2325 emit_set_insn (target
,
2326 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
2332 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
2336 /* Return the number of instructions required to synthesize the given
2337 constant, if we start emitting them from bit-position I. */
/* NOTE(review): garbled fragment -- the insns counter declaration, its
   increments, the do/for loop headers, the `end' computation and the final
   return are on missing lines.  Each iteration peels one 8-bit chunk off
   REMAINDER, wrapping around the 32-bit word.  */
2339 count_insns_for_constant (HOST_WIDE_INT remainder
, int i
)
2341 HOST_WIDE_INT temp1
;
/* ARM rotates immediates in steps of 2 bits; Thumb in steps of 1.  */
2342 int step_size
= TARGET_ARM
? 2 : 1;
/* A non-zero start position only makes sense for ARM rotations.  */
2345 gcc_assert (TARGET_ARM
|| i
== 0);
/* Skip over zero bits below the current position.  */
2353 if (remainder
& (((1 << step_size
) - 1) << (i
- step_size
)))
/* Extract the next 8-bit window (possibly wrapping past bit 0) and
   remove it from the remaining work.  */
2358 temp1
= remainder
& ((0x0ff << end
)
2359 | ((i
< end
) ? (0xff >> (32 - end
)) : 0));
2360 remainder
&= ~temp1
;
2365 } while (remainder
);
/* Choose the best bit position from which to start emitting the 8-bit
   chunks of REMAINDER: just above the largest 2-bit-aligned run of zeros.
   NOTE(review): garbled fragment -- the declarations of best_start/i, the
   TARGET_ARM early-out body and the final return are on missing lines.  */
2370 find_best_start (unsigned HOST_WIDE_INT remainder
)
2372 int best_consecutive_zeros
= 0;
2376 /* If we aren't targetting ARM, the best place to start is always at
/* Scan all even bit positions for runs of zero pairs.  */
2381 for (i
= 0; i
< 32; i
+= 2)
2383 int consecutive_zeros
= 0;
2385 if (!(remainder
& (3 << i
)))
2387 while ((i
< 32) && !(remainder
& (3 << i
)))
2389 consecutive_zeros
+= 2;
/* Track the longest run seen; best_start is the bit just below it.  */
2392 if (consecutive_zeros
> best_consecutive_zeros
)
2394 best_consecutive_zeros
= consecutive_zeros
;
2395 best_start
= i
- consecutive_zeros
;
2401 /* So long as it won't require any more insns to do so, it's
2402 desirable to emit a small constant (in bits 0...9) in the last
2403 insn. This way there is more chance that it can be combined with
2404 a later addressing insn to form a pre-indexed load or store
2405 operation. Consider:
2407 *((volatile int *)0xe0000100) = 1;
2408 *((volatile int *)0xe0000110) = 2;
2410 We want this to wind up as:
2414 str rB, [rA, #0x100]
2416 str rB, [rA, #0x110]
2418 rather than having to synthesize both large constants from scratch.
2420 Therefore, we calculate how many insns would be required to emit
2421 the constant starting from `best_start', and also starting from
2422 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2423 yield a shorter sequence, we may as well use zero. */
2425 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < remainder
)
2426 && (count_insns_for_constant (remainder
, 0) <=
2427 count_insns_for_constant (remainder
, best_start
)))
2433 /* Emit an instruction with the indicated PATTERN. If COND is
2434 non-NULL, conditionalize the execution of the instruction on COND
/* Wraps PATTERN in a COND_EXEC when COND is given (the guard is on a
   missing line); copy_rtx keeps COND shareable across multiple insns.  */
2438 emit_constant_insn (rtx cond
, rtx pattern
)
2441 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
2442 emit_insn (pattern
);
2445 /* As above, but extra parameter GENERATE which, if clear, suppresses
2447 /* ??? This needs more work for thumb2. */
2450 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
2451 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
2456 int final_invert
= 0;
2458 int num_bits_set
= 0;
2459 int set_sign_bit_copies
= 0;
2460 int clear_sign_bit_copies
= 0;
2461 int clear_zero_bit_copies
= 0;
2462 int set_zero_bit_copies
= 0;
2464 unsigned HOST_WIDE_INT temp1
, temp2
;
2465 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
2466 int step_size
= TARGET_ARM
? 2 : 1;
2468 /* Find out which operations are safe for a given CODE. Also do a quick
2469 check for degenerate cases; these can occur when DImode operations
2483 if (remainder
== 0xffffffff)
2486 emit_constant_insn (cond
,
2487 gen_rtx_SET (VOIDmode
, target
,
2488 GEN_INT (ARM_SIGN_EXTEND (val
))));
2494 if (reload_completed
&& rtx_equal_p (target
, source
))
2498 emit_constant_insn (cond
,
2499 gen_rtx_SET (VOIDmode
, target
, source
));
2508 emit_constant_insn (cond
,
2509 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
2512 if (remainder
== 0xffffffff)
2514 if (reload_completed
&& rtx_equal_p (target
, source
))
2517 emit_constant_insn (cond
,
2518 gen_rtx_SET (VOIDmode
, target
, source
));
2527 if (reload_completed
&& rtx_equal_p (target
, source
))
2530 emit_constant_insn (cond
,
2531 gen_rtx_SET (VOIDmode
, target
, source
));
2535 if (remainder
== 0xffffffff)
2538 emit_constant_insn (cond
,
2539 gen_rtx_SET (VOIDmode
, target
,
2540 gen_rtx_NOT (mode
, source
)));
2546 /* We treat MINUS as (val - source), since (source - val) is always
2547 passed as (source + (-val)). */
2551 emit_constant_insn (cond
,
2552 gen_rtx_SET (VOIDmode
, target
,
2553 gen_rtx_NEG (mode
, source
)));
2556 if (const_ok_for_arm (val
))
2559 emit_constant_insn (cond
,
2560 gen_rtx_SET (VOIDmode
, target
,
2561 gen_rtx_MINUS (mode
, GEN_INT (val
),
2573 /* If we can do it in one insn get out quickly. */
2574 if (const_ok_for_op (val
, code
))
2577 emit_constant_insn (cond
,
2578 gen_rtx_SET (VOIDmode
, target
,
2580 ? gen_rtx_fmt_ee (code
, mode
, source
,
2586 /* Calculate a few attributes that may be useful for specific
2588 /* Count number of leading zeros. */
2589 for (i
= 31; i
>= 0; i
--)
2591 if ((remainder
& (1 << i
)) == 0)
2592 clear_sign_bit_copies
++;
2597 /* Count number of leading 1's. */
2598 for (i
= 31; i
>= 0; i
--)
2600 if ((remainder
& (1 << i
)) != 0)
2601 set_sign_bit_copies
++;
2606 /* Count number of trailing zero's. */
2607 for (i
= 0; i
<= 31; i
++)
2609 if ((remainder
& (1 << i
)) == 0)
2610 clear_zero_bit_copies
++;
2615 /* Count number of trailing 1's. */
2616 for (i
= 0; i
<= 31; i
++)
2618 if ((remainder
& (1 << i
)) != 0)
2619 set_zero_bit_copies
++;
2627 /* See if we can do this by sign_extending a constant that is known
2628 to be negative. This is a good, way of doing it, since the shift
2629 may well merge into a subsequent insn. */
2630 if (set_sign_bit_copies
> 1)
2632 if (const_ok_for_arm
2633 (temp1
= ARM_SIGN_EXTEND (remainder
2634 << (set_sign_bit_copies
- 1))))
2638 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2639 emit_constant_insn (cond
,
2640 gen_rtx_SET (VOIDmode
, new_src
,
2642 emit_constant_insn (cond
,
2643 gen_ashrsi3 (target
, new_src
,
2644 GEN_INT (set_sign_bit_copies
- 1)));
2648 /* For an inverted constant, we will need to set the low bits,
2649 these will be shifted out of harm's way. */
2650 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
2651 if (const_ok_for_arm (~temp1
))
2655 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2656 emit_constant_insn (cond
,
2657 gen_rtx_SET (VOIDmode
, new_src
,
2659 emit_constant_insn (cond
,
2660 gen_ashrsi3 (target
, new_src
,
2661 GEN_INT (set_sign_bit_copies
- 1)));
2667 /* See if we can calculate the value as the difference between two
2668 valid immediates. */
2669 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
2671 int topshift
= clear_sign_bit_copies
& ~1;
2673 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
2674 & (0xff000000 >> topshift
));
2676 /* If temp1 is zero, then that means the 9 most significant
2677 bits of remainder were 1 and we've caused it to overflow.
2678 When topshift is 0 we don't need to do anything since we
2679 can borrow from 'bit 32'. */
2680 if (temp1
== 0 && topshift
!= 0)
2681 temp1
= 0x80000000 >> (topshift
- 1);
2683 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
2685 if (const_ok_for_arm (temp2
))
2689 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2690 emit_constant_insn (cond
,
2691 gen_rtx_SET (VOIDmode
, new_src
,
2693 emit_constant_insn (cond
,
2694 gen_addsi3 (target
, new_src
,
2702 /* See if we can generate this by setting the bottom (or the top)
2703 16 bits, and then shifting these into the other half of the
2704 word. We only look for the simplest cases, to do more would cost
2705 too much. Be careful, however, not to generate this when the
2706 alternative would take fewer insns. */
2707 if (val
& 0xffff0000)
2709 temp1
= remainder
& 0xffff0000;
2710 temp2
= remainder
& 0x0000ffff;
2712 /* Overlaps outside this range are best done using other methods. */
2713 for (i
= 9; i
< 24; i
++)
2715 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
2716 && !const_ok_for_arm (temp2
))
2718 rtx new_src
= (subtargets
2719 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
2721 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
2722 source
, subtargets
, generate
);
2730 gen_rtx_ASHIFT (mode
, source
,
2737 /* Don't duplicate cases already considered. */
2738 for (i
= 17; i
< 24; i
++)
2740 if (((temp1
| (temp1
>> i
)) == remainder
)
2741 && !const_ok_for_arm (temp1
))
2743 rtx new_src
= (subtargets
2744 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
2746 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
2747 source
, subtargets
, generate
);
2752 gen_rtx_SET (VOIDmode
, target
,
2755 gen_rtx_LSHIFTRT (mode
, source
,
2766 /* If we have IOR or XOR, and the constant can be loaded in a
2767 single instruction, and we can find a temporary to put it in,
2768 then this can be done in two instructions instead of 3-4. */
2770 /* TARGET can't be NULL if SUBTARGETS is 0 */
2771 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
2773 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
2777 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2779 emit_constant_insn (cond
,
2780 gen_rtx_SET (VOIDmode
, sub
,
2782 emit_constant_insn (cond
,
2783 gen_rtx_SET (VOIDmode
, target
,
2784 gen_rtx_fmt_ee (code
, mode
,
2795 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2796 and the remainder 0s for e.g. 0xfff00000)
2797 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2799 This can be done in 2 instructions by using shifts with mov or mvn.
2804 mvn r0, r0, lsr #12 */
2805 if (set_sign_bit_copies
> 8
2806 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
2810 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2811 rtx shift
= GEN_INT (set_sign_bit_copies
);
2815 gen_rtx_SET (VOIDmode
, sub
,
2817 gen_rtx_ASHIFT (mode
,
2822 gen_rtx_SET (VOIDmode
, target
,
2824 gen_rtx_LSHIFTRT (mode
, sub
,
2831 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2833 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2835 For eg. r0 = r0 | 0xfff
2840 if (set_zero_bit_copies
> 8
2841 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
2845 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2846 rtx shift
= GEN_INT (set_zero_bit_copies
);
2850 gen_rtx_SET (VOIDmode
, sub
,
2852 gen_rtx_LSHIFTRT (mode
,
2857 gen_rtx_SET (VOIDmode
, target
,
2859 gen_rtx_ASHIFT (mode
, sub
,
2865 /* This will never be reached for Thumb2 because orn is a valid
2866 instruction. This is for Thumb1 and the ARM 32 bit cases.
2868 x = y | constant (such that ~constant is a valid constant)
2870 x = ~(~y & ~constant).
2872 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
2876 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2877 emit_constant_insn (cond
,
2878 gen_rtx_SET (VOIDmode
, sub
,
2879 gen_rtx_NOT (mode
, source
)));
2882 sub
= gen_reg_rtx (mode
);
2883 emit_constant_insn (cond
,
2884 gen_rtx_SET (VOIDmode
, sub
,
2885 gen_rtx_AND (mode
, source
,
2887 emit_constant_insn (cond
,
2888 gen_rtx_SET (VOIDmode
, target
,
2889 gen_rtx_NOT (mode
, sub
)));
2896 /* See if two shifts will do 2 or more insn's worth of work. */
2897 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
2899 HOST_WIDE_INT shift_mask
= ((0xffffffff
2900 << (32 - clear_sign_bit_copies
))
2903 if ((remainder
| shift_mask
) != 0xffffffff)
2907 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2908 insns
= arm_gen_constant (AND
, mode
, cond
,
2909 remainder
| shift_mask
,
2910 new_src
, source
, subtargets
, 1);
2915 rtx targ
= subtargets
? NULL_RTX
: target
;
2916 insns
= arm_gen_constant (AND
, mode
, cond
,
2917 remainder
| shift_mask
,
2918 targ
, source
, subtargets
, 0);
2924 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2925 rtx shift
= GEN_INT (clear_sign_bit_copies
);
2927 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
2928 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
2934 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
2936 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
2938 if ((remainder
| shift_mask
) != 0xffffffff)
2942 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2944 insns
= arm_gen_constant (AND
, mode
, cond
,
2945 remainder
| shift_mask
,
2946 new_src
, source
, subtargets
, 1);
2951 rtx targ
= subtargets
? NULL_RTX
: target
;
2953 insns
= arm_gen_constant (AND
, mode
, cond
,
2954 remainder
| shift_mask
,
2955 targ
, source
, subtargets
, 0);
2961 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2962 rtx shift
= GEN_INT (clear_zero_bit_copies
);
2964 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
2965 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
2977 for (i
= 0; i
< 32; i
++)
2978 if (remainder
& (1 << i
))
2981 if ((code
== AND
) || (can_invert
&& num_bits_set
> 16))
2982 remainder
^= 0xffffffff;
2983 else if (code
== PLUS
&& num_bits_set
> 16)
2984 remainder
= (-remainder
) & 0xffffffff;
2986 /* For XOR, if more than half the bits are set and there's a sequence
2987 of more than 8 consecutive ones in the pattern then we can XOR by the
2988 inverted constant and then invert the final result; this may save an
2989 instruction and might also lead to the final mvn being merged with
2990 some other operation. */
2991 else if (code
== XOR
&& num_bits_set
> 16
2992 && (count_insns_for_constant (remainder
^ 0xffffffff,
2994 (remainder
^ 0xffffffff))
2995 < count_insns_for_constant (remainder
,
2996 find_best_start (remainder
))))
2998 remainder
^= 0xffffffff;
3007 /* Now try and find a way of doing the job in either two or three
3009 We start by looking for the largest block of zeros that are aligned on
3010 a 2-bit boundary, we then fill up the temps, wrapping around to the
3011 top of the word when we drop off the bottom.
3012 In the worst case this code should produce no more than four insns.
3013 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3014 best place to start. */
3016 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3019 /* Now start emitting the insns. */
3020 i
= find_best_start (remainder
);
3027 if (remainder
& (3 << (i
- 2)))
3032 temp1
= remainder
& ((0x0ff << end
)
3033 | ((i
< end
) ? (0xff >> (32 - end
)) : 0));
3034 remainder
&= ~temp1
;
3038 rtx new_src
, temp1_rtx
;
3040 if (code
== SET
|| code
== MINUS
)
3042 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
3043 if (can_invert
&& code
!= MINUS
)
3048 if ((final_invert
|| remainder
) && subtargets
)
3049 new_src
= gen_reg_rtx (mode
);
3054 else if (can_negate
)
3058 temp1
= trunc_int_for_mode (temp1
, mode
);
3059 temp1_rtx
= GEN_INT (temp1
);
3063 else if (code
== MINUS
)
3064 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
3066 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
3068 emit_constant_insn (cond
,
3069 gen_rtx_SET (VOIDmode
, new_src
,
3079 else if (code
== MINUS
)
3085 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3095 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
3096 gen_rtx_NOT (mode
, source
)));
3103 /* Canonicalize a comparison so that we are more likely to recognize it.
3104 This can be done for a few constant compares, where we can make the
3105 immediate value easier to load. */
3108 arm_canonicalize_comparison (enum rtx_code code
, rtx
*op0
, rtx
*op1
)
3110 enum machine_mode mode
;
3111 unsigned HOST_WIDE_INT i
, maxval
;
3113 mode
= GET_MODE (*op0
);
3114 if (mode
== VOIDmode
)
3115 mode
= GET_MODE (*op1
);
3117 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
3119 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3120 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3121 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3122 for GTU/LEU in Thumb mode. */
3127 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3129 if (TARGET_ARM
&& TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
3132 if (code
== GT
|| code
== LE
3133 || (!TARGET_ARM
&& (code
== GTU
|| code
== LEU
)))
3135 /* Missing comparison. First try to use an available
3137 if (GET_CODE (*op1
) == CONST_INT
)
3145 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
3147 *op1
= GEN_INT (i
+ 1);
3148 return code
== GT
? GE
: LT
;
3153 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
3154 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
3156 *op1
= GEN_INT (i
+ 1);
3157 return code
== GTU
? GEU
: LTU
;
3165 /* If that did not work, reverse the condition. */
3169 return swap_condition (code
);
3175 /* Comparisons smaller than DImode. Only adjust comparisons against
3176 an out-of-range constant. */
3177 if (GET_CODE (*op1
) != CONST_INT
3178 || const_ok_for_arm (INTVAL (*op1
))
3179 || const_ok_for_arm (- INTVAL (*op1
)))
3193 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
3195 *op1
= GEN_INT (i
+ 1);
3196 return code
== GT
? GE
: LT
;
3203 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
3205 *op1
= GEN_INT (i
- 1);
3206 return code
== GE
? GT
: LE
;
3212 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
3213 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
3215 *op1
= GEN_INT (i
+ 1);
3216 return code
== GTU
? GEU
: LTU
;
3223 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
3225 *op1
= GEN_INT (i
- 1);
3226 return code
== GEU
? GTU
: LEU
;
3238 /* Define how to find the value returned by a function. */
/* Target hook implementation: pick the register (rtx) in which a value of
   TYPE is returned from FUNC.  NOTE(review): garbled fragment -- braces and
   the return-type line are missing from this view.  */
3241 arm_function_value(const_tree type
, const_tree func
,
3242 bool outgoing ATTRIBUTE_UNUSED
)
3244 enum machine_mode mode
;
3245 int unsignedp ATTRIBUTE_UNUSED
;
3246 rtx r ATTRIBUTE_UNUSED
;
3248 mode
= TYPE_MODE (type
);
/* AAPCS has its own allocation logic, including co-processor regs.  */
3250 if (TARGET_AAPCS_BASED
)
3251 return aapcs_allocate_return_reg (mode
, type
, func
);
3253 /* Promote integer types. */
3254 if (INTEGRAL_TYPE_P (type
))
3255 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
3257 /* Promotes small structs returned in a register to full-word size
3258 for big-endian AAPCS. */
3259 if (arm_return_in_msb (type
))
/* Round the size up to a whole number of words and pick the matching
   integer mode.  */
3261 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3262 if (size
% UNITS_PER_WORD
!= 0)
3264 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
3265 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
3269 return LIBCALL_VALUE (mode
);
/* Hash-table equality callback for the libcall table: compares the two
   stored SYMBOL_REF rtxes structurally.  */
3273 libcall_eq (const void *p1
, const void *p2
)
3275 return rtx_equal_p ((const_rtx
) p1
, (const_rtx
) p2
);
/* Hash-table hash callback for the libcall table: hashes the stored rtx.
   FALSE = do not record volatile/unspec info in the hash.  */
3279 libcall_hash (const void *p1
)
3281 return hash_rtx ((const_rtx
) p1
, VOIDmode
, NULL
, NULL
, FALSE
);
/* Insert LIBCALL (a SYMBOL_REF, possibly NULL for unavailable optabs) into
   HTAB, used to build the base-PCS libcall set below.  */
3285 add_libcall (htab_t htab
, rtx libcall
)
3287 *htab_find_slot (htab
, libcall
, INSERT
) = libcall
;
3291 arm_libcall_uses_aapcs_base (const_rtx libcall
)
3293 static bool init_done
= false;
3294 static htab_t libcall_htab
;
3300 libcall_htab
= htab_create (31, libcall_hash
, libcall_eq
,
3302 add_libcall (libcall_htab
,
3303 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
3304 add_libcall (libcall_htab
,
3305 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
3306 add_libcall (libcall_htab
,
3307 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
3308 add_libcall (libcall_htab
,
3309 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
3311 add_libcall (libcall_htab
,
3312 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
3313 add_libcall (libcall_htab
,
3314 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
3315 add_libcall (libcall_htab
,
3316 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
3317 add_libcall (libcall_htab
,
3318 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
3320 add_libcall (libcall_htab
,
3321 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
3322 add_libcall (libcall_htab
,
3323 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
3324 add_libcall (libcall_htab
,
3325 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
3326 add_libcall (libcall_htab
,
3327 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
3328 add_libcall (libcall_htab
,
3329 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
3330 add_libcall (libcall_htab
,
3331 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
3333 /* Values from double-precision helper functions are returned in core
3334 registers if the selected core only supports single-precision
3335 arithmetic, even if we are using the hard-float ABI. The same is
3336 true for single-precision helpers, but we will never be using the
3337 hard-float ABI on a CPU which doesn't support single-precision
3338 operations in hardware. */
3339 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
3340 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
3341 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
3342 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
3343 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
3344 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
3345 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
3346 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
3347 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
3348 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
3349 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
3350 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
3352 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
3356 return libcall
&& htab_find (libcall_htab
, libcall
) != NULL
;
3360 arm_libcall_value (enum machine_mode mode
, const_rtx libcall
)
3362 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
3363 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
3365 /* The following libcalls return their result in integer registers,
3366 even though they return a floating point value. */
3367 if (arm_libcall_uses_aapcs_base (libcall
))
3368 return gen_rtx_REG (mode
, ARG_REGISTER(1));
3372 return LIBCALL_VALUE (mode
);
3375 /* Determine the amount of memory needed to store the possible return
3376 registers of an untyped call. */
3378 arm_apply_result_size (void)
3384 if (TARGET_HARD_FLOAT_ABI
)
3390 if (TARGET_MAVERICK
)
3393 if (TARGET_IWMMXT_ABI
)
3400 /* Decide whether TYPE should be returned in memory (true)
3401 or in a register (false). FNTYPE is the type of the function making
3404 arm_return_in_memory (const_tree type
, const_tree fntype
)
3408 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
3410 if (TARGET_AAPCS_BASED
)
3412 /* Simple, non-aggregate types (ie not including vectors and
3413 complex) are always returned in a register (or registers).
3414 We don't care about which register here, so we can short-cut
3415 some of the detail. */
3416 if (!AGGREGATE_TYPE_P (type
)
3417 && TREE_CODE (type
) != VECTOR_TYPE
3418 && TREE_CODE (type
) != COMPLEX_TYPE
)
3421 /* Any return value that is no larger than one word can be
3423 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
3426 /* Check any available co-processors to see if they accept the
3427 type as a register candidate (VFP, for example, can return
3428 some aggregates in consecutive registers). These aren't
3429 available if the call is variadic. */
3430 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
3433 /* Vector values should be returned using ARM registers, not
3434 memory (unless they're over 16 bytes, which will break since
3435 we only have four call-clobbered registers to play with). */
3436 if (TREE_CODE (type
) == VECTOR_TYPE
)
3437 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3439 /* The rest go in memory. */
3443 if (TREE_CODE (type
) == VECTOR_TYPE
)
3444 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3446 if (!AGGREGATE_TYPE_P (type
) &&
3447 (TREE_CODE (type
) != VECTOR_TYPE
))
3448 /* All simple types are returned in registers. */
3451 if (arm_abi
!= ARM_ABI_APCS
)
3453 /* ATPCS and later return aggregate types in memory only if they are
3454 larger than a word (or are variable size). */
3455 return (size
< 0 || size
> UNITS_PER_WORD
);
3458 /* For the arm-wince targets we choose to be compatible with Microsoft's
3459 ARM and Thumb compilers, which always return aggregates in memory. */
3461 /* All structures/unions bigger than one word are returned in memory.
3462 Also catch the case where int_size_in_bytes returns -1. In this case
3463 the aggregate is either huge or of variable size, and in either case
3464 we will want to return it via memory and not in a register. */
3465 if (size
< 0 || size
> UNITS_PER_WORD
)
3468 if (TREE_CODE (type
) == RECORD_TYPE
)
3472 /* For a struct the APCS says that we only return in a register
3473 if the type is 'integer like' and every addressable element
3474 has an offset of zero. For practical purposes this means
3475 that the structure can have at most one non bit-field element
3476 and that this element must be the first one in the structure. */
3478 /* Find the first field, ignoring non FIELD_DECL things which will
3479 have been created by C++. */
3480 for (field
= TYPE_FIELDS (type
);
3481 field
&& TREE_CODE (field
) != FIELD_DECL
;
3482 field
= DECL_CHAIN (field
))
3486 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3488 /* Check that the first field is valid for returning in a register. */
3490 /* ... Floats are not allowed */
3491 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3494 /* ... Aggregates that are not themselves valid for returning in
3495 a register are not allowed. */
3496 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3499 /* Now check the remaining fields, if any. Only bitfields are allowed,
3500 since they are not addressable. */
3501 for (field
= DECL_CHAIN (field
);
3503 field
= DECL_CHAIN (field
))
3505 if (TREE_CODE (field
) != FIELD_DECL
)
3508 if (!DECL_BIT_FIELD_TYPE (field
))
3515 if (TREE_CODE (type
) == UNION_TYPE
)
3519 /* Unions can be returned in registers if every element is
3520 integral, or can be returned in an integer register. */
3521 for (field
= TYPE_FIELDS (type
);
3523 field
= DECL_CHAIN (field
))
3525 if (TREE_CODE (field
) != FIELD_DECL
)
3528 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3531 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3537 #endif /* not ARM_WINCE */
3539 /* Return all other types in memory. */
3543 /* Indicate whether or not words of a double are in big-endian order. */
3546 arm_float_words_big_endian (void)
3548 if (TARGET_MAVERICK
)
3551 /* For FPA, float words are always big-endian. For VFP, floats words
3552 follow the memory system mode. */
3560 return (TARGET_BIG_END
? 1 : 0);
3565 const struct pcs_attribute_arg
3569 } pcs_attribute_args
[] =
3571 {"aapcs", ARM_PCS_AAPCS
},
3572 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
3574 /* We could recognize these, but changes would be needed elsewhere
3575 * to implement them. */
3576 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
3577 {"atpcs", ARM_PCS_ATPCS
},
3578 {"apcs", ARM_PCS_APCS
},
3580 {NULL
, ARM_PCS_UNKNOWN
}
3584 arm_pcs_from_attribute (tree attr
)
3586 const struct pcs_attribute_arg
*ptr
;
3589 /* Get the value of the argument. */
3590 if (TREE_VALUE (attr
) == NULL_TREE
3591 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
3592 return ARM_PCS_UNKNOWN
;
3594 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
3596 /* Check it against the list of known arguments. */
3597 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3598 if (streq (arg
, ptr
->arg
))
3601 /* An unrecognized interrupt type. */
3602 return ARM_PCS_UNKNOWN
;
3605 /* Get the PCS variant to use for this call. TYPE is the function's type
3606 specification, DECL is the specific declartion. DECL may be null if
3607 the call could be indirect or if this is a library call. */
3609 arm_get_pcs_model (const_tree type
, const_tree decl
)
3611 bool user_convention
= false;
3612 enum arm_pcs user_pcs
= arm_pcs_default
;
3617 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
3620 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
3621 user_convention
= true;
3624 if (TARGET_AAPCS_BASED
)
3626 /* Detect varargs functions. These always use the base rules
3627 (no argument is ever a candidate for a co-processor
3629 bool base_rules
= stdarg_p (type
);
3631 if (user_convention
)
3633 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
3634 sorry ("non-AAPCS derived PCS variant");
3635 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
3636 error ("variadic functions must use the base AAPCS variant");
3640 return ARM_PCS_AAPCS
;
3641 else if (user_convention
)
3643 else if (decl
&& flag_unit_at_a_time
)
3645 /* Local functions never leak outside this compilation unit,
3646 so we are free to use whatever conventions are
3648 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3649 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
3651 return ARM_PCS_AAPCS_LOCAL
;
3654 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
3655 sorry ("PCS variant");
3657 /* For everything else we use the target's default. */
3658 return arm_pcs_default
;
3663 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
3664 const_tree fntype ATTRIBUTE_UNUSED
,
3665 rtx libcall ATTRIBUTE_UNUSED
,
3666 const_tree fndecl ATTRIBUTE_UNUSED
)
3668 /* Record the unallocated VFP registers. */
3669 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
3670 pcum
->aapcs_vfp_reg_alloc
= 0;
3673 /* Walk down the type tree of TYPE counting consecutive base elements.
3674 If *MODEP is VOIDmode, then set it to the first valid floating point
3675 type. If a non-floating point type is found, or if a floating point
3676 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3677 otherwise return the count in the sub-tree. */
3679 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
3681 enum machine_mode mode
;
3684 switch (TREE_CODE (type
))
3687 mode
= TYPE_MODE (type
);
3688 if (mode
!= DFmode
&& mode
!= SFmode
)
3691 if (*modep
== VOIDmode
)
3700 mode
= TYPE_MODE (TREE_TYPE (type
));
3701 if (mode
!= DFmode
&& mode
!= SFmode
)
3704 if (*modep
== VOIDmode
)
3713 /* Use V2SImode and V4SImode as representatives of all 64-bit
3714 and 128-bit vector types, whether or not those modes are
3715 supported with the present options. */
3716 size
= int_size_in_bytes (type
);
3729 if (*modep
== VOIDmode
)
3732 /* Vector modes are considered to be opaque: two vectors are
3733 equivalent for the purposes of being homogeneous aggregates
3734 if they are the same size. */
3743 tree index
= TYPE_DOMAIN (type
);
3745 /* Can't handle incomplete types. */
3746 if (!COMPLETE_TYPE_P(type
))
3749 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
3752 || !TYPE_MAX_VALUE (index
)
3753 || !host_integerp (TYPE_MAX_VALUE (index
), 1)
3754 || !TYPE_MIN_VALUE (index
)
3755 || !host_integerp (TYPE_MIN_VALUE (index
), 1)
3759 count
*= (1 + tree_low_cst (TYPE_MAX_VALUE (index
), 1)
3760 - tree_low_cst (TYPE_MIN_VALUE (index
), 1));
3762 /* There must be no padding. */
3763 if (!host_integerp (TYPE_SIZE (type
), 1)
3764 || (tree_low_cst (TYPE_SIZE (type
), 1)
3765 != count
* GET_MODE_BITSIZE (*modep
)))
3777 /* Can't handle incomplete types. */
3778 if (!COMPLETE_TYPE_P(type
))
3781 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
3783 if (TREE_CODE (field
) != FIELD_DECL
)
3786 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
3792 /* There must be no padding. */
3793 if (!host_integerp (TYPE_SIZE (type
), 1)
3794 || (tree_low_cst (TYPE_SIZE (type
), 1)
3795 != count
* GET_MODE_BITSIZE (*modep
)))
3802 case QUAL_UNION_TYPE
:
3804 /* These aren't very interesting except in a degenerate case. */
3809 /* Can't handle incomplete types. */
3810 if (!COMPLETE_TYPE_P(type
))
3813 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
3815 if (TREE_CODE (field
) != FIELD_DECL
)
3818 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
3821 count
= count
> sub_count
? count
: sub_count
;
3824 /* There must be no padding. */
3825 if (!host_integerp (TYPE_SIZE (type
), 1)
3826 || (tree_low_cst (TYPE_SIZE (type
), 1)
3827 != count
* GET_MODE_BITSIZE (*modep
)))
3840 /* Return true if PCS_VARIANT should use VFP registers. */
3842 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
3844 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
3846 static bool seen_thumb1_vfp
= false;
3848 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
3850 sorry ("Thumb-1 hard-float VFP ABI");
3851 /* sorry() is not immediately fatal, so only display this once. */
3852 seen_thumb1_vfp
= true;
3858 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
3861 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
3862 (TARGET_VFP_DOUBLE
|| !is_double
));
3866 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
3867 enum machine_mode mode
, const_tree type
,
3868 enum machine_mode
*base_mode
, int *count
)
3870 enum machine_mode new_mode
= VOIDmode
;
3872 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
3873 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
3874 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
3879 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
3882 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
3884 else if (type
&& (mode
== BLKmode
|| TREE_CODE (type
) == VECTOR_TYPE
))
3886 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
3888 if (ag_count
> 0 && ag_count
<= 4)
3897 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
3900 *base_mode
= new_mode
;
3905 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
3906 enum machine_mode mode
, const_tree type
)
3908 int count ATTRIBUTE_UNUSED
;
3909 enum machine_mode ag_mode ATTRIBUTE_UNUSED
;
3911 if (!use_vfp_abi (pcs_variant
, false))
3913 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
3918 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
3921 if (!use_vfp_abi (pcum
->pcs_variant
, false))
3924 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
3925 &pcum
->aapcs_vfp_rmode
,
3926 &pcum
->aapcs_vfp_rcount
);
3930 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
3931 const_tree type ATTRIBUTE_UNUSED
)
3933 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
3934 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
3937 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
3938 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
3940 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
3941 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
3944 int rcount
= pcum
->aapcs_vfp_rcount
;
3946 enum machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
3950 /* Avoid using unsupported vector modes. */
3951 if (rmode
== V2SImode
)
3953 else if (rmode
== V4SImode
)
3960 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
3961 for (i
= 0; i
< rcount
; i
++)
3963 rtx tmp
= gen_rtx_REG (rmode
,
3964 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
3965 tmp
= gen_rtx_EXPR_LIST
3967 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
3968 XVECEXP (par
, 0, i
) = tmp
;
3971 pcum
->aapcs_reg
= par
;
3974 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
3981 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
3982 enum machine_mode mode
,
3983 const_tree type ATTRIBUTE_UNUSED
)
3985 if (!use_vfp_abi (pcs_variant
, false))
3988 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
3991 enum machine_mode ag_mode
;
3996 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
4001 if (ag_mode
== V2SImode
)
4003 else if (ag_mode
== V4SImode
)
4009 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
4010 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
4011 for (i
= 0; i
< count
; i
++)
4013 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
4014 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
4015 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
4016 XVECEXP (par
, 0, i
) = tmp
;
4022 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
4026 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
4027 enum machine_mode mode ATTRIBUTE_UNUSED
,
4028 const_tree type ATTRIBUTE_UNUSED
)
4030 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
4031 pcum
->aapcs_vfp_reg_alloc
= 0;
4035 #define AAPCS_CP(X) \
4037 aapcs_ ## X ## _cum_init, \
4038 aapcs_ ## X ## _is_call_candidate, \
4039 aapcs_ ## X ## _allocate, \
4040 aapcs_ ## X ## _is_return_candidate, \
4041 aapcs_ ## X ## _allocate_return_reg, \
4042 aapcs_ ## X ## _advance \
4045 /* Table of co-processors that can be used to pass arguments in
4046 registers. Idealy no arugment should be a candidate for more than
4047 one co-processor table entry, but the table is processed in order
4048 and stops after the first match. If that entry then fails to put
4049 the argument into a co-processor register, the argument will go on
4053 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4054 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
4056 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4057 BLKmode) is a candidate for this co-processor's registers; this
4058 function should ignore any position-dependent state in
4059 CUMULATIVE_ARGS and only use call-type dependent information. */
4060 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4062 /* Return true if the argument does get a co-processor register; it
4063 should set aapcs_reg to an RTX of the register allocated as is
4064 required for a return from FUNCTION_ARG. */
4065 bool (*allocate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4067 /* Return true if a result of mode MODE (or type TYPE if MODE is
4068 BLKmode) is can be returned in this co-processor's registers. */
4069 bool (*is_return_candidate
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4071 /* Allocate and return an RTX element to hold the return type of a
4072 call, this routine must not fail and will only be called if
4073 is_return_candidate returned true with the same parameters. */
4074 rtx (*allocate_return_reg
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4076 /* Finish processing this argument and prepare to start processing
4078 void (*advance
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4079 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
4087 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4092 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4093 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
4100 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
4102 /* We aren't passed a decl, so we can't check that a call is local.
4103 However, it isn't clear that that would be a win anyway, since it
4104 might limit some tail-calling opportunities. */
4105 enum arm_pcs pcs_variant
;
4109 const_tree fndecl
= NULL_TREE
;
4111 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4114 fntype
= TREE_TYPE (fntype
);
4117 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4120 pcs_variant
= arm_pcs_default
;
4122 if (pcs_variant
!= ARM_PCS_AAPCS
)
4126 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4127 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
4136 aapcs_allocate_return_reg (enum machine_mode mode
, const_tree type
,
4139 /* We aren't passed a decl, so we can't check that a call is local.
4140 However, it isn't clear that that would be a win anyway, since it
4141 might limit some tail-calling opportunities. */
4142 enum arm_pcs pcs_variant
;
4143 int unsignedp ATTRIBUTE_UNUSED
;
4147 const_tree fndecl
= NULL_TREE
;
4149 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4152 fntype
= TREE_TYPE (fntype
);
4155 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4158 pcs_variant
= arm_pcs_default
;
4160 /* Promote integer types. */
4161 if (type
&& INTEGRAL_TYPE_P (type
))
4162 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
4164 if (pcs_variant
!= ARM_PCS_AAPCS
)
4168 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4169 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
4171 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
4175 /* Promotes small structs returned in a register to full-word size
4176 for big-endian AAPCS. */
4177 if (type
&& arm_return_in_msb (type
))
4179 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4180 if (size
% UNITS_PER_WORD
!= 0)
4182 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4183 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4187 return gen_rtx_REG (mode
, R0_REGNUM
);
4191 aapcs_libcall_value (enum machine_mode mode
)
4193 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
4196 /* Lay out a function argument using the AAPCS rules. The rule
4197 numbers referred to here are those in the AAPCS. */
4199 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4200 const_tree type
, bool named
)
4205 /* We only need to do this once per argument. */
4206 if (pcum
->aapcs_arg_processed
)
4209 pcum
->aapcs_arg_processed
= true;
4211 /* Special case: if named is false then we are handling an incoming
4212 anonymous argument which is on the stack. */
4216 /* Is this a potential co-processor register candidate? */
4217 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
4219 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
4220 pcum
->aapcs_cprc_slot
= slot
;
4222 /* We don't have to apply any of the rules from part B of the
4223 preparation phase, these are handled elsewhere in the
4228 /* A Co-processor register candidate goes either in its own
4229 class of registers or on the stack. */
4230 if (!pcum
->aapcs_cprc_failed
[slot
])
4232 /* C1.cp - Try to allocate the argument to co-processor
4234 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
4237 /* C2.cp - Put the argument on the stack and note that we
4238 can't assign any more candidates in this slot. We also
4239 need to note that we have allocated stack space, so that
4240 we won't later try to split a non-cprc candidate between
4241 core registers and the stack. */
4242 pcum
->aapcs_cprc_failed
[slot
] = true;
4243 pcum
->can_split
= false;
4246 /* We didn't get a register, so this argument goes on the
4248 gcc_assert (pcum
->can_split
== false);
4253 /* C3 - For double-word aligned arguments, round the NCRN up to the
4254 next even number. */
4255 ncrn
= pcum
->aapcs_ncrn
;
4256 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
4259 nregs
= ARM_NUM_REGS2(mode
, type
);
4261 /* Sigh, this test should really assert that nregs > 0, but a GCC
4262 extension allows empty structs and then gives them empty size; it
4263 then allows such a structure to be passed by value. For some of
4264 the code below we have to pretend that such an argument has
4265 non-zero size so that we 'locate' it correctly either in
4266 registers or on the stack. */
4267 gcc_assert (nregs
>= 0);
4269 nregs2
= nregs
? nregs
: 1;
4271 /* C4 - Argument fits entirely in core registers. */
4272 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
4274 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
4275 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
4279 /* C5 - Some core registers left and there are no arguments already
4280 on the stack: split this argument between the remaining core
4281 registers and the stack. */
4282 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
4284 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
4285 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
4286 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
4290 /* C6 - NCRN is set to 4. */
4291 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
4293 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
4297 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4298 for a call to a function whose data type is FNTYPE.
4299 For a library call, FNTYPE is NULL. */
4301 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
4303 tree fndecl ATTRIBUTE_UNUSED
)
4305 /* Long call handling. */
4307 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4309 pcum
->pcs_variant
= arm_pcs_default
;
4311 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4313 if (arm_libcall_uses_aapcs_base (libname
))
4314 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
4316 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
4317 pcum
->aapcs_reg
= NULL_RTX
;
4318 pcum
->aapcs_partial
= 0;
4319 pcum
->aapcs_arg_processed
= false;
4320 pcum
->aapcs_cprc_slot
= -1;
4321 pcum
->can_split
= true;
4323 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
4327 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4329 pcum
->aapcs_cprc_failed
[i
] = false;
4330 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
4338 /* On the ARM, the offset starts at 0. */
4340 pcum
->iwmmxt_nregs
= 0;
4341 pcum
->can_split
= true;
4343 /* Varargs vectors are treated the same as long long.
4344 named_count avoids having to change the way arm handles 'named' */
4345 pcum
->named_count
= 0;
4348 if (TARGET_REALLY_IWMMXT
&& fntype
)
4352 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
4354 fn_arg
= TREE_CHAIN (fn_arg
))
4355 pcum
->named_count
+= 1;
4357 if (! pcum
->named_count
)
4358 pcum
->named_count
= INT_MAX
;
4363 /* Return true if mode/type need doubleword alignment. */
4365 arm_needs_doubleword_align (enum machine_mode mode
, const_tree type
)
4367 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
4368 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
4372 /* Determine where to put an argument to a function.
4373 Value is zero to push the argument on the stack,
4374 or a hard register in which to store the argument.
4376 MODE is the argument's machine mode.
4377 TYPE is the data type of the argument (as a tree).
4378 This is null for libcalls where that information may
4380 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4381 the preceding args and about the function being called.
4382 NAMED is nonzero if this argument is a named parameter
4383 (otherwise it is an extra parameter matching an ellipsis).
4385 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4386 other arguments are passed on the stack. If (NAMED == 0) (which happens
4387 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4388 defined), say it is passed in the stack (function_prologue will
4389 indeed make it pass in the stack if necessary). */
4392 arm_function_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4393 const_tree type
, bool named
)
4397 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4398 a call insn (op3 of a call_value insn). */
4399 if (mode
== VOIDmode
)
4402 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4404 aapcs_layout_arg (pcum
, mode
, type
, named
);
4405 return pcum
->aapcs_reg
;
4408 /* Varargs vectors are treated the same as long long.
4409 named_count avoids having to change the way arm handles 'named' */
4410 if (TARGET_IWMMXT_ABI
4411 && arm_vector_mode_supported_p (mode
)
4412 && pcum
->named_count
> pcum
->nargs
+ 1)
4414 if (pcum
->iwmmxt_nregs
<= 9)
4415 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
4418 pcum
->can_split
= false;
4423 /* Put doubleword aligned quantities in even register pairs. */
4425 && ARM_DOUBLEWORD_ALIGN
4426 && arm_needs_doubleword_align (mode
, type
))
4429 /* Only allow splitting an arg between regs and memory if all preceding
4430 args were allocated to regs. For args passed by reference we only count
4431 the reference pointer. */
4432 if (pcum
->can_split
)
4435 nregs
= ARM_NUM_REGS2 (mode
, type
);
4437 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
4440 return gen_rtx_REG (mode
, pcum
->nregs
);
4444 arm_function_arg_boundary (enum machine_mode mode
, const_tree type
)
4446 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
4447 ? DOUBLEWORD_ALIGNMENT
4452 arm_arg_partial_bytes (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4453 tree type
, bool named
)
4455 int nregs
= pcum
->nregs
;
4457 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4459 aapcs_layout_arg (pcum
, mode
, type
, named
);
4460 return pcum
->aapcs_partial
;
4463 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
4466 if (NUM_ARG_REGS
> nregs
4467 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
4469 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
4474 /* Update the data in PCUM to advance over an argument
4475 of mode MODE and data type TYPE.
4476 (TYPE is null for libcalls where that information may not be available.) */
4479 arm_function_arg_advance (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4480 const_tree type
, bool named
)
4482 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4484 aapcs_layout_arg (pcum
, mode
, type
, named
);
4486 if (pcum
->aapcs_cprc_slot
>= 0)
4488 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
4490 pcum
->aapcs_cprc_slot
= -1;
4493 /* Generic stuff. */
4494 pcum
->aapcs_arg_processed
= false;
4495 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
4496 pcum
->aapcs_reg
= NULL_RTX
;
4497 pcum
->aapcs_partial
= 0;
4502 if (arm_vector_mode_supported_p (mode
)
4503 && pcum
->named_count
> pcum
->nargs
4504 && TARGET_IWMMXT_ABI
)
4505 pcum
->iwmmxt_nregs
+= 1;
4507 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
4511 /* Variable sized types are passed by reference. This is a GCC
4512 extension to the ARM ABI. */
4515 arm_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4516 enum machine_mode mode ATTRIBUTE_UNUSED
,
4517 const_tree type
, bool named ATTRIBUTE_UNUSED
)
4519 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
4522 /* Encode the current state of the #pragma [no_]long_calls. */
4525 OFF
, /* No #pragma [no_]long_calls is in effect. */
4526 LONG
, /* #pragma long_calls is in effect. */
4527 SHORT
/* #pragma no_long_calls is in effect. */
4530 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
4533 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4535 arm_pragma_long_calls
= LONG
;
4539 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4541 arm_pragma_long_calls
= SHORT
;
4545 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4547 arm_pragma_long_calls
= OFF
;
4550 /* Handle an attribute requiring a FUNCTION_DECL;
4551 arguments as in struct attribute_spec.handler. */
4553 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
4554 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
4556 if (TREE_CODE (*node
) != FUNCTION_DECL
)
4558 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4560 *no_add_attrs
= true;
4566 /* Handle an "interrupt" or "isr" attribute;
4567 arguments as in struct attribute_spec.handler. */
4569 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
4574 if (TREE_CODE (*node
) != FUNCTION_DECL
)
4576 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4578 *no_add_attrs
= true;
4580 /* FIXME: the argument if any is checked for type attributes;
4581 should it be checked for decl ones? */
4585 if (TREE_CODE (*node
) == FUNCTION_TYPE
4586 || TREE_CODE (*node
) == METHOD_TYPE
)
4588 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
4590 warning (OPT_Wattributes
, "%qE attribute ignored",
4592 *no_add_attrs
= true;
4595 else if (TREE_CODE (*node
) == POINTER_TYPE
4596 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
4597 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
4598 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
4600 *node
= build_variant_type_copy (*node
);
4601 TREE_TYPE (*node
) = build_type_attribute_variant
4603 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
4604 *no_add_attrs
= true;
4608 /* Possibly pass this attribute on from the type to a decl. */
4609 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
4610 | (int) ATTR_FLAG_FUNCTION_NEXT
4611 | (int) ATTR_FLAG_ARRAY_NEXT
))
4613 *no_add_attrs
= true;
4614 return tree_cons (name
, args
, NULL_TREE
);
4618 warning (OPT_Wattributes
, "%qE attribute ignored",
4627 /* Handle a "pcs" attribute; arguments as in struct
4628 attribute_spec.handler. */
4630 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
4631 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
4633 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
4635 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
4636 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
4666 /* Return 0 if the attributes for two types are incompatible, 1 if they
4667 are compatible, and 2 if they are nearly compatible (which causes a
4668 warning to be generated). */
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  Compares long_call/short_call and isr/interrupt
   attributes of two function types.  */
4670 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
4674 /* Check for mismatch of non-default calling convention. */
4675 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
4678 /* Check for mismatched call attributes. */
4679 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
4680 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
4681 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
4682 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
4684 /* Only bother to check if an attribute is defined. */
4685 if (l1
| l2
| s1
| s2
)
4687 /* If one type has an attribute, the other must have the same attribute. */
4688 if ((l1
!= l2
) || (s1
!= s2
))
4691 /* Disallow mixed attributes. */
4692 if ((l1
& s2
) || (l2
& s1
))
4696 /* Check for mismatched ISR attribute. */
4697 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
4699 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
4700 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
/* BUG(review)?  The line below assigns to l1 while looking up the
   "interrupt" attribute on type2 -- by symmetry with the type1
   lookups above it should presumably assign to l2.  As written,
   type2's "interrupt" result overwrites type1's.  TODO confirm
   against upstream arm.c before fixing (cannot be corrected here
   because the surrounding dropped lines are not visible).  */
4702 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
4709 /* Assigns default attributes to newly defined type. This is used to
4710 set short_call/long_call attributes for function types of
4711 functions defined inside corresponding #pragma scopes. */
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  Driven by the file-scope arm_pragma_long_calls
   state (LONG / SHORT).  */
4713 arm_set_default_type_attributes (tree type
)
4715 /* Add __attribute__ ((long_call)) to all functions, when
4716 inside #pragma long_calls or __attribute__ ((short_call)),
4717 when inside #pragma no_long_calls. */
4718 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
4720 tree type_attr_list
, attr_name
;
4721 type_attr_list
= TYPE_ATTRIBUTES (type
);
4723 if (arm_pragma_long_calls
== LONG
)
4724 attr_name
= get_identifier ("long_call");
4725 else if (arm_pragma_long_calls
== SHORT
)
4726 attr_name
= get_identifier ("short_call");
/* Prepend the chosen attribute to the type's existing list.  */
4730 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
4731 TYPE_ATTRIBUTES (type
) = type_attr_list
;
4735 /* Return true if DECL is known to be linked into section SECTION. */
/* NOTE(review): extraction-garbled span (early-return lines dropped);
   code byte-identical, comments only.  Each guard below presumably
   returns false when it fires -- TODO confirm.  */
4738 arm_function_in_section_p (tree decl
, section
*section
)
4740 /* We can only be certain about functions defined in the same
4741 compilation unit. */
4742 if (!TREE_STATIC (decl
))
4745 /* Make sure that SYMBOL always binds to the definition in this
4746 compilation unit. */
4747 if (!targetm
.binds_local_p (decl
))
4750 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4751 if (!DECL_SECTION_NAME (decl
))
4753 /* Make sure that we will not create a unique section for DECL. */
4754 if (flag_function_sections
|| DECL_ONE_ONLY (decl
))
4758 return function_section (decl
) == section
;
4761 /* Return nonzero if a 32-bit "long_call" should be generated for
4762 a call from the current function to DECL. We generate a long_call
4765 a. has an __attribute__((long call))
4766 or b. is within the scope of a #pragma long_calls
4767 or c. the -mlong-calls command line switch has been specified
4769 However we do not generate a long call if the function:
4771 d. has an __attribute__ ((short_call))
4772 or e. is inside the scope of a #pragma no_long_calls
4773 or f. is defined in the same section as the current function. */
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  */
4776 arm_is_long_call_p (tree decl
)
4781 return TARGET_LONG_CALLS
;
4783 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
4784 if (lookup_attribute ("short_call", attrs
))
4787 /* For "f", be conservative, and only cater for cases in which the
4788 whole of the current function is placed in the same section. */
4789 if (!flag_reorder_blocks_and_partition
4790 && TREE_CODE (decl
) == FUNCTION_DECL
4791 && arm_function_in_section_p (decl
, current_function_section ()))
4794 if (lookup_attribute ("long_call", attrs
))
/* Fall back to the -mlong-calls command-line default.  */
4797 return TARGET_LONG_CALLS
;
4800 /* Return nonzero if it is ok to make a tail-call to DECL. */
/* NOTE(review): extraction-garbled span (the "return false" under
   each guard was dropped); code byte-identical, comments only.
   Implements TARGET_FUNCTION_OK_FOR_SIBCALL: a sequence of veto
   checks followed by an unconditional accept.  */
4802 arm_function_ok_for_sibcall (tree decl
, tree exp
)
4804 unsigned long func_type
;
4806 if (cfun
->machine
->sibcall_blocked
)
4809 /* Never tailcall something for which we have no decl, or if we
4810 are generating code for Thumb-1. */
4811 if (decl
== NULL
|| TARGET_THUMB1
)
4814 /* The PIC register is live on entry to VxWorks PLT entries, so we
4815 must make the call before restoring the PIC register. */
4816 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
4819 /* Cannot tail-call to long calls, since these are out of range of
4820 a branch instruction. */
4821 if (arm_is_long_call_p (decl
))
4824 /* If we are interworking and the function is not declared static
4825 then we can't tail-call it unless we know that it exists in this
4826 compilation unit (since it might be a Thumb routine). */
4827 if (TARGET_INTERWORK
&& TREE_PUBLIC (decl
) && !TREE_ASM_WRITTEN (decl
))
4830 func_type
= arm_current_func_type ();
4831 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4832 if (IS_INTERRUPT (func_type
))
4835 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
4837 /* Check that the return value locations are the same. For
4838 example that we aren't returning a value from the sibling in
4839 a VFP register but then need to transfer it to a core
4843 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
4844 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
4846 if (!rtx_equal_p (a
, b
))
4850 /* Never tailcall if function may be called with a misaligned SP. */
4851 if (IS_STACKALIGN (func_type
))
4854 /* Everything else is ok. */
4859 /* Addressing mode support functions. */
4861 /* Return nonzero if X is a legitimate immediate operand when compiling
4862 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  Symbolic references (bare SYMBOL_REF, or
   (const (plus SYMBOL_REF ...))) need the GOT, so they are
   presumably rejected here -- TODO confirm return sense.  */
4864 legitimate_pic_operand_p (rtx x
)
4866 if (GET_CODE (x
) == SYMBOL_REF
4867 || (GET_CODE (x
) == CONST
4868 && GET_CODE (XEXP (x
, 0)) == PLUS
4869 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
4875 /* Record that the current function needs a PIC register. Initialize
4876 cfun->machine->pic_reg if we have not already done so. */
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  Two paths: a fixed -mpic-register (hard reg) vs. a
   fresh pseudo whose initialization sequence is captured and queued
   on the entry edge.  */
4879 require_pic_register (void)
4881 /* A lot of the logic here is made obscure by the fact that this
4882 routine gets called as part of the rtx cost estimation process.
4883 We don't want those calls to affect any assumptions about the real
4884 function; and further, we can't call entry_of_function() until we
4885 start the real expansion process. */
4886 if (!crtl
->uses_pic_offset_table
)
4888 gcc_assert (can_create_pseudo_p ());
4889 if (arm_pic_register
!= INVALID_REGNUM
)
4891 if (!cfun
->machine
->pic_reg
)
4892 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
4894 /* Play games to avoid marking the function as needing pic
4895 if we are being called as part of the cost-estimation
4897 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
4898 crtl
->uses_pic_offset_table
= 1;
4904 if (!cfun
->machine
->pic_reg
)
4905 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
4907 /* Play games to avoid marking the function as needing pic
4908 if we are being called as part of the cost-estimation
4910 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
4912 crtl
->uses_pic_offset_table
= 1;
4915 arm_load_pic_register (0UL);
/* Tag the captured setup insns with the prologue locator so the
   debugger attributes them to function entry.  */
4920 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
4922 INSN_LOCATOR (insn
) = prologue_locator
;
4924 /* We can be called during expansion of PHI nodes, where
4925 we can't yet emit instructions directly in the final
4926 insn stream. Queue the insns on the entry edge, they will
4927 be committed after everything else is expanded. */
4928 insert_insn_on_edge (seq
, single_succ_edge (ENTRY_BLOCK_PTR
));
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  Rewrites address ORIG into a PIC-legitimate form:
   local symbols/labels get a PC-relative computation
   (arm_pic_static_addr); global ones get a GOT load via
   calculate_pic_address; CONST PLUS expressions are recursively
   legitimized piecewise.  */
4935 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
4937 if (GET_CODE (orig
) == SYMBOL_REF
4938 || GET_CODE (orig
) == LABEL_REF
)
4944 gcc_assert (can_create_pseudo_p ());
4945 reg
= gen_reg_rtx (Pmode
);
4948 /* VxWorks does not impose a fixed gap between segments; the run-time
4949 gap can be different from the object-file gap. We therefore can't
4950 use GOTOFF unless we are absolutely sure that the symbol is in the
4951 same segment as the GOT. Unfortunately, the flexibility of linker
4952 scripts means that we can't be sure of that in general, so assume
4953 that GOTOFF is never valid on VxWorks. */
4954 if ((GET_CODE (orig
) == LABEL_REF
4955 || (GET_CODE (orig
) == SYMBOL_REF
&&
4956 SYMBOL_REF_LOCAL_P (orig
)))
4958 && !TARGET_VXWORKS_RTP
)
4959 insn
= arm_pic_static_addr (orig
, reg
);
4965 /* If this function doesn't have a pic register, create one now. */
4966 require_pic_register ();
4968 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
4970 /* Make the MEM as close to a constant as possible. */
4971 mem
= SET_SRC (pat
);
4972 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
4973 MEM_READONLY_P (mem
) = 1;
4974 MEM_NOTRAP_P (mem
) = 1;
4976 insn
= emit_insn (pat
);
4979 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4981 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
4985 else if (GET_CODE (orig
) == CONST
)
4989 if (GET_CODE (XEXP (orig
, 0)) == PLUS
4990 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
4993 /* Handle the case where we have: const (UNSPEC_TLS). */
4994 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
4995 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
4998 /* Handle the case where we have:
4999 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5001 if (GET_CODE (XEXP (orig
, 0)) == PLUS
5002 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
5003 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
5005 gcc_assert (GET_CODE (XEXP (XEXP (orig
, 0), 1)) == CONST_INT
);
5011 gcc_assert (can_create_pseudo_p ());
5012 reg
= gen_reg_rtx (Pmode
);
5015 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
/* Legitimize base and offset separately; reuse REG for the offset
   only when the base did not already claim it.  */
5017 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
5018 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
5019 base
== reg
? 0 : reg
);
5021 if (GET_CODE (offset
) == CONST_INT
)
5023 /* The base register doesn't really matter, we only want to
5024 test the index for the appropriate mode. */
5025 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
5027 gcc_assert (can_create_pseudo_p ());
5028 offset
= force_reg (Pmode
, offset
);
5031 if (GET_CODE (offset
) == CONST_INT
)
5032 return plus_constant (base
, INTVAL (offset
));
5035 if (GET_MODE_SIZE (mode
) > 4
5036 && (GET_MODE_CLASS (mode
) == MODE_INT
5037 || TARGET_SOFT_FLOAT
))
5039 emit_insn (gen_addsi3 (reg
, base
, offset
));
5043 return gen_rtx_PLUS (Pmode
, base
, offset
);
5050 /* Find a spare register to use during the prolog of a function. */
/* NOTE(review): extraction-garbled span (the "return reg" lines under
   the loops were dropped); code byte-identical, comments only.
   Search order: dead argument regs r0-r3, then the r3-availability
   heuristics, then pushed call-saved lo regs, then pushed hi regs.  */
5053 thumb_find_work_register (unsigned long pushed_regs_mask
)
5057 /* Check the argument registers first as these are call-used. The
5058 register allocation order means that sometimes r3 might be used
5059 but earlier argument registers might not, so check them all. */
5060 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
5061 if (!df_regs_ever_live_p (reg
))
5064 /* Before going on to check the call-saved registers we can try a couple
5065 more ways of deducing that r3 is available. The first is when we are
5066 pushing anonymous arguments onto the stack and we have less than 4
5067 registers worth of fixed arguments(*). In this case r3 will be part of
5068 the variable argument list and so we can be sure that it will be
5069 pushed right at the start of the function. Hence it will be available
5070 for the rest of the prologue.
5071 (*): ie crtl->args.pretend_args_size is greater than 0. */
5072 if (cfun
->machine
->uses_anonymous_args
5073 && crtl
->args
.pretend_args_size
> 0)
5074 return LAST_ARG_REGNUM
;
5076 /* The other case is when we have fixed arguments but less than 4 registers
5077 worth. In this case r3 might be used in the body of the function, but
5078 it is not being used to convey an argument into the function. In theory
5079 we could just check crtl->args.size to see how many bytes are
5080 being passed in argument registers, but it seems that it is unreliable.
5081 Sometimes it will have the value 0 when in fact arguments are being
5082 passed. (See testcase execute/20021111-1.c for an example). So we also
5083 check the args_info.nregs field as well. The problem with this field is
5084 that it makes no allowances for arguments that are passed to the
5085 function but which are not used. Hence we could miss an opportunity
5086 when a function has an unused argument in r3. But it is better to be
5087 safe than to be sorry. */
5088 if (! cfun
->machine
->uses_anonymous_args
5089 && crtl
->args
.size
>= 0
5090 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
5091 && crtl
->args
.info
.nregs
< 4)
5092 return LAST_ARG_REGNUM
;
5094 /* Otherwise look for a call-saved register that is going to be pushed. */
5095 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
5096 if (pushed_regs_mask
& (1 << reg
))
5101 /* Thumb-2 can use high regs. */
5102 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
5103 if (pushed_regs_mask
& (1 << reg
))
5106 /* Something went wrong - thumb_compute_save_reg_mask()
5107 should have arranged for a suitable register to be pushed. */
/* Counter used to mint unique PIC labels; consumed (and incremented)
   via GEN_INT (pic_labelno++) wrapped in UNSPEC_PIC_LABEL by
   arm_load_pic_register and arm_pic_static_addr below.  GTY(()) so
   the value survives garbage collection across functions.  */
5111 static GTY(()) int pic_labelno
;
5113 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  Three code paths are visible: VxWorks RTP
   (GOTT_BASE/GOTT_INDEX), 32-bit ARM/Thumb-2 (pc-relative add of a
   synthetic label), and Thumb-1 (possibly via a scratch work
   register when the PIC reg is a hi reg).  */
5117 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
5119 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
5121 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
5124 gcc_assert (flag_pic
);
5126 pic_reg
= cfun
->machine
->pic_reg
;
5127 if (TARGET_VXWORKS_RTP
)
5129 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
5130 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5131 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
5133 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
5135 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5136 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
5140 /* We use an UNSPEC rather than a LABEL_REF because this label
5141 never appears in the code stream. */
5143 labelno
= GEN_INT (pic_labelno
++);
5144 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5145 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5147 /* On the ARM the PC register contains 'dot + 8' at the time of the
5148 addition, on the Thumb it is 'dot + 4'. */
5149 pic_rtx
= plus_constant (l1
, TARGET_ARM
? 8 : 4);
5150 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
5152 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5156 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
5158 emit_insn (gen_pic_add_dot_plus_eight (pic_reg
, pic_reg
, labelno
));
5160 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
5162 else /* TARGET_THUMB1 */
5164 if (arm_pic_register
!= INVALID_REGNUM
5165 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
5167 /* We will have pushed the pic register, so we should always be
5168 able to find a work register. */
5169 pic_tmp
= gen_rtx_REG (SImode
,
5170 thumb_find_work_register (saved_regs
));
5171 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
5172 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
5175 emit_insn (gen_pic_load_addr_thumb1 (pic_reg
, pic_rtx
));
5176 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
5180 /* Need to emit this whether or not we obey regdecls,
5181 since setjmp/longjmp can cause life info to screw up. */
5185 /* Generate code to load the address of a static var when flag_pic is set. */
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  Computes ORIG's address PC-relatively via
   UNSPEC_SYMBOL_OFFSET, avoiding a GOT entry for local data.  */
5187 arm_pic_static_addr (rtx orig
, rtx reg
)
5189 rtx l1
, labelno
, offset_rtx
, insn
;
5191 gcc_assert (flag_pic
);
5193 /* We use an UNSPEC rather than a LABEL_REF because this label
5194 never appears in the code stream. */
5195 labelno
= GEN_INT (pic_labelno
++);
5196 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5197 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5199 /* On the ARM the PC register contains 'dot + 8' at the time of the
5200 addition, on the Thumb it is 'dot + 4'. */
5201 offset_rtx
= plus_constant (l1
, TARGET_ARM
? 8 : 4);
5202 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
5203 UNSPEC_SYMBOL_OFFSET
);
5204 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
5208 emit_insn (gen_pic_load_addr_32bit (reg
, offset_rtx
));
5210 insn
= emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
5212 insn
= emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5214 else /* TARGET_THUMB1 */
5216 emit_insn (gen_pic_load_addr_thumb1 (reg
, offset_rtx
));
5217 insn
= emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5223 /* Return nonzero if X is valid as an ARM state addressing register. */
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  Strict checking defers to
   ARM_REGNO_OK_FOR_BASE_P; non-strict additionally accepts pseudos
   and the (eliminable) frame/arg pointers.  */
5225 arm_address_register_rtx_p (rtx x
, int strict_p
)
5229 if (GET_CODE (x
) != REG
)
5235 return ARM_REGNO_OK_FOR_BASE_P (regno
);
5237 return (regno
<= LAST_ARM_REGNUM
5238 || regno
>= FIRST_PSEUDO_REGISTER
5239 || regno
== FRAME_POINTER_REGNUM
5240 || regno
== ARG_POINTER_REGNUM
);
5243 /* Return TRUE if this rtx is the difference of a symbol and a label,
5244 and will reduce to a PC-relative relocation in the object file.
5245 Expressions like this can be left alone when generating PIC, rather
5246 than forced through the GOT. */
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  The non-MINUS fallthrough (presumably
   "return FALSE;") was dropped by the extraction.  */
5248 pcrel_constant_p (rtx x
)
5250 if (GET_CODE (x
) == MINUS
)
5251 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
5256 /* Return true if X will surely end up in an index register after next
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  Matches a (unspec ... UNSPEC_PIC_SYM), which a
   later split turns into a register (see arm.md note below).  */
5259 will_be_in_index_register (const_rtx x
)
5261 /* arm.md: calculate_pic_address will split this into a register. */
5262 return GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_SYM
;
5265 /* Return nonzero if X is a valid ARM state address operand. */
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  Cases in order: bare base reg; auto inc/dec;
   {POST,PRE}_MODIFY with reg+index; post-reload minipool LABEL_REFs;
   TImode/NEON struct modes; PLUS base+index; MINUS (disabled note);
   constant-pool SYMBOL_REFs.  */
5267 arm_legitimate_address_outer_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
5271 enum rtx_code code
= GET_CODE (x
);
5273 if (arm_address_register_rtx_p (x
, strict_p
))
5276 use_ldrd
= (TARGET_LDRD
5278 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5280 if (code
== POST_INC
|| code
== PRE_DEC
5281 || ((code
== PRE_INC
|| code
== POST_DEC
)
5282 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5283 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5285 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5286 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5287 && GET_CODE (XEXP (x
, 1)) == PLUS
5288 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5290 rtx addend
= XEXP (XEXP (x
, 1), 1);
5292 /* Don't allow ldrd post increment by register because it's hard
5293 to fixup invalid register choices. */
5295 && GET_CODE (x
) == POST_MODIFY
5296 && GET_CODE (addend
) == REG
)
5299 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
5300 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
5303 /* After reload constants split into minipools will have addresses
5304 from a LABEL_REF. */
5305 else if (reload_completed
5306 && (code
== LABEL_REF
5308 && GET_CODE (XEXP (x
, 0)) == PLUS
5309 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5310 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5313 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5316 else if (code
== PLUS
)
5318 rtx xop0
= XEXP (x
, 0);
5319 rtx xop1
= XEXP (x
, 1);
5321 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5322 && ((GET_CODE(xop1
) == CONST_INT
5323 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
5324 || (!strict_p
&& will_be_in_index_register (xop1
))))
5325 || (arm_address_register_rtx_p (xop1
, strict_p
)
5326 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
5330 /* Reload currently can't handle MINUS, so disable this for now */
5331 else if (GET_CODE (x
) == MINUS
)
5333 rtx xop0
= XEXP (x
, 0);
5334 rtx xop1
= XEXP (x
, 1);
5336 return (arm_address_register_rtx_p (xop0
, strict_p
)
5337 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
5341 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5342 && code
== SYMBOL_REF
5343 && CONSTANT_POOL_ADDRESS_P (x
)
5345 && symbol_mentioned_p (get_pool_constant (x
))
5346 && ! pcrel_constant_p (get_pool_constant (x
))))
5352 /* Return nonzero if X is a valid Thumb-2 address operand. */
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  Parallels arm_legitimate_address_outer_p but
   restricts auto-modify to constant addends and uses the
   Thumb-2-specific index predicate.  */
5354 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
5357 enum rtx_code code
= GET_CODE (x
);
5359 if (arm_address_register_rtx_p (x
, strict_p
))
5362 use_ldrd
= (TARGET_LDRD
5364 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5366 if (code
== POST_INC
|| code
== PRE_DEC
5367 || ((code
== PRE_INC
|| code
== POST_DEC
)
5368 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5369 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5371 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5372 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5373 && GET_CODE (XEXP (x
, 1)) == PLUS
5374 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5376 /* Thumb-2 only has autoincrement by constant. */
5377 rtx addend
= XEXP (XEXP (x
, 1), 1);
5378 HOST_WIDE_INT offset
;
5380 if (GET_CODE (addend
) != CONST_INT
)
5383 offset
= INTVAL(addend
);
5384 if (GET_MODE_SIZE (mode
) <= 4)
5385 return (offset
> -256 && offset
< 256);
5387 return (use_ldrd
&& offset
> -1024 && offset
< 1024
5388 && (offset
& 3) == 0);
5391 /* After reload constants split into minipools will have addresses
5392 from a LABEL_REF. */
5393 else if (reload_completed
5394 && (code
== LABEL_REF
5396 && GET_CODE (XEXP (x
, 0)) == PLUS
5397 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5398 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5401 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5404 else if (code
== PLUS
)
5406 rtx xop0
= XEXP (x
, 0);
5407 rtx xop1
= XEXP (x
, 1);
5409 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5410 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
5411 || (!strict_p
&& will_be_in_index_register (xop1
))))
5412 || (arm_address_register_rtx_p (xop1
, strict_p
)
5413 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
5416 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5417 && code
== SYMBOL_REF
5418 && CONSTANT_POOL_ADDRESS_P (x
)
5420 && symbol_mentioned_p (get_pool_constant (x
))
5421 && ! pcrel_constant_p (get_pool_constant (x
))))
5427 /* Return nonzero if INDEX is valid for an address index operand in
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  Checks, in order: coprocessor (VFP/FPA/Maverick)
   offsets, NEON quad/double offsets, iWMMXt offsets, register
   index, DImode/DFmode (ldrd vs. two-ldr) ranges, scaled
   (MULT/shift) indexes, then plain constant offsets.  */
5430 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
5433 HOST_WIDE_INT range
;
5434 enum rtx_code code
= GET_CODE (index
);
5436 /* Standard coprocessor addressing modes. */
5437 if (TARGET_HARD_FLOAT
5438 && (TARGET_VFP
|| TARGET_FPA
|| TARGET_MAVERICK
)
5439 && (mode
== SFmode
|| mode
== DFmode
5440 || (TARGET_MAVERICK
&& mode
== DImode
)))
5441 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5442 && INTVAL (index
) > -1024
5443 && (INTVAL (index
) & 3) == 0);
5445 /* For quad modes, we restrict the constant offset to be slightly less
5446 than what the instruction format permits. We do this because for
5447 quad mode moves, we will actually decompose them into two separate
5448 double-mode reads or writes. INDEX must therefore be a valid
5449 (double-mode) offset and so should INDEX+8. */
5450 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
5451 return (code
== CONST_INT
5452 && INTVAL (index
) < 1016
5453 && INTVAL (index
) > -1024
5454 && (INTVAL (index
) & 3) == 0);
5456 /* We have no such constraint on double mode offsets, so we permit the
5457 full range of the instruction format. */
5458 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
5459 return (code
== CONST_INT
5460 && INTVAL (index
) < 1024
5461 && INTVAL (index
) > -1024
5462 && (INTVAL (index
) & 3) == 0);
5464 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
5465 return (code
== CONST_INT
5466 && INTVAL (index
) < 1024
5467 && INTVAL (index
) > -1024
5468 && (INTVAL (index
) & 3) == 0);
5470 if (arm_address_register_rtx_p (index
, strict_p
)
5471 && (GET_MODE_SIZE (mode
) <= 4))
5474 if (mode
== DImode
|| mode
== DFmode
)
5476 if (code
== CONST_INT
)
5478 HOST_WIDE_INT val
= INTVAL (index
);
5481 return val
> -256 && val
< 256;
/* Without ldrd the access is split into two ldr/str; 4092 leaves
   room for the +4 of the second word within the 4095 ldr range.  */
5483 return val
> -4096 && val
< 4092;
5486 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
5489 if (GET_MODE_SIZE (mode
) <= 4
5493 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
5497 rtx xiop0
= XEXP (index
, 0);
5498 rtx xiop1
= XEXP (index
, 1);
5500 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
5501 && power_of_two_operand (xiop1
, SImode
))
5502 || (arm_address_register_rtx_p (xiop1
, strict_p
)
5503 && power_of_two_operand (xiop0
, SImode
)));
5505 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
5506 || code
== ASHIFT
|| code
== ROTATERT
)
5508 rtx op
= XEXP (index
, 1);
5510 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
5511 && GET_CODE (op
) == CONST_INT
5513 && INTVAL (op
) <= 31);
5517 /* For ARM v4 we may be doing a sign-extend operation during the
5523 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
5529 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
5531 return (code
== CONST_INT
5532 && INTVAL (index
) < range
5533 && INTVAL (index
) > -range
);
5536 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5537 index operand. i.e. 1, 2, 4 or 8. */
/* NOTE(review): extraction-garbled span (the INTVAL assignment to
   'val' was dropped); code byte-identical, comments only.  */
5539 thumb2_index_mul_operand (rtx op
)
5543 if (GET_CODE(op
) != CONST_INT
)
5547 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
5550 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  Thumb-2 counterpart of arm_legitimate_index_p:
   tighter coprocessor range, mul-by-1/2/4/8 scaled index, ASHIFT by
   0..3, and a final -256..4095 constant range.  */
5552 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
5554 enum rtx_code code
= GET_CODE (index
);
5556 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5557 /* Standard coprocessor addressing modes. */
5558 if (TARGET_HARD_FLOAT
5559 && (TARGET_VFP
|| TARGET_FPA
|| TARGET_MAVERICK
)
5560 && (mode
== SFmode
|| mode
== DFmode
5561 || (TARGET_MAVERICK
&& mode
== DImode
)))
5562 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5563 /* Thumb-2 allows only > -256 index range for it's core register
5564 load/stores. Since we allow SF/DF in core registers, we have
5565 to use the intersection between -256~4096 (core) and -1024~1024
5567 && INTVAL (index
) > -256
5568 && (INTVAL (index
) & 3) == 0);
5570 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
5572 /* For DImode assume values will usually live in core regs
5573 and only allow LDRD addressing modes. */
5574 if (!TARGET_LDRD
|| mode
!= DImode
)
5575 return (code
== CONST_INT
5576 && INTVAL (index
) < 1024
5577 && INTVAL (index
) > -1024
5578 && (INTVAL (index
) & 3) == 0);
5581 /* For quad modes, we restrict the constant offset to be slightly less
5582 than what the instruction format permits. We do this because for
5583 quad mode moves, we will actually decompose them into two separate
5584 double-mode reads or writes. INDEX must therefore be a valid
5585 (double-mode) offset and so should INDEX+8. */
5586 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
5587 return (code
== CONST_INT
5588 && INTVAL (index
) < 1016
5589 && INTVAL (index
) > -1024
5590 && (INTVAL (index
) & 3) == 0);
5592 /* We have no such constraint on double mode offsets, so we permit the
5593 full range of the instruction format. */
5594 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
5595 return (code
== CONST_INT
5596 && INTVAL (index
) < 1024
5597 && INTVAL (index
) > -1024
5598 && (INTVAL (index
) & 3) == 0);
5600 if (arm_address_register_rtx_p (index
, strict_p
)
5601 && (GET_MODE_SIZE (mode
) <= 4))
5604 if (mode
== DImode
|| mode
== DFmode
)
5606 if (code
== CONST_INT
)
5608 HOST_WIDE_INT val
= INTVAL (index
);
5609 /* ??? Can we assume ldrd for thumb2? */
5610 /* Thumb-2 ldrd only has reg+const addressing modes. */
5611 /* ldrd supports offsets of +-1020.
5612 However the ldr fallback does not. */
5613 return val
> -256 && val
< 256 && (val
& 3) == 0;
5621 rtx xiop0
= XEXP (index
, 0);
5622 rtx xiop1
= XEXP (index
, 1);
5624 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
5625 && thumb2_index_mul_operand (xiop1
))
5626 || (arm_address_register_rtx_p (xiop1
, strict_p
)
5627 && thumb2_index_mul_operand (xiop0
)));
5629 else if (code
== ASHIFT
)
5631 rtx op
= XEXP (index
, 1);
5633 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
5634 && GET_CODE (op
) == CONST_INT
5636 && INTVAL (op
) <= 3);
5639 return (code
== CONST_INT
5640 && INTVAL (index
) < 4096
5641 && INTVAL (index
) > -256);
5644 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
/* NOTE(review): extraction-garbled span; code byte-identical,
   comments only.  Strict checking defers to the mode-aware
   THUMB1_REGNO_MODE_OK_FOR_BASE_P; non-strict accepts lo regs,
   pseudos, FP, and (for word-or-larger modes) SP and the
   eliminable pointers.  */
5646 thumb1_base_register_rtx_p (rtx x
, enum machine_mode mode
, int strict_p
)
5650 if (GET_CODE (x
) != REG
)
5656 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
5658 return (regno
<= LAST_LO_REGNUM
5659 || regno
> LAST_VIRTUAL_REGISTER
5660 || regno
== FRAME_POINTER_REGNUM
5661 || (GET_MODE_SIZE (mode
) >= 4
5662 && (regno
== STACK_POINTER_REGNUM
5663 || regno
>= FIRST_PSEUDO_REGISTER
5664 || x
== hard_frame_pointer_rtx
5665 || x
== arg_pointer_rtx
)));
5668 /* Return nonzero if x is a legitimate index register. This is the case
5669 for any base register that can access a QImode object. */
/* NOTE(review): extraction-garbled region; code byte-identical.
   QImode is the least-capable mode, so any QImode-capable base
   register also works as an index register.  */
5671 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
5673 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
5676 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5678 The AP may be eliminated to either the SP or the FP, so we use the
5679 least common denominator, e.g. SImode, and offsets from 0 to 64.
5681 ??? Verify whether the above is the right approach.
5683 ??? Also, the FP may be eliminated to the SP, so perhaps that
5684 needs special handling also.
5686 ??? Look at how the mips16 port solves this problem. It probably uses
5687 better ways to solve some of these problems.
5689 Although it is not incorrect, we don't accept QImode and HImode
5690 addresses based on the frame pointer or arg pointer until the
5691 reload pass starts. This is so that eliminating such addresses
5692 into stack based ones won't produce impossible code. */
5694 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
5696 /* ??? Not clear if this is right. Experiment. */
5697 if (GET_MODE_SIZE (mode
) < 4
5698 && !(reload_in_progress
|| reload_completed
)
5699 && (reg_mentioned_p (frame_pointer_rtx
, x
)
5700 || reg_mentioned_p (arg_pointer_rtx
, x
)
5701 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
5702 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
5703 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
5704 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
5707 /* Accept any base register. SP only in SImode or larger. */
5708 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
5711 /* This is PC relative data before arm_reorg runs. */
5712 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
5713 && GET_CODE (x
) == SYMBOL_REF
5714 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
5717 /* This is PC relative data after arm_reorg runs. */
5718 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
5720 && (GET_CODE (x
) == LABEL_REF
5721 || (GET_CODE (x
) == CONST
5722 && GET_CODE (XEXP (x
, 0)) == PLUS
5723 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5724 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5727 /* Post-inc indexing only supported for SImode and larger. */
5728 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
5729 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
5732 else if (GET_CODE (x
) == PLUS
)
5734 /* REG+REG address can be any two index registers. */
5735 /* We disallow FRAME+REG addressing since we know that FRAME
5736 will be replaced with STACK, and SP relative addressing only
5737 permits SP+OFFSET. */
5738 if (GET_MODE_SIZE (mode
) <= 4
5739 && XEXP (x
, 0) != frame_pointer_rtx
5740 && XEXP (x
, 1) != frame_pointer_rtx
5741 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
5742 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
5743 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
5746 /* REG+const has 5-7 bit offset for non-SP registers. */
5747 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
5748 || XEXP (x
, 0) == arg_pointer_rtx
)
5749 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5750 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
5753 /* REG+const has 10-bit offset for SP, but only SImode and
5754 larger is supported. */
5755 /* ??? Should probably check for DI/DFmode overflow here
5756 just like GO_IF_LEGITIMATE_OFFSET does. */
5757 else if (GET_CODE (XEXP (x
, 0)) == REG
5758 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
5759 && GET_MODE_SIZE (mode
) >= 4
5760 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5761 && INTVAL (XEXP (x
, 1)) >= 0
5762 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
5763 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
5766 else if (GET_CODE (XEXP (x
, 0)) == REG
5767 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
5768 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
5769 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
5770 && REGNO (XEXP (x
, 0))
5771 <= LAST_VIRTUAL_POINTER_REGISTER
))
5772 && GET_MODE_SIZE (mode
) >= 4
5773 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5774 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
5778 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5779 && GET_MODE_SIZE (mode
) == 4
5780 && GET_CODE (x
) == SYMBOL_REF
5781 && CONSTANT_POOL_ADDRESS_P (x
)
5783 && symbol_mentioned_p (get_pool_constant (x
))
5784 && ! pcrel_constant_p (get_pool_constant (x
))))
5790 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5791 instruction of mode MODE. */
5793 thumb_legitimate_offset_p (enum machine_mode mode
, HOST_WIDE_INT val
)
5795 switch (GET_MODE_SIZE (mode
))
5798 return val
>= 0 && val
< 32;
5801 return val
>= 0 && val
< 64 && (val
& 1) == 0;
5805 && (val
+ GET_MODE_SIZE (mode
)) <= 128
5811 arm_legitimate_address_p (enum machine_mode mode
, rtx x
, bool strict_p
)
5814 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
5815 else if (TARGET_THUMB2
)
5816 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
5817 else /* if (TARGET_THUMB1) */
5818 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
5821 /* Build the SYMBOL_REF for __tls_get_addr. */
5823 static GTY(()) rtx tls_get_addr_libfunc
;
5826 get_tls_get_addr (void)
5828 if (!tls_get_addr_libfunc
)
5829 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
5830 return tls_get_addr_libfunc
;
5834 arm_load_tp (rtx target
)
5837 target
= gen_reg_rtx (SImode
);
5841 /* Can return in any reg. */
5842 emit_insn (gen_load_tp_hard (target
));
5846 /* Always returned in r0. Immediately copy the result into a pseudo,
5847 otherwise other uses of r0 (e.g. setting up function arguments) may
5848 clobber the value. */
5852 emit_insn (gen_load_tp_soft ());
5854 tmp
= gen_rtx_REG (SImode
, 0);
5855 emit_move_insn (target
, tmp
);
5861 load_tls_operand (rtx x
, rtx reg
)
5865 if (reg
== NULL_RTX
)
5866 reg
= gen_reg_rtx (SImode
);
5868 tmp
= gen_rtx_CONST (SImode
, x
);
5870 emit_move_insn (reg
, tmp
);
5876 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
5878 rtx insns
, label
, labelno
, sum
;
5882 labelno
= GEN_INT (pic_labelno
++);
5883 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5884 label
= gen_rtx_CONST (VOIDmode
, label
);
5886 sum
= gen_rtx_UNSPEC (Pmode
,
5887 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
5888 GEN_INT (TARGET_ARM
? 8 : 4)),
5890 reg
= load_tls_operand (sum
, reg
);
5893 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
5894 else if (TARGET_THUMB2
)
5895 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5896 else /* TARGET_THUMB1 */
5897 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5899 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
, LCT_PURE
, /* LCT_CONST? */
5900 Pmode
, 1, reg
, Pmode
);
5902 insns
= get_insns ();
5909 legitimize_tls_address (rtx x
, rtx reg
)
5911 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
5912 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
5916 case TLS_MODEL_GLOBAL_DYNAMIC
:
5917 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
5918 dest
= gen_reg_rtx (Pmode
);
5919 emit_libcall_block (insns
, dest
, ret
, x
);
5922 case TLS_MODEL_LOCAL_DYNAMIC
:
5923 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
5925 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5926 share the LDM result with other LD model accesses. */
5927 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
5929 dest
= gen_reg_rtx (Pmode
);
5930 emit_libcall_block (insns
, dest
, ret
, eqv
);
5932 /* Load the addend. */
5933 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
, GEN_INT (TLS_LDO32
)),
5935 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
5936 return gen_rtx_PLUS (Pmode
, dest
, addend
);
5938 case TLS_MODEL_INITIAL_EXEC
:
5939 labelno
= GEN_INT (pic_labelno
++);
5940 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5941 label
= gen_rtx_CONST (VOIDmode
, label
);
5942 sum
= gen_rtx_UNSPEC (Pmode
,
5943 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
5944 GEN_INT (TARGET_ARM
? 8 : 4)),
5946 reg
= load_tls_operand (sum
, reg
);
5949 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
5950 else if (TARGET_THUMB2
)
5951 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
5954 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5955 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
5958 tp
= arm_load_tp (NULL_RTX
);
5960 return gen_rtx_PLUS (Pmode
, tp
, reg
);
5962 case TLS_MODEL_LOCAL_EXEC
:
5963 tp
= arm_load_tp (NULL_RTX
);
5965 reg
= gen_rtx_UNSPEC (Pmode
,
5966 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
5968 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
5970 return gen_rtx_PLUS (Pmode
, tp
, reg
);
5977 /* Try machine-dependent ways of modifying an illegitimate address
5978 to be legitimate. If we find one, return the new, valid address. */
5980 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
5984 /* TODO: legitimize_address for Thumb2. */
5987 return thumb_legitimize_address (x
, orig_x
, mode
);
5990 if (arm_tls_symbol_p (x
))
5991 return legitimize_tls_address (x
, NULL_RTX
);
5993 if (GET_CODE (x
) == PLUS
)
5995 rtx xop0
= XEXP (x
, 0);
5996 rtx xop1
= XEXP (x
, 1);
5998 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
5999 xop0
= force_reg (SImode
, xop0
);
6001 if (CONSTANT_P (xop1
) && !symbol_mentioned_p (xop1
))
6002 xop1
= force_reg (SImode
, xop1
);
6004 if (ARM_BASE_REGISTER_RTX_P (xop0
)
6005 && GET_CODE (xop1
) == CONST_INT
)
6007 HOST_WIDE_INT n
, low_n
;
6011 /* VFP addressing modes actually allow greater offsets, but for
6012 now we just stick with the lowest common denominator. */
6014 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
6026 low_n
= ((mode
) == TImode
? 0
6027 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
6031 base_reg
= gen_reg_rtx (SImode
);
6032 val
= force_operand (plus_constant (xop0
, n
), NULL_RTX
);
6033 emit_move_insn (base_reg
, val
);
6034 x
= plus_constant (base_reg
, low_n
);
6036 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
6037 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
6040 /* XXX We don't allow MINUS any more -- see comment in
6041 arm_legitimate_address_outer_p (). */
6042 else if (GET_CODE (x
) == MINUS
)
6044 rtx xop0
= XEXP (x
, 0);
6045 rtx xop1
= XEXP (x
, 1);
6047 if (CONSTANT_P (xop0
))
6048 xop0
= force_reg (SImode
, xop0
);
6050 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
6051 xop1
= force_reg (SImode
, xop1
);
6053 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
6054 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
6057 /* Make sure to take full advantage of the pre-indexed addressing mode
6058 with absolute addresses which often allows for the base register to
6059 be factorized for multiple adjacent memory references, and it might
6060 even allows for the mini pool to be avoided entirely. */
6061 else if (GET_CODE (x
) == CONST_INT
&& optimize
> 0)
6064 HOST_WIDE_INT mask
, base
, index
;
6067 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6068 use a 8-bit index. So let's use a 12-bit index for SImode only and
6069 hope that arm_gen_constant will enable ldrb to use more bits. */
6070 bits
= (mode
== SImode
) ? 12 : 8;
6071 mask
= (1 << bits
) - 1;
6072 base
= INTVAL (x
) & ~mask
;
6073 index
= INTVAL (x
) & mask
;
6074 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
6076 /* It'll most probably be more efficient to generate the base
6077 with more bits set and use a negative index instead. */
6081 base_reg
= force_reg (SImode
, GEN_INT (base
));
6082 x
= plus_constant (base_reg
, index
);
6087 /* We need to find and carefully transform any SYMBOL and LABEL
6088 references; so go back to the original address expression. */
6089 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6091 if (new_x
!= orig_x
)
6099 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6100 to be legitimate. If we find one, return the new, valid address. */
6102 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
6104 if (arm_tls_symbol_p (x
))
6105 return legitimize_tls_address (x
, NULL_RTX
);
6107 if (GET_CODE (x
) == PLUS
6108 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6109 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
6110 || INTVAL (XEXP (x
, 1)) < 0))
6112 rtx xop0
= XEXP (x
, 0);
6113 rtx xop1
= XEXP (x
, 1);
6114 HOST_WIDE_INT offset
= INTVAL (xop1
);
6116 /* Try and fold the offset into a biasing of the base register and
6117 then offsetting that. Don't do this when optimizing for space
6118 since it can cause too many CSEs. */
6119 if (optimize_size
&& offset
>= 0
6120 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
6122 HOST_WIDE_INT delta
;
6125 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
6126 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
6127 delta
= 31 * GET_MODE_SIZE (mode
);
6129 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
6131 xop0
= force_operand (plus_constant (xop0
, offset
- delta
),
6133 x
= plus_constant (xop0
, delta
);
6135 else if (offset
< 0 && offset
> -256)
6136 /* Small negative offsets are best done with a subtract before the
6137 dereference, forcing these into a register normally takes two
6139 x
= force_operand (x
, NULL_RTX
);
6142 /* For the remaining cases, force the constant into a register. */
6143 xop1
= force_reg (SImode
, xop1
);
6144 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
6147 else if (GET_CODE (x
) == PLUS
6148 && s_register_operand (XEXP (x
, 1), SImode
)
6149 && !s_register_operand (XEXP (x
, 0), SImode
))
6151 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
6153 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
6158 /* We need to find and carefully transform any SYMBOL and LABEL
6159 references; so go back to the original address expression. */
6160 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6162 if (new_x
!= orig_x
)
6170 arm_legitimize_reload_address (rtx
*p
,
6171 enum machine_mode mode
,
6172 int opnum
, int type
,
6173 int ind_levels ATTRIBUTE_UNUSED
)
6175 if (GET_CODE (*p
) == PLUS
6176 && GET_CODE (XEXP (*p
, 0)) == REG
6177 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p
, 0)))
6178 && GET_CODE (XEXP (*p
, 1)) == CONST_INT
)
6180 HOST_WIDE_INT val
= INTVAL (XEXP (*p
, 1));
6181 HOST_WIDE_INT low
, high
;
6183 /* Detect coprocessor load/stores. */
6184 bool coproc_p
= ((TARGET_HARD_FLOAT
6185 && (TARGET_VFP
|| TARGET_FPA
|| TARGET_MAVERICK
)
6186 && (mode
== SFmode
|| mode
== DFmode
6187 || (mode
== DImode
&& TARGET_MAVERICK
)))
6188 || (TARGET_REALLY_IWMMXT
6189 && VALID_IWMMXT_REG_MODE (mode
))
6191 && (VALID_NEON_DREG_MODE (mode
)
6192 || VALID_NEON_QREG_MODE (mode
))));
6194 /* For some conditions, bail out when lower two bits are unaligned. */
6195 if ((val
& 0x3) != 0
6196 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6198 /* For DI, and DF under soft-float: */
6199 || ((mode
== DImode
|| mode
== DFmode
)
6200 /* Without ldrd, we use stm/ldm, which does not
6201 fair well with unaligned bits. */
6203 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6204 || TARGET_THUMB2
))))
6207 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6208 of which the (reg+high) gets turned into a reload add insn,
6209 we try to decompose the index into high/low values that can often
6210 also lead to better reload CSE.
6212 ldr r0, [r2, #4100] // Offset too large
6213 ldr r1, [r2, #4104] // Offset too large
6215 is best reloaded as:
6221 which post-reload CSE can simplify in most cases to eliminate the
6222 second add instruction:
6227 The idea here is that we want to split out the bits of the constant
6228 as a mask, rather than as subtracting the maximum offset that the
6229 respective type of load/store used can handle.
6231 When encountering negative offsets, we can still utilize it even if
6232 the overall offset is positive; sometimes this may lead to an immediate
6233 that can be constructed with fewer instructions.
6235 ldr r0, [r2, #0x3FFFFC]
6237 This is best reloaded as:
6238 add t1, r2, #0x400000
6241 The trick for spotting this for a load insn with N bits of offset
6242 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
6243 negative offset that is going to make bit N and all the bits below
6244 it become zero in the remainder part.
6246 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6247 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6248 used in most cases of ARM load/store instructions. */
6250 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6251 (((VAL) & ((1 << (N)) - 1)) \
6252 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6257 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 10);
6259 /* NEON quad-word load/stores are made of two double-word accesses,
6260 so the valid index range is reduced by 8. Treat as 9-bit range if
6262 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
) && low
>= 1016)
6263 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 9);
6265 else if (GET_MODE_SIZE (mode
) == 8)
6268 low
= (TARGET_THUMB2
6269 ? SIGN_MAG_LOW_ADDR_BITS (val
, 10)
6270 : SIGN_MAG_LOW_ADDR_BITS (val
, 8));
6272 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6273 to access doublewords. The supported load/store offsets are
6274 -8, -4, and 4, which we try to produce here. */
6275 low
= ((val
& 0xf) ^ 0x8) - 0x8;
6277 else if (GET_MODE_SIZE (mode
) < 8)
6279 /* NEON element load/stores do not have an offset. */
6280 if (TARGET_NEON_FP16
&& mode
== HFmode
)
6285 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6286 Try the wider 12-bit range first, and re-try if the result
6288 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
6290 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
6294 if (mode
== HImode
|| mode
== HFmode
)
6297 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
6300 /* The storehi/movhi_bytes fallbacks can use only
6301 [-4094,+4094] of the full ldrb/strb index range. */
6302 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
6303 if (low
== 4095 || low
== -4095)
6308 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
6314 high
= ((((val
- low
) & (unsigned HOST_WIDE_INT
) 0xffffffff)
6315 ^ (unsigned HOST_WIDE_INT
) 0x80000000)
6316 - (unsigned HOST_WIDE_INT
) 0x80000000);
6317 /* Check for overflow or zero */
6318 if (low
== 0 || high
== 0 || (high
+ low
!= val
))
6321 /* Reload the high part into a base reg; leave the low part
6323 *p
= gen_rtx_PLUS (GET_MODE (*p
),
6324 gen_rtx_PLUS (GET_MODE (*p
), XEXP (*p
, 0),
6327 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
6328 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
6329 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6337 thumb_legitimize_reload_address (rtx
*x_p
,
6338 enum machine_mode mode
,
6339 int opnum
, int type
,
6340 int ind_levels ATTRIBUTE_UNUSED
)
6344 if (GET_CODE (x
) == PLUS
6345 && GET_MODE_SIZE (mode
) < 4
6346 && REG_P (XEXP (x
, 0))
6347 && XEXP (x
, 0) == stack_pointer_rtx
6348 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6349 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
6354 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6355 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6359 /* If both registers are hi-regs, then it's better to reload the
6360 entire expression rather than each register individually. That
6361 only requires one reload register rather than two. */
6362 if (GET_CODE (x
) == PLUS
6363 && REG_P (XEXP (x
, 0))
6364 && REG_P (XEXP (x
, 1))
6365 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
6366 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
6371 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6372 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6379 /* Test for various thread-local symbols. */
6381 /* Return TRUE if X is a thread-local symbol. */
6384 arm_tls_symbol_p (rtx x
)
6386 if (! TARGET_HAVE_TLS
)
6389 if (GET_CODE (x
) != SYMBOL_REF
)
6392 return SYMBOL_REF_TLS_MODEL (x
) != 0;
6395 /* Helper for arm_tls_referenced_p. */
6398 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
6400 if (GET_CODE (*x
) == SYMBOL_REF
)
6401 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
6403 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6404 TLS offsets, not real symbol references. */
6405 if (GET_CODE (*x
) == UNSPEC
6406 && XINT (*x
, 1) == UNSPEC_TLS
)
6412 /* Return TRUE if X contains any TLS symbol references. */
6415 arm_tls_referenced_p (rtx x
)
6417 if (! TARGET_HAVE_TLS
)
6420 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
6423 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6425 On the ARM, allow any integer (invalid ones are removed later by insn
6426 patterns), nice doubles and symbol_refs which refer to the function's
6429 When generating pic allow anything. */
6432 arm_legitimate_constant_p_1 (enum machine_mode mode
, rtx x
)
6434 /* At present, we have no support for Neon structure constants, so forbid
6435 them here. It might be possible to handle simple cases like 0 and -1
6437 if (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
6440 return flag_pic
|| !label_mentioned_p (x
);
6444 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
6446 return (GET_CODE (x
) == CONST_INT
6447 || GET_CODE (x
) == CONST_DOUBLE
6448 || CONSTANT_ADDRESS_P (x
)
6453 arm_legitimate_constant_p (enum machine_mode mode
, rtx x
)
6455 return (!arm_cannot_force_const_mem (mode
, x
)
6457 ? arm_legitimate_constant_p_1 (mode
, x
)
6458 : thumb_legitimate_constant_p (mode
, x
)));
6461 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6464 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
6468 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
6470 split_const (x
, &base
, &offset
);
6471 if (GET_CODE (base
) == SYMBOL_REF
6472 && !offset_within_block_p (base
, INTVAL (offset
)))
6475 return arm_tls_referenced_p (x
);
6478 #define REG_OR_SUBREG_REG(X) \
6479 (GET_CODE (X) == REG \
6480 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6482 #define REG_OR_SUBREG_RTX(X) \
6483 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6486 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
6488 enum machine_mode mode
= GET_MODE (x
);
6502 return COSTS_N_INSNS (1);
6505 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6508 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
6515 return COSTS_N_INSNS (2) + cycles
;
6517 return COSTS_N_INSNS (1) + 16;
6520 return (COSTS_N_INSNS (1)
6521 + 4 * ((GET_CODE (SET_SRC (x
)) == MEM
)
6522 + GET_CODE (SET_DEST (x
)) == MEM
));
6527 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
6529 if (thumb_shiftable_const (INTVAL (x
)))
6530 return COSTS_N_INSNS (2);
6531 return COSTS_N_INSNS (3);
6533 else if ((outer
== PLUS
|| outer
== COMPARE
)
6534 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
6536 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
6537 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
6538 return COSTS_N_INSNS (1);
6539 else if (outer
== AND
)
6542 /* This duplicates the tests in the andsi3 expander. */
6543 for (i
= 9; i
<= 31; i
++)
6544 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
6545 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
6546 return COSTS_N_INSNS (2);
6548 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
6549 || outer
== LSHIFTRT
)
6551 return COSTS_N_INSNS (2);
6557 return COSTS_N_INSNS (3);
6575 /* XXX another guess. */
6576 /* Memory costs quite a lot for the first word, but subsequent words
6577 load at the equivalent of a single insn each. */
6578 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
6579 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
6584 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
6590 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
6591 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
6597 return total
+ COSTS_N_INSNS (1);
6599 /* Assume a two-shift sequence. Increase the cost slightly so
6600 we prefer actual shifts over an extend operation. */
6601 return total
+ 1 + COSTS_N_INSNS (2);
6609 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
6611 enum machine_mode mode
= GET_MODE (x
);
6612 enum rtx_code subcode
;
6614 enum rtx_code code
= GET_CODE (x
);
6620 /* Memory costs quite a lot for the first word, but subsequent words
6621 load at the equivalent of a single insn each. */
6622 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
6629 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
6630 *total
= COSTS_N_INSNS (2);
6631 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
6632 *total
= COSTS_N_INSNS (4);
6634 *total
= COSTS_N_INSNS (20);
6638 if (GET_CODE (XEXP (x
, 1)) == REG
)
6639 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
6640 else if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6641 *total
= rtx_cost (XEXP (x
, 1), code
, speed
);
6647 *total
+= COSTS_N_INSNS (4);
6652 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
6653 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6656 *total
+= COSTS_N_INSNS (3);
6660 *total
+= COSTS_N_INSNS (1);
6661 /* Increase the cost of complex shifts because they aren't any faster,
6662 and reduce dual issue opportunities. */
6663 if (arm_tune_cortex_a9
6664 && outer
!= SET
&& GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6672 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
6673 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
6674 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
6676 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6680 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6681 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
6683 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6690 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6692 if (TARGET_HARD_FLOAT
6694 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
6696 *total
= COSTS_N_INSNS (1);
6697 if (GET_CODE (XEXP (x
, 0)) == CONST_DOUBLE
6698 && arm_const_double_rtx (XEXP (x
, 0)))
6700 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6704 if (GET_CODE (XEXP (x
, 1)) == CONST_DOUBLE
6705 && arm_const_double_rtx (XEXP (x
, 1)))
6707 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6713 *total
= COSTS_N_INSNS (20);
6717 *total
= COSTS_N_INSNS (1);
6718 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
6719 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
6721 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6725 subcode
= GET_CODE (XEXP (x
, 1));
6726 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6727 || subcode
== LSHIFTRT
6728 || subcode
== ROTATE
|| subcode
== ROTATERT
)
6730 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6731 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, speed
);
6735 /* A shift as a part of RSB costs no more than RSB itself. */
6736 if (GET_CODE (XEXP (x
, 0)) == MULT
6737 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6739 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, speed
);
6740 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6745 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
6747 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6748 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, speed
);
6752 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
6753 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
6755 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, speed
);
6756 if (GET_CODE (XEXP (XEXP (x
, 1), 0)) == REG
6757 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
6758 *total
+= COSTS_N_INSNS (1);
6766 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
6767 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
6768 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
6770 *total
= COSTS_N_INSNS (1);
6771 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
6773 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6777 /* MLA: All arguments must be registers. We filter out
6778 multiplication by a power of two, so that we fall down into
6780 if (GET_CODE (XEXP (x
, 0)) == MULT
6781 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6783 /* The cost comes from the cost of the multiply. */
6787 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6789 if (TARGET_HARD_FLOAT
6791 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
6793 *total
= COSTS_N_INSNS (1);
6794 if (GET_CODE (XEXP (x
, 1)) == CONST_DOUBLE
6795 && arm_const_double_rtx (XEXP (x
, 1)))
6797 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6804 *total
= COSTS_N_INSNS (20);
6808 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
6809 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
6811 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, speed
);
6812 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
6813 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
6814 *total
+= COSTS_N_INSNS (1);
6820 case AND
: case XOR
: case IOR
:
6822 /* Normally the frame registers will be spilt into reg+const during
6823 reload, so it is a bad idea to combine them with other instructions,
6824 since then they might not be moved outside of loops. As a compromise
6825 we allow integration with ops that have a constant as their second
6827 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
6828 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
6829 && GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6830 *total
= COSTS_N_INSNS (1);
6834 *total
+= COSTS_N_INSNS (2);
6835 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6836 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
6838 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6845 *total
+= COSTS_N_INSNS (1);
6846 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6847 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
6849 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6852 subcode
= GET_CODE (XEXP (x
, 0));
6853 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6854 || subcode
== LSHIFTRT
6855 || subcode
== ROTATE
|| subcode
== ROTATERT
)
6857 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6858 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6863 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6865 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6866 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6870 if (subcode
== UMIN
|| subcode
== UMAX
6871 || subcode
== SMIN
|| subcode
== SMAX
)
6873 *total
= COSTS_N_INSNS (3);
6880 /* This should have been handled by the CPU specific routines. */
6884 if (arm_arch3m
&& mode
== SImode
6885 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
6886 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
6887 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
6888 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
6889 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
6890 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
6892 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, speed
);
6895 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6899 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6901 if (TARGET_HARD_FLOAT
6903 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
6905 *total
= COSTS_N_INSNS (1);
6908 *total
= COSTS_N_INSNS (2);
6914 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
6915 if (mode
== SImode
&& code
== NOT
)
6917 subcode
= GET_CODE (XEXP (x
, 0));
6918 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6919 || subcode
== LSHIFTRT
6920 || subcode
== ROTATE
|| subcode
== ROTATERT
6922 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
6924 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6925 /* Register shifts cost an extra cycle. */
6926 if (GET_CODE (XEXP (XEXP (x
, 0), 1)) != CONST_INT
)
6927 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
6936 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
6938 *total
= COSTS_N_INSNS (4);
6942 operand
= XEXP (x
, 0);
6944 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
6945 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
6946 && GET_CODE (XEXP (operand
, 0)) == REG
6947 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
6948 *total
+= COSTS_N_INSNS (1);
6949 *total
+= (rtx_cost (XEXP (x
, 1), code
, speed
)
6950 + rtx_cost (XEXP (x
, 2), code
, speed
));
6954 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
6956 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, speed
);
6962 if ((GET_CODE (XEXP (x
, 0)) != REG
|| REGNO (XEXP (x
, 0)) != CC_REGNUM
)
6963 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
6965 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, speed
);
6971 if ((GET_CODE (XEXP (x
, 0)) != REG
|| REGNO (XEXP (x
, 0)) != CC_REGNUM
)
6972 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
6974 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, speed
);
6994 /* SCC insns. In the case where the comparison has already been
6995 performed, then they cost 2 instructions. Otherwise they need
6996 an additional comparison before them. */
6997 *total
= COSTS_N_INSNS (2);
6998 if (GET_CODE (XEXP (x
, 0)) == REG
&& REGNO (XEXP (x
, 0)) == CC_REGNUM
)
7005 if (GET_CODE (XEXP (x
, 0)) == REG
&& REGNO (XEXP (x
, 0)) == CC_REGNUM
)
7011 *total
+= COSTS_N_INSNS (1);
7012 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
7013 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
7015 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
7019 subcode
= GET_CODE (XEXP (x
, 0));
7020 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7021 || subcode
== LSHIFTRT
7022 || subcode
== ROTATE
|| subcode
== ROTATERT
)
7024 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
7025 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
7030 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7032 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
7033 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
7043 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, speed
);
7044 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
7045 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
7046 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
7050 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7052 if (TARGET_HARD_FLOAT
7054 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7056 *total
= COSTS_N_INSNS (1);
7059 *total
= COSTS_N_INSNS (20);
7062 *total
= COSTS_N_INSNS (1);
7064 *total
+= COSTS_N_INSNS (3);
7070 if (GET_MODE_CLASS (mode
) == MODE_INT
)
7072 rtx op
= XEXP (x
, 0);
7073 enum machine_mode opmode
= GET_MODE (op
);
7076 *total
+= COSTS_N_INSNS (1);
7078 if (opmode
!= SImode
)
7082 /* If !arm_arch4, we use one of the extendhisi2_mem
7083 or movhi_bytes patterns for HImode. For a QImode
7084 sign extension, we first zero-extend from memory
7085 and then perform a shift sequence. */
7086 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
7087 *total
+= COSTS_N_INSNS (2);
7090 *total
+= COSTS_N_INSNS (1);
7092 /* We don't have the necessary insn, so we need to perform some
7094 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
7095 /* An and with constant 255. */
7096 *total
+= COSTS_N_INSNS (1);
7098 /* A shift sequence. Increase costs slightly to avoid
7099 combining two shifts into an extend operation. */
7100 *total
+= COSTS_N_INSNS (2) + 1;
7106 switch (GET_MODE (XEXP (x
, 0)))
7113 *total
= COSTS_N_INSNS (1);
7123 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, speed
);
7127 if (const_ok_for_arm (INTVAL (x
))
7128 || const_ok_for_arm (~INTVAL (x
)))
7129 *total
= COSTS_N_INSNS (1);
7131 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
7132 INTVAL (x
), NULL_RTX
,
7139 *total
= COSTS_N_INSNS (3);
7143 *total
= COSTS_N_INSNS (1);
7147 *total
= COSTS_N_INSNS (1);
7148 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
7152 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
7153 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7154 *total
= COSTS_N_INSNS (1);
7156 *total
= COSTS_N_INSNS (4);
7160 *total
= COSTS_N_INSNS (4);
7165 /* Estimates the size cost of thumb1 instructions.
7166 For now most of the code is copied from thumb1_rtx_costs. We need more
7167 fine grain tuning when we have more related test cases. */
7169 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
7171 enum machine_mode mode
= GET_MODE (x
);
7184 return COSTS_N_INSNS (1);
7187 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7189 /* Thumb1 mul instruction can't operate on const. We must Load it
7190 into a register first. */
7191 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
7192 return COSTS_N_INSNS (1) + const_size
;
7194 return COSTS_N_INSNS (1);
7197 return (COSTS_N_INSNS (1)
7198 + 4 * ((GET_CODE (SET_SRC (x
)) == MEM
)
7199 + GET_CODE (SET_DEST (x
)) == MEM
));
7204 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
7205 return COSTS_N_INSNS (1);
7206 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7207 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
7208 return COSTS_N_INSNS (2);
7209 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7210 if (thumb_shiftable_const (INTVAL (x
)))
7211 return COSTS_N_INSNS (2);
7212 return COSTS_N_INSNS (3);
7214 else if ((outer
== PLUS
|| outer
== COMPARE
)
7215 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
7217 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
7218 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
7219 return COSTS_N_INSNS (1);
7220 else if (outer
== AND
)
7223 /* This duplicates the tests in the andsi3 expander. */
7224 for (i
= 9; i
<= 31; i
++)
7225 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
7226 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
7227 return COSTS_N_INSNS (2);
7229 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
7230 || outer
== LSHIFTRT
)
7232 return COSTS_N_INSNS (2);
7238 return COSTS_N_INSNS (3);
7256 /* XXX another guess. */
7257 /* Memory costs quite a lot for the first word, but subsequent words
7258 load at the equivalent of a single insn each. */
7259 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
7260 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7265 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
7270 /* XXX still guessing. */
7271 switch (GET_MODE (XEXP (x
, 0)))
7274 return (1 + (mode
== DImode
? 4 : 0)
7275 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
7278 return (4 + (mode
== DImode
? 4 : 0)
7279 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
7282 return (1 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
7293 /* RTX costs when optimizing for size. */
7295 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7298 enum machine_mode mode
= GET_MODE (x
);
7301 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
7305 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7309 /* A memory access costs 1 insn if the mode is small, or the address is
7310 a single register, otherwise it costs one insn per word. */
7311 if (REG_P (XEXP (x
, 0)))
7312 *total
= COSTS_N_INSNS (1);
7314 && GET_CODE (XEXP (x
, 0)) == PLUS
7315 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
7316 /* This will be split into two instructions.
7317 See arm.md:calculate_pic_address. */
7318 *total
= COSTS_N_INSNS (2);
7320 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7327 /* Needs a libcall, so it costs about this. */
7328 *total
= COSTS_N_INSNS (2);
7332 if (mode
== SImode
&& GET_CODE (XEXP (x
, 1)) == REG
)
7334 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, false);
7342 if (mode
== DImode
&& GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7344 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, false);
7347 else if (mode
== SImode
)
7349 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, false);
7350 /* Slightly disparage register shifts, but not by much. */
7351 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
7352 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, false);
7356 /* Needs a libcall. */
7357 *total
= COSTS_N_INSNS (2);
7361 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7362 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7364 *total
= COSTS_N_INSNS (1);
7370 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
7371 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
7373 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
7374 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
7375 || subcode1
== ROTATE
|| subcode1
== ROTATERT
7376 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
7377 || subcode1
== ASHIFTRT
)
7379 /* It's just the cost of the two operands. */
7384 *total
= COSTS_N_INSNS (1);
7388 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7392 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7393 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7395 *total
= COSTS_N_INSNS (1);
7399 /* A shift as a part of ADD costs nothing. */
7400 if (GET_CODE (XEXP (x
, 0)) == MULT
7401 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7403 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
7404 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, false);
7405 *total
+= rtx_cost (XEXP (x
, 1), code
, false);
7410 case AND
: case XOR
: case IOR
:
7413 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
7415 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
7416 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
7417 || (code
== AND
&& subcode
== NOT
))
7419 /* It's just the cost of the two operands. */
7425 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7429 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7433 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7434 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7436 *total
= COSTS_N_INSNS (1);
7442 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7451 if (cc_register (XEXP (x
, 0), VOIDmode
))
7454 *total
= COSTS_N_INSNS (1);
7458 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7459 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7460 *total
= COSTS_N_INSNS (1);
7462 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
7467 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
7470 if (const_ok_for_arm (INTVAL (x
)))
7471 /* A multiplication by a constant requires another instruction
7472 to load the constant to a register. */
7473 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
7475 else if (const_ok_for_arm (~INTVAL (x
)))
7476 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
7477 else if (const_ok_for_arm (-INTVAL (x
)))
7479 if (outer_code
== COMPARE
|| outer_code
== PLUS
7480 || outer_code
== MINUS
)
7483 *total
= COSTS_N_INSNS (1);
7486 *total
= COSTS_N_INSNS (2);
7492 *total
= COSTS_N_INSNS (2);
7496 *total
= COSTS_N_INSNS (4);
7501 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7502 cost of these slightly. */
7503 *total
= COSTS_N_INSNS (1) + 1;
7507 if (mode
!= VOIDmode
)
7508 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7510 *total
= COSTS_N_INSNS (4); /* How knows? */
7515 /* RTX costs when optimizing for size. */
7517 arm_rtx_costs (rtx x
, int code
, int outer_code
, int *total
,
7521 return arm_size_rtx_costs (x
, (enum rtx_code
) code
,
7522 (enum rtx_code
) outer_code
, total
);
7524 return current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
7525 (enum rtx_code
) outer_code
,
7529 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7530 supported on any "slowmul" cores, so it can be ignored. */
7533 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7534 int *total
, bool speed
)
7536 enum machine_mode mode
= GET_MODE (x
);
7540 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7547 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
7550 *total
= COSTS_N_INSNS (20);
7554 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7556 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
7557 & (unsigned HOST_WIDE_INT
) 0xffffffff);
7558 int cost
, const_ok
= const_ok_for_arm (i
);
7559 int j
, booth_unit_size
;
7561 /* Tune as appropriate. */
7562 cost
= const_ok
? 4 : 8;
7563 booth_unit_size
= 2;
7564 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
7566 i
>>= booth_unit_size
;
7570 *total
= COSTS_N_INSNS (cost
);
7571 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
7575 *total
= COSTS_N_INSNS (20);
7579 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
7584 /* RTX cost for cores with a fast multiply unit (M variants). */
7587 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7588 int *total
, bool speed
)
7590 enum machine_mode mode
= GET_MODE (x
);
7594 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7598 /* ??? should thumb2 use different costs? */
7602 /* There is no point basing this on the tuning, since it is always the
7603 fast variant if it exists at all. */
7605 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
7606 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7607 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7609 *total
= COSTS_N_INSNS(2);
7616 *total
= COSTS_N_INSNS (5);
7620 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7622 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
7623 & (unsigned HOST_WIDE_INT
) 0xffffffff);
7624 int cost
, const_ok
= const_ok_for_arm (i
);
7625 int j
, booth_unit_size
;
7627 /* Tune as appropriate. */
7628 cost
= const_ok
? 4 : 8;
7629 booth_unit_size
= 8;
7630 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
7632 i
>>= booth_unit_size
;
7636 *total
= COSTS_N_INSNS(cost
);
7642 *total
= COSTS_N_INSNS (4);
7646 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7648 if (TARGET_HARD_FLOAT
7650 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7652 *total
= COSTS_N_INSNS (1);
7657 /* Requires a lib call */
7658 *total
= COSTS_N_INSNS (20);
7662 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
7667 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7668 so it can be ignored. */
7671 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7672 int *total
, bool speed
)
7674 enum machine_mode mode
= GET_MODE (x
);
7678 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7685 if (GET_CODE (XEXP (x
, 0)) != MULT
)
7686 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
7688 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7689 will stall until the multiplication is complete. */
7690 *total
= COSTS_N_INSNS (3);
7694 /* There is no point basing this on the tuning, since it is always the
7695 fast variant if it exists at all. */
7697 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
7698 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7699 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7701 *total
= COSTS_N_INSNS (2);
7708 *total
= COSTS_N_INSNS (5);
7712 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7714 /* If operand 1 is a constant we can more accurately
7715 calculate the cost of the multiply. The multiplier can
7716 retire 15 bits on the first cycle and a further 12 on the
7717 second. We do, of course, have to load the constant into
7718 a register first. */
7719 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
7720 /* There's a general overhead of one cycle. */
7722 unsigned HOST_WIDE_INT masked_const
;
7727 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
7729 masked_const
= i
& 0xffff8000;
7730 if (masked_const
!= 0)
7733 masked_const
= i
& 0xf8000000;
7734 if (masked_const
!= 0)
7737 *total
= COSTS_N_INSNS (cost
);
7743 *total
= COSTS_N_INSNS (3);
7747 /* Requires a lib call */
7748 *total
= COSTS_N_INSNS (20);
7752 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
7757 /* RTX costs for 9e (and later) cores. */
7760 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7761 int *total
, bool speed
)
7763 enum machine_mode mode
= GET_MODE (x
);
7770 *total
= COSTS_N_INSNS (3);
7774 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7782 /* There is no point basing this on the tuning, since it is always the
7783 fast variant if it exists at all. */
7785 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
7786 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7787 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7789 *total
= COSTS_N_INSNS (2);
7796 *total
= COSTS_N_INSNS (5);
7802 *total
= COSTS_N_INSNS (2);
7806 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7808 if (TARGET_HARD_FLOAT
7810 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7812 *total
= COSTS_N_INSNS (1);
7817 *total
= COSTS_N_INSNS (20);
7821 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
7824 /* All address computations that can be done are free, but rtx cost returns
7825 the same for practically all of them. So we weight the different types
7826 of address here in the order (most pref first):
7827 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7829 arm_arm_address_cost (rtx x
)
7831 enum rtx_code c
= GET_CODE (x
);
7833 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
7835 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
7840 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7843 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
7853 arm_thumb_address_cost (rtx x
)
7855 enum rtx_code c
= GET_CODE (x
);
7860 && GET_CODE (XEXP (x
, 0)) == REG
7861 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7868 arm_address_cost (rtx x
, bool speed ATTRIBUTE_UNUSED
)
7870 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
7873 /* Adjust cost hook for XScale. */
7875 xscale_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
7877 /* Some true dependencies can have a higher cost depending
7878 on precisely how certain input operands are used. */
7879 if (REG_NOTE_KIND(link
) == 0
7880 && recog_memoized (insn
) >= 0
7881 && recog_memoized (dep
) >= 0)
7883 int shift_opnum
= get_attr_shift (insn
);
7884 enum attr_type attr_type
= get_attr_type (dep
);
7886 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7887 operand for INSN. If we have a shifted input operand and the
7888 instruction we depend on is another ALU instruction, then we may
7889 have to account for an additional stall. */
7890 if (shift_opnum
!= 0
7891 && (attr_type
== TYPE_ALU_SHIFT
|| attr_type
== TYPE_ALU_SHIFT_REG
))
7893 rtx shifted_operand
;
7896 /* Get the shifted operand. */
7897 extract_insn (insn
);
7898 shifted_operand
= recog_data
.operand
[shift_opnum
];
7900 /* Iterate over all the operands in DEP. If we write an operand
7901 that overlaps with SHIFTED_OPERAND, then we have increase the
7902 cost of this dependency. */
7904 preprocess_constraints ();
7905 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
7907 /* We can ignore strict inputs. */
7908 if (recog_data
.operand_type
[opno
] == OP_IN
)
7911 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
7923 /* Adjust cost hook for Cortex A9. */
7925 cortex_a9_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
7927 switch (REG_NOTE_KIND (link
))
7934 case REG_DEP_OUTPUT
:
7935 if (recog_memoized (insn
) >= 0
7936 && recog_memoized (dep
) >= 0)
7938 if (GET_CODE (PATTERN (insn
)) == SET
)
7941 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
7943 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
7945 enum attr_type attr_type_insn
= get_attr_type (insn
);
7946 enum attr_type attr_type_dep
= get_attr_type (dep
);
7948 /* By default all dependencies of the form
7951 have an extra latency of 1 cycle because
7952 of the input and output dependency in this
7953 case. However this gets modeled as an true
7954 dependency and hence all these checks. */
7955 if (REG_P (SET_DEST (PATTERN (insn
)))
7956 && REG_P (SET_DEST (PATTERN (dep
)))
7957 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
7958 SET_DEST (PATTERN (dep
))))
7960 /* FMACS is a special case where the dependant
7961 instruction can be issued 3 cycles before
7962 the normal latency in case of an output
7964 if ((attr_type_insn
== TYPE_FMACS
7965 || attr_type_insn
== TYPE_FMACD
)
7966 && (attr_type_dep
== TYPE_FMACS
7967 || attr_type_dep
== TYPE_FMACD
))
7969 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
7970 *cost
= insn_default_latency (dep
) - 3;
7972 *cost
= insn_default_latency (dep
);
7977 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
7978 *cost
= insn_default_latency (dep
) + 1;
7980 *cost
= insn_default_latency (dep
);
7996 /* Adjust cost hook for FA726TE. */
7998 fa726te_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
8000 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
8001 have penalty of 3. */
8002 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
8003 && recog_memoized (insn
) >= 0
8004 && recog_memoized (dep
) >= 0
8005 && get_attr_conds (dep
) == CONDS_SET
)
8007 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8008 if (get_attr_conds (insn
) == CONDS_USE
8009 && get_attr_type (insn
) != TYPE_BRANCH
)
8015 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
8016 || get_attr_conds (insn
) == CONDS_USE
)
8026 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8027 It corrects the value of COST based on the relationship between
8028 INSN and DEP through the dependence LINK. It returns the new
8029 value. There is a per-core adjust_cost hook to adjust scheduler costs
8030 and the per-core hook can choose to completely override the generic
8031 adjust_cost function. Only put bits of code into arm_adjust_cost that
8032 are common across all cores. */
8034 arm_adjust_cost (rtx insn
, rtx link
, rtx dep
, int cost
)
8038 /* When generating Thumb-1 code, we want to place flag-setting operations
8039 close to a conditional branch which depends on them, so that we can
8040 omit the comparison. */
8042 && REG_NOTE_KIND (link
) == 0
8043 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
8044 && recog_memoized (dep
) >= 0
8045 && get_attr_conds (dep
) == CONDS_SET
)
8048 if (current_tune
->sched_adjust_cost
!= NULL
)
8050 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
8054 /* XXX This is not strictly true for the FPA. */
8055 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
8056 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
8059 /* Call insns don't incur a stall, even if they follow a load. */
8060 if (REG_NOTE_KIND (link
) == 0
8061 && GET_CODE (insn
) == CALL_INSN
)
8064 if ((i_pat
= single_set (insn
)) != NULL
8065 && GET_CODE (SET_SRC (i_pat
)) == MEM
8066 && (d_pat
= single_set (dep
)) != NULL
8067 && GET_CODE (SET_DEST (d_pat
)) == MEM
)
8069 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
8070 /* This is a load after a store, there is no conflict if the load reads
8071 from a cached area. Assume that loads from the stack, and from the
8072 constant pool are cached, and that others will miss. This is a
8075 if ((GET_CODE (src_mem
) == SYMBOL_REF
8076 && CONSTANT_POOL_ADDRESS_P (src_mem
))
8077 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
8078 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
8079 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
8087 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
8090 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
8092 return (optimize
> 0) ? 2 : 0;
8096 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
8098 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
8101 static int fp_consts_inited
= 0;
8103 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8104 static const char * const strings_fp
[8] =
8107 "4", "5", "0.5", "10"
8110 static REAL_VALUE_TYPE values_fp
[8];
8113 init_fp_table (void)
8119 fp_consts_inited
= 1;
8121 fp_consts_inited
= 8;
8123 for (i
= 0; i
< fp_consts_inited
; i
++)
8125 r
= REAL_VALUE_ATOF (strings_fp
[i
], DFmode
);
8130 /* Return TRUE if rtx X is a valid immediate FP constant. */
8132 arm_const_double_rtx (rtx x
)
8137 if (!fp_consts_inited
)
8140 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8141 if (REAL_VALUE_MINUS_ZERO (r
))
8144 for (i
= 0; i
< fp_consts_inited
; i
++)
8145 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
8151 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8153 neg_const_double_rtx_ok_for_fpa (rtx x
)
8158 if (!fp_consts_inited
)
8161 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8162 r
= real_value_negate (&r
);
8163 if (REAL_VALUE_MINUS_ZERO (r
))
8166 for (i
= 0; i
< 8; i
++)
8167 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
8174 /* VFPv3 has a fairly wide range of representable immediates, formed from
8175 "quarter-precision" floating-point values. These can be evaluated using this
8176 formula (with ^ for exponentiation):
8180 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8181 16 <= n <= 31 and 0 <= r <= 7.
8183 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8185 - A (most-significant) is the sign bit.
8186 - BCD are the exponent (encoded as r XOR 3).
8187 - EFGH are the mantissa (encoded as n - 16).
8190 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8191 fconst[sd] instruction, or -1 if X isn't suitable. */
8193 vfp3_const_double_index (rtx x
)
8195 REAL_VALUE_TYPE r
, m
;
8197 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
8198 unsigned HOST_WIDE_INT mask
;
8199 HOST_WIDE_INT m1
, m2
;
8200 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
8202 if (!TARGET_VFP3
|| GET_CODE (x
) != CONST_DOUBLE
)
8205 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8207 /* We can't represent these things, so detect them first. */
8208 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
8211 /* Extract sign, exponent and mantissa. */
8212 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
8213 r
= real_value_abs (&r
);
8214 exponent
= REAL_EXP (&r
);
8215 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8216 highest (sign) bit, with a fixed binary point at bit point_pos.
8217 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8218 bits for the mantissa, this may fail (low bits would be lost). */
8219 real_ldexp (&m
, &r
, point_pos
- exponent
);
8220 REAL_VALUE_TO_INT (&m1
, &m2
, m
);
8224 /* If there are bits set in the low part of the mantissa, we can't
8225 represent this value. */
8229 /* Now make it so that mantissa contains the most-significant bits, and move
8230 the point_pos to indicate that the least-significant bits have been
8232 point_pos
-= HOST_BITS_PER_WIDE_INT
;
8235 /* We can permit four significant bits of mantissa only, plus a high bit
8236 which is always 1. */
8237 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
8238 if ((mantissa
& mask
) != 0)
8241 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8242 mantissa
>>= point_pos
- 5;
8244 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8245 floating-point immediate zero with Neon using an integer-zero load, but
8246 that case is handled elsewhere.) */
8250 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
8252 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8253 normalized significands are in the range [1, 2). (Our mantissa is shifted
8254 left 4 places at this point relative to normalized IEEE754 values). GCC
8255 internally uses [0.5, 1) (see real.c), so the exponent returned from
8256 REAL_EXP must be altered. */
8257 exponent
= 5 - exponent
;
8259 if (exponent
< 0 || exponent
> 7)
8262 /* Sign, mantissa and exponent are now in the correct form to plug into the
8263 formula described in the comment above. */
8264 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
8267 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8269 vfp3_const_double_rtx (rtx x
)
8274 return vfp3_const_double_index (x
) != -1;
8277 /* Recognize immediates which can be used in various Neon instructions. Legal
8278 immediates are described by the following table (for VMVN variants, the
8279 bitwise inverse of the constant shown is recognized. In either case, VMOV
8280 is output and the correct instruction to use for a given constant is chosen
8281 by the assembler). The constant shown is replicated across all elements of
8282 the destination vector.
8284 insn elems variant constant (binary)
8285 ---- ----- ------- -----------------
8286 vmov i32 0 00000000 00000000 00000000 abcdefgh
8287 vmov i32 1 00000000 00000000 abcdefgh 00000000
8288 vmov i32 2 00000000 abcdefgh 00000000 00000000
8289 vmov i32 3 abcdefgh 00000000 00000000 00000000
8290 vmov i16 4 00000000 abcdefgh
8291 vmov i16 5 abcdefgh 00000000
8292 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8293 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8294 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8295 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8296 vmvn i16 10 00000000 abcdefgh
8297 vmvn i16 11 abcdefgh 00000000
8298 vmov i32 12 00000000 00000000 abcdefgh 11111111
8299 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8300 vmov i32 14 00000000 abcdefgh 11111111 11111111
8301 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8303 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8304 eeeeeeee ffffffff gggggggg hhhhhhhh
8305 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8307 For case 18, B = !b. Representable values are exactly those accepted by
8308 vfp3_const_double_index, but are output as floating-point numbers rather
8311 Variants 0-5 (inclusive) may also be used as immediates for the second
8312 operand of VORR/VBIC instructions.
8314 The INVERSE argument causes the bitwise inverse of the given operand to be
8315 recognized instead (used for recognizing legal immediates for the VAND/VORN
8316 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8317 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8318 output, rather than the real insns vbic/vorr).
8320 INVERSE makes no difference to the recognition of float vectors.
8322 The return value is the variant of immediate as shown in the above table, or
8323 -1 if the given value doesn't match any of the listed patterns.
8326 neon_valid_immediate (rtx op
, enum machine_mode mode
, int inverse
,
8327 rtx
*modconst
, int *elementwidth
)
8329 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8331 for (i = 0; i < idx; i += (STRIDE)) \
8336 immtype = (CLASS); \
8337 elsize = (ELSIZE); \
8341 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
8342 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
8343 unsigned char bytes
[16];
8344 int immtype
= -1, matches
;
8345 unsigned int invmask
= inverse
? 0xff : 0;
8347 /* Vectors of float constants. */
8348 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
8350 rtx el0
= CONST_VECTOR_ELT (op
, 0);
8353 if (!vfp3_const_double_rtx (el0
))
8356 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
8358 for (i
= 1; i
< n_elts
; i
++)
8360 rtx elt
= CONST_VECTOR_ELT (op
, i
);
8363 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
8365 if (!REAL_VALUES_EQUAL (r0
, re
))
8370 *modconst
= CONST_VECTOR_ELT (op
, 0);
8378 /* Splat vector constant out into a byte vector. */
8379 for (i
= 0; i
< n_elts
; i
++)
8381 rtx el
= CONST_VECTOR_ELT (op
, i
);
8382 unsigned HOST_WIDE_INT elpart
;
8383 unsigned int part
, parts
;
8385 if (GET_CODE (el
) == CONST_INT
)
8387 elpart
= INTVAL (el
);
8390 else if (GET_CODE (el
) == CONST_DOUBLE
)
8392 elpart
= CONST_DOUBLE_LOW (el
);
8398 for (part
= 0; part
< parts
; part
++)
8401 for (byte
= 0; byte
< innersize
; byte
++)
8403 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
8404 elpart
>>= BITS_PER_UNIT
;
8406 if (GET_CODE (el
) == CONST_DOUBLE
)
8407 elpart
= CONST_DOUBLE_HIGH (el
);
8412 gcc_assert (idx
== GET_MODE_SIZE (mode
));
8416 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
8417 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8419 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8420 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8422 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8423 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
8425 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8426 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
8428 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
8430 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
8432 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
8433 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8435 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8436 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8438 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8439 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
8441 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8442 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
8444 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
8446 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
8448 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8449 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8451 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8452 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8454 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8455 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
8457 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8458 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
8460 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
8462 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
8463 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
8471 *elementwidth
= elsize
;
8475 unsigned HOST_WIDE_INT imm
= 0;
8477 /* Un-invert bytes of recognized vector, if necessary. */
8479 for (i
= 0; i
< idx
; i
++)
8480 bytes
[i
] ^= invmask
;
8484 /* FIXME: Broken on 32-bit H_W_I hosts. */
8485 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
8487 for (i
= 0; i
< 8; i
++)
8488 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
8489 << (i
* BITS_PER_UNIT
);
8491 *modconst
= GEN_INT (imm
);
8495 unsigned HOST_WIDE_INT imm
= 0;
8497 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
8498 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
8500 *modconst
= GEN_INT (imm
);
8508 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8509 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8510 float elements), and a modified constant (whatever should be output for a
8511 VMOV) in *MODCONST. */
8514 neon_immediate_valid_for_move (rtx op
, enum machine_mode mode
,
8515 rtx
*modconst
, int *elementwidth
)
8519 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
8525 *modconst
= tmpconst
;
8528 *elementwidth
= tmpwidth
;
8533 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8534 the immediate is valid, write a constant suitable for using as an operand
8535 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8536 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8539 neon_immediate_valid_for_logic (rtx op
, enum machine_mode mode
, int inverse
,
8540 rtx
*modconst
, int *elementwidth
)
8544 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
8546 if (retval
< 0 || retval
> 5)
8550 *modconst
= tmpconst
;
8553 *elementwidth
= tmpwidth
;
8558 /* Return a string suitable for output of Neon immediate logic operation
8562 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, enum machine_mode mode
,
8563 int inverse
, int quad
)
8565 int width
, is_valid
;
8566 static char templ
[40];
8568 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
8570 gcc_assert (is_valid
!= 0);
8573 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
8575 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
8580 /* Output a sequence of pairwise operations to implement a reduction.
8581 NOTE: We do "too much work" here, because pairwise operations work on two
8582 registers-worth of operands in one go. Unfortunately we can't exploit those
8583 extra calculations to do the full operation in fewer steps, I don't think.
8584 Although all vector elements of the result but the first are ignored, we
8585 actually calculate the same result in each of the elements. An alternative
8586 such as initially loading a vector with zero to use as each of the second
8587 operands would use up an additional register and take an extra instruction,
8588 for no particular gain. */
8591 neon_pairwise_reduce (rtx op0
, rtx op1
, enum machine_mode mode
,
8592 rtx (*reduc
) (rtx
, rtx
, rtx
))
8594 enum machine_mode inner
= GET_MODE_INNER (mode
);
8595 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
8598 for (i
= parts
/ 2; i
>= 1; i
/= 2)
8600 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
8601 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
8606 /* If VALS is a vector constant that can be loaded into a register
8607 using VDUP, generate instructions to do so and return an RTX to
8608 assign to the register. Otherwise return NULL_RTX. */
8611 neon_vdup_constant (rtx vals
)
8613 enum machine_mode mode
= GET_MODE (vals
);
8614 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
8615 int n_elts
= GET_MODE_NUNITS (mode
);
8616 bool all_same
= true;
8620 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
8623 for (i
= 0; i
< n_elts
; ++i
)
8625 x
= XVECEXP (vals
, 0, i
);
8626 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
8631 /* The elements are not all the same. We could handle repeating
8632 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8633 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8637 /* We can load this constant by using VDUP and a constant in a
8638 single ARM register. This will be cheaper than a vector
8641 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
8642 return gen_rtx_VEC_DUPLICATE (mode
, x
);
8645 /* Generate code to load VALS, which is a PARALLEL containing only
8646 constants (for vec_init) or CONST_VECTOR, efficiently into a
8647 register. Returns an RTX to copy into the register, or NULL_RTX
8648 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8651 neon_make_constant (rtx vals
)
8653 enum machine_mode mode
= GET_MODE (vals
);
8655 rtx const_vec
= NULL_RTX
;
8656 int n_elts
= GET_MODE_NUNITS (mode
);
8660 if (GET_CODE (vals
) == CONST_VECTOR
)
8662 else if (GET_CODE (vals
) == PARALLEL
)
8664 /* A CONST_VECTOR must contain only CONST_INTs and
8665 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8666 Only store valid constants in a CONST_VECTOR. */
8667 for (i
= 0; i
< n_elts
; ++i
)
8669 rtx x
= XVECEXP (vals
, 0, i
);
8670 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
8673 if (n_const
== n_elts
)
8674 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
8679 if (const_vec
!= NULL
8680 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
8681 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8683 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
8684 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8685 pipeline cycle; creating the constant takes one or two ARM
8688 else if (const_vec
!= NULL_RTX
)
8689 /* Load from constant pool. On Cortex-A8 this takes two cycles
8690 (for either double or quad vectors). We can not take advantage
8691 of single-cycle VLD1 because we need a PC-relative addressing
8695 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8696 We can not construct an initializer. */
8700 /* Initialize vector TARGET to VALS. */
8703 neon_expand_vector_init (rtx target
, rtx vals
)
8705 enum machine_mode mode
= GET_MODE (target
);
8706 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
8707 int n_elts
= GET_MODE_NUNITS (mode
);
8708 int n_var
= 0, one_var
= -1;
8709 bool all_same
= true;
8713 for (i
= 0; i
< n_elts
; ++i
)
8715 x
= XVECEXP (vals
, 0, i
);
8716 if (!CONSTANT_P (x
))
8717 ++n_var
, one_var
= i
;
8719 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
8725 rtx constant
= neon_make_constant (vals
);
8726 if (constant
!= NULL_RTX
)
8728 emit_move_insn (target
, constant
);
8733 /* Splat a single non-constant element if we can. */
8734 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
8736 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
8737 emit_insn (gen_rtx_SET (VOIDmode
, target
,
8738 gen_rtx_VEC_DUPLICATE (mode
, x
)));
8742 /* One field is non-constant. Load constant then overwrite varying
8743 field. This is more efficient than using the stack. */
8746 rtx copy
= copy_rtx (vals
);
8747 rtx index
= GEN_INT (one_var
);
8749 /* Load constant part of vector, substitute neighboring value for
8751 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
8752 neon_expand_vector_init (target
, copy
);
8754 /* Insert variable. */
8755 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
8759 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
8762 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
8765 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
8768 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
8771 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
8774 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
8777 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
8780 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
8783 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
8791 /* Construct the vector in memory one field at a time
8792 and load the whole vector. */
8793 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), 0);
8794 for (i
= 0; i
< n_elts
; i
++)
8795 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
8796 i
* GET_MODE_SIZE (inner_mode
)),
8797 XVECEXP (vals
, 0, i
));
8798 emit_move_insn (target
, mem
);
8801 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8802 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8803 reported source locations are bogus. */
8806 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
8811 gcc_assert (GET_CODE (operand
) == CONST_INT
);
8813 lane
= INTVAL (operand
);
8815 if (lane
< low
|| lane
>= high
)
8819 /* Bounds-check lanes. */
8822 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
8824 bounds_check (operand
, low
, high
, "lane out of range");
8827 /* Bounds-check constants. */
8830 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
8832 bounds_check (operand
, low
, high
, "constant out of range");
8836 neon_element_bits (enum machine_mode mode
)
8839 return GET_MODE_BITSIZE (mode
);
8841 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
8845 /* Predicates for `match_operand' and `match_operator'. */
8847 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8849 cirrus_memory_offset (rtx op
)
8851 /* Reject eliminable registers. */
8852 if (! (reload_in_progress
|| reload_completed
)
8853 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
8854 || reg_mentioned_p (arg_pointer_rtx
, op
)
8855 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
8856 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
8857 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
8858 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
8861 if (GET_CODE (op
) == MEM
)
8867 /* Match: (mem (reg)). */
8868 if (GET_CODE (ind
) == REG
)
8874 if (GET_CODE (ind
) == PLUS
8875 && GET_CODE (XEXP (ind
, 0)) == REG
8876 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
8877 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
)
8884 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8885 WB is true if full writeback address modes are allowed and is false
8886 if limited writeback address modes (POST_INC and PRE_DEC) are
8890 arm_coproc_mem_operand (rtx op
, bool wb
)
8894 /* Reject eliminable registers. */
8895 if (! (reload_in_progress
|| reload_completed
)
8896 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
8897 || reg_mentioned_p (arg_pointer_rtx
, op
)
8898 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
8899 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
8900 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
8901 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
8904 /* Constants are converted into offsets from labels. */
8905 if (GET_CODE (op
) != MEM
)
8910 if (reload_completed
8911 && (GET_CODE (ind
) == LABEL_REF
8912 || (GET_CODE (ind
) == CONST
8913 && GET_CODE (XEXP (ind
, 0)) == PLUS
8914 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
8915 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
8918 /* Match: (mem (reg)). */
8919 if (GET_CODE (ind
) == REG
)
8920 return arm_address_register_rtx_p (ind
, 0);
8922 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
8923 acceptable in any case (subject to verification by
8924 arm_address_register_rtx_p). We need WB to be true to accept
8925 PRE_INC and POST_DEC. */
8926 if (GET_CODE (ind
) == POST_INC
8927 || GET_CODE (ind
) == PRE_DEC
8929 && (GET_CODE (ind
) == PRE_INC
8930 || GET_CODE (ind
) == POST_DEC
)))
8931 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
8934 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
8935 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
8936 && GET_CODE (XEXP (ind
, 1)) == PLUS
8937 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
8938 ind
= XEXP (ind
, 1);
8943 if (GET_CODE (ind
) == PLUS
8944 && GET_CODE (XEXP (ind
, 0)) == REG
8945 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
8946 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
8947 && INTVAL (XEXP (ind
, 1)) > -1024
8948 && INTVAL (XEXP (ind
, 1)) < 1024
8949 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
8955 /* Return TRUE if OP is a memory operand which we can load or store a vector
8956 to/from. TYPE is one of the following values:
8957 0 - Vector load/stor (vldr)
8958 1 - Core registers (ldm)
8959 2 - Element/structure loads (vld1)
8962 neon_vector_mem_operand (rtx op
, int type
)
8966 /* Reject eliminable registers. */
8967 if (! (reload_in_progress
|| reload_completed
)
8968 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
8969 || reg_mentioned_p (arg_pointer_rtx
, op
)
8970 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
8971 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
8972 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
8973 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
8976 /* Constants are converted into offsets from labels. */
8977 if (GET_CODE (op
) != MEM
)
8982 if (reload_completed
8983 && (GET_CODE (ind
) == LABEL_REF
8984 || (GET_CODE (ind
) == CONST
8985 && GET_CODE (XEXP (ind
, 0)) == PLUS
8986 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
8987 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
8990 /* Match: (mem (reg)). */
8991 if (GET_CODE (ind
) == REG
)
8992 return arm_address_register_rtx_p (ind
, 0);
8994 /* Allow post-increment with Neon registers. */
8995 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
8996 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
8997 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
8999 /* FIXME: vld1 allows register post-modify. */
9005 && GET_CODE (ind
) == PLUS
9006 && GET_CODE (XEXP (ind
, 0)) == REG
9007 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
9008 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
9009 && INTVAL (XEXP (ind
, 1)) > -1024
9010 && INTVAL (XEXP (ind
, 1)) < 1016
9011 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
9017 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9020 neon_struct_mem_operand (rtx op
)
9024 /* Reject eliminable registers. */
9025 if (! (reload_in_progress
|| reload_completed
)
9026 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
9027 || reg_mentioned_p (arg_pointer_rtx
, op
)
9028 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
9029 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
9030 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
9031 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
9034 /* Constants are converted into offsets from labels. */
9035 if (GET_CODE (op
) != MEM
)
9040 if (reload_completed
9041 && (GET_CODE (ind
) == LABEL_REF
9042 || (GET_CODE (ind
) == CONST
9043 && GET_CODE (XEXP (ind
, 0)) == PLUS
9044 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
9045 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
9048 /* Match: (mem (reg)). */
9049 if (GET_CODE (ind
) == REG
)
9050 return arm_address_register_rtx_p (ind
, 0);
9052 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9053 if (GET_CODE (ind
) == POST_INC
9054 || GET_CODE (ind
) == PRE_DEC
)
9055 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
9060 /* Return true if X is a register that will be eliminated later on. */
9062 arm_eliminable_register (rtx x
)
9064 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
9065 || REGNO (x
) == ARG_POINTER_REGNUM
9066 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
9067 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
9070 /* Return GENERAL_REGS if a scratch register required to reload x to/from
9071 coprocessor registers. Otherwise return NO_REGS. */
9074 coproc_secondary_reload_class (enum machine_mode mode
, rtx x
, bool wb
)
9078 if (!TARGET_NEON_FP16
)
9079 return GENERAL_REGS
;
9080 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2))
9082 return GENERAL_REGS
;
9085 /* The neon move patterns handle all legitimate vector and struct
9088 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
9089 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
9090 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
9091 || VALID_NEON_STRUCT_MODE (mode
)))
9094 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
9097 return GENERAL_REGS
;
9100 /* Values which must be returned in the most-significant end of the return
9104 arm_return_in_msb (const_tree valtype
)
9106 return (TARGET_AAPCS_BASED
9108 && (AGGREGATE_TYPE_P (valtype
)
9109 || TREE_CODE (valtype
) == COMPLEX_TYPE
));
9112 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9113 Use by the Cirrus Maverick code which has to workaround
9114 a hardware bug triggered by such instructions. */
9116 arm_memory_load_p (rtx insn
)
9118 rtx body
, lhs
, rhs
;;
9120 if (insn
== NULL_RTX
|| GET_CODE (insn
) != INSN
)
9123 body
= PATTERN (insn
);
9125 if (GET_CODE (body
) != SET
)
9128 lhs
= XEXP (body
, 0);
9129 rhs
= XEXP (body
, 1);
9131 lhs
= REG_OR_SUBREG_RTX (lhs
);
9133 /* If the destination is not a general purpose
9134 register we do not have to worry. */
9135 if (GET_CODE (lhs
) != REG
9136 || REGNO_REG_CLASS (REGNO (lhs
)) != GENERAL_REGS
)
9139 /* As well as loads from memory we also have to react
9140 to loads of invalid constants which will be turned
9141 into loads from the minipool. */
9142 return (GET_CODE (rhs
) == MEM
9143 || GET_CODE (rhs
) == SYMBOL_REF
9144 || note_invalid_constants (insn
, -1, false));
9147 /* Return TRUE if INSN is a Cirrus instruction. */
9149 arm_cirrus_insn_p (rtx insn
)
9151 enum attr_cirrus attr
;
9153 /* get_attr cannot accept USE or CLOBBER. */
9155 || GET_CODE (insn
) != INSN
9156 || GET_CODE (PATTERN (insn
)) == USE
9157 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
9160 attr
= get_attr_cirrus (insn
);
9162 return attr
!= CIRRUS_NOT
;
9165 /* Cirrus reorg for invalid instruction combinations. */
9167 cirrus_reorg (rtx first
)
9169 enum attr_cirrus attr
;
9170 rtx body
= PATTERN (first
);
9174 /* Any branch must be followed by 2 non Cirrus instructions. */
9175 if (GET_CODE (first
) == JUMP_INSN
&& GET_CODE (body
) != RETURN
)
9178 t
= next_nonnote_insn (first
);
9180 if (arm_cirrus_insn_p (t
))
9183 if (arm_cirrus_insn_p (next_nonnote_insn (t
)))
9187 emit_insn_after (gen_nop (), first
);
9192 /* (float (blah)) is in parallel with a clobber. */
9193 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
9194 body
= XVECEXP (body
, 0, 0);
9196 if (GET_CODE (body
) == SET
)
9198 rtx lhs
= XEXP (body
, 0), rhs
= XEXP (body
, 1);
9200 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9201 be followed by a non Cirrus insn. */
9202 if (get_attr_cirrus (first
) == CIRRUS_DOUBLE
)
9204 if (arm_cirrus_insn_p (next_nonnote_insn (first
)))
9205 emit_insn_after (gen_nop (), first
);
9209 else if (arm_memory_load_p (first
))
9211 unsigned int arm_regno
;
9213 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9214 ldr/cfmv64hr combination where the Rd field is the same
9215 in both instructions must be split with a non Cirrus
9222 /* Get Arm register number for ldr insn. */
9223 if (GET_CODE (lhs
) == REG
)
9224 arm_regno
= REGNO (lhs
);
9227 gcc_assert (GET_CODE (rhs
) == REG
);
9228 arm_regno
= REGNO (rhs
);
9232 first
= next_nonnote_insn (first
);
9234 if (! arm_cirrus_insn_p (first
))
9237 body
= PATTERN (first
);
9239 /* (float (blah)) is in parallel with a clobber. */
9240 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0))
9241 body
= XVECEXP (body
, 0, 0);
9243 if (GET_CODE (body
) == FLOAT
)
9244 body
= XEXP (body
, 0);
9246 if (get_attr_cirrus (first
) == CIRRUS_MOVE
9247 && GET_CODE (XEXP (body
, 1)) == REG
9248 && arm_regno
== REGNO (XEXP (body
, 1)))
9249 emit_insn_after (gen_nop (), first
);
9255 /* get_attr cannot accept USE or CLOBBER. */
9257 || GET_CODE (first
) != INSN
9258 || GET_CODE (PATTERN (first
)) == USE
9259 || GET_CODE (PATTERN (first
)) == CLOBBER
)
9262 attr
= get_attr_cirrus (first
);
9264 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9265 must be followed by a non-coprocessor instruction. */
9266 if (attr
== CIRRUS_COMPARE
)
9270 t
= next_nonnote_insn (first
);
9272 if (arm_cirrus_insn_p (t
))
9275 if (arm_cirrus_insn_p (next_nonnote_insn (t
)))
9279 emit_insn_after (gen_nop (), first
);
9285 /* Return TRUE if X references a SYMBOL_REF. */
9287 symbol_mentioned_p (rtx x
)
9292 if (GET_CODE (x
) == SYMBOL_REF
)
9295 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9296 are constant offsets, not symbols. */
9297 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
9300 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
9302 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
9308 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
9309 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
9312 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
9319 /* Return TRUE if X references a LABEL_REF. */
9321 label_mentioned_p (rtx x
)
9326 if (GET_CODE (x
) == LABEL_REF
)
9329 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9330 instruction, but they are constant offsets, not symbols. */
9331 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
9334 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
9335 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
9341 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
9342 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
9345 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
9353 tls_mentioned_p (rtx x
)
9355 switch (GET_CODE (x
))
9358 return tls_mentioned_p (XEXP (x
, 0));
9361 if (XINT (x
, 1) == UNSPEC_TLS
)
9369 /* Must not copy any rtx that uses a pc-relative address. */
9372 arm_note_pic_base (rtx
*x
, void *date ATTRIBUTE_UNUSED
)
9374 if (GET_CODE (*x
) == UNSPEC
9375 && XINT (*x
, 1) == UNSPEC_PIC_BASE
)
9381 arm_cannot_copy_insn_p (rtx insn
)
9383 return for_each_rtx (&PATTERN (insn
), arm_note_pic_base
, NULL
);
9389 enum rtx_code code
= GET_CODE (x
);
9406 /* Return 1 if memory locations are adjacent. */
9408 adjacent_mem_locations (rtx a
, rtx b
)
9410 /* We don't guarantee to preserve the order of these memory refs. */
9411 if (volatile_refs_p (a
) || volatile_refs_p (b
))
9414 if ((GET_CODE (XEXP (a
, 0)) == REG
9415 || (GET_CODE (XEXP (a
, 0)) == PLUS
9416 && GET_CODE (XEXP (XEXP (a
, 0), 1)) == CONST_INT
))
9417 && (GET_CODE (XEXP (b
, 0)) == REG
9418 || (GET_CODE (XEXP (b
, 0)) == PLUS
9419 && GET_CODE (XEXP (XEXP (b
, 0), 1)) == CONST_INT
)))
9421 HOST_WIDE_INT val0
= 0, val1
= 0;
9425 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
9427 reg0
= XEXP (XEXP (a
, 0), 0);
9428 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
9433 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
9435 reg1
= XEXP (XEXP (b
, 0), 0);
9436 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
9441 /* Don't accept any offset that will require multiple
9442 instructions to handle, since this would cause the
9443 arith_adjacentmem pattern to output an overlong sequence. */
9444 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
9447 /* Don't allow an eliminable register: register elimination can make
9448 the offset too large. */
9449 if (arm_eliminable_register (reg0
))
9452 val_diff
= val1
- val0
;
9456 /* If the target has load delay slots, then there's no benefit
9457 to using an ldm instruction unless the offset is zero and
9458 we are optimizing for size. */
9459 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
9460 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
9461 && (val_diff
== 4 || val_diff
== -4));
9464 return ((REGNO (reg0
) == REGNO (reg1
))
9465 && (val_diff
== 4 || val_diff
== -4));
9471 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9472 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9473 instruction. ADD_OFFSET is nonzero if the base address register needs
9474 to be modified with an add instruction before we can use it. */
9477 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
9478 int nops
, HOST_WIDE_INT add_offset
)
9480 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9481 if the offset isn't small enough. The reason 2 ldrs are faster
9482 is because these ARMs are able to do more than one cache access
9483 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9484 whilst the ARM8 has a double bandwidth cache. This means that
9485 these cores can do both an instruction fetch and a data fetch in
9486 a single cycle, so the trick of calculating the address into a
9487 scratch register (one of the result regs) and then doing a load
9488 multiple actually becomes slower (and no smaller in code size).
9489 That is the transformation
9491 ldr rd1, [rbase + offset]
9492 ldr rd2, [rbase + offset + 4]
9496 add rd1, rbase, offset
9497 ldmia rd1, {rd1, rd2}
9499 produces worse code -- '3 cycles + any stalls on rd2' instead of
9500 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9501 access per cycle, the first sequence could never complete in less
9502 than 6 cycles, whereas the ldm sequence would only take 5 and
9503 would make better use of sequential accesses if not hitting the
9506 We cheat here and test 'arm_ld_sched' which we currently know to
9507 only be true for the ARM8, ARM9 and StrongARM. If this ever
9508 changes, then the test below needs to be reworked. */
9509 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
9512 /* XScale has load-store double instructions, but they have stricter
9513 alignment requirements than load-store multiple, so we cannot
9516 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9517 the pipeline until completion.
9525 An ldr instruction takes 1-3 cycles, but does not block the
9534 Best case ldr will always win. However, the more ldr instructions
9535 we issue, the less likely we are to be able to schedule them well.
9536 Using ldr instructions also increases code size.
9538 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9539 for counts of 3 or 4 regs. */
9540 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
9545 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9546 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9547 an array ORDER which describes the sequence to use when accessing the
9548 offsets that produces an ascending order. In this sequence, each
9549 offset must be larger by exactly 4 than the previous one. ORDER[0]
9550 must have been filled in with the lowest offset by the caller.
9551 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9552 we use to verify that ORDER produces an ascending order of registers.
9553 Return true if it was possible to construct such an order, false if
9557 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
9561 for (i
= 1; i
< nops
; i
++)
9565 order
[i
] = order
[i
- 1];
9566 for (j
= 0; j
< nops
; j
++)
9567 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
9569 /* We must find exactly one offset that is higher than the
9570 previous one by 4. */
9571 if (order
[i
] != order
[i
- 1])
9575 if (order
[i
] == order
[i
- 1])
9577 /* The register numbers must be ascending. */
9578 if (unsorted_regs
!= NULL
9579 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
9585 /* Used to determine in a peephole whether a sequence of load
9586 instructions can be changed into a load-multiple instruction.
9587 NOPS is the number of separate load instructions we are examining. The
9588 first NOPS entries in OPERANDS are the destination registers, the
9589 next NOPS entries are memory operands. If this function is
9590 successful, *BASE is set to the common base register of the memory
9591 accesses; *LOAD_OFFSET is set to the first memory location's offset
9592 from that base register.
9593 REGS is an array filled in with the destination register numbers.
9594 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
9595 insn numbers to an ascending order of stores. If CHECK_REGS is true,
9596 the sequence of registers in REGS matches the loads from ascending memory
9597 locations, and the function verifies that the register numbers are
9598 themselves ascending. If CHECK_REGS is false, the register numbers
9599 are stored in the order they are found in the operands. */
9601 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
9602 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
9604 int unsorted_regs
[MAX_LDM_STM_OPS
];
9605 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
9606 int order
[MAX_LDM_STM_OPS
];
9607 rtx base_reg_rtx
= NULL
;
9611 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9612 easily extended if required. */
9613 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
9615 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
9617 /* Loop over the operands and check that the memory references are
9618 suitable (i.e. immediate offsets from the same base register). At
9619 the same time, extract the target register, and the memory
9621 for (i
= 0; i
< nops
; i
++)
9626 /* Convert a subreg of a mem into the mem itself. */
9627 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
9628 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
9630 gcc_assert (GET_CODE (operands
[nops
+ i
]) == MEM
);
9632 /* Don't reorder volatile memory references; it doesn't seem worth
9633 looking for the case where the order is ok anyway. */
9634 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
9637 offset
= const0_rtx
;
9639 if ((GET_CODE (reg
= XEXP (operands
[nops
+ i
], 0)) == REG
9640 || (GET_CODE (reg
) == SUBREG
9641 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9642 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
9643 && ((GET_CODE (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0))
9645 || (GET_CODE (reg
) == SUBREG
9646 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9647 && (GET_CODE (offset
= XEXP (XEXP (operands
[nops
+ i
], 0), 1))
9652 base_reg
= REGNO (reg
);
9654 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
9657 else if (base_reg
!= (int) REGNO (reg
))
9658 /* Not addressed from the same base register. */
9661 unsorted_regs
[i
] = (GET_CODE (operands
[i
]) == REG
9662 ? REGNO (operands
[i
])
9663 : REGNO (SUBREG_REG (operands
[i
])));
9665 /* If it isn't an integer register, or if it overwrites the
9666 base register but isn't the last insn in the list, then
9667 we can't do this. */
9668 if (unsorted_regs
[i
] < 0
9669 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
9670 || unsorted_regs
[i
] > 14
9671 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
9674 unsorted_offsets
[i
] = INTVAL (offset
);
9675 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
9679 /* Not a suitable memory address. */
9683 /* All the useful information has now been extracted from the
9684 operands into unsorted_regs and unsorted_offsets; additionally,
9685 order[0] has been set to the lowest offset in the list. Sort
9686 the offsets into order, verifying that they are adjacent, and
9687 check that the register numbers are ascending. */
9688 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
9689 check_regs
? unsorted_regs
: NULL
))
9693 memcpy (saved_order
, order
, sizeof order
);
9699 for (i
= 0; i
< nops
; i
++)
9700 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
9702 *load_offset
= unsorted_offsets
[order
[0]];
9706 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
9709 if (unsorted_offsets
[order
[0]] == 0)
9710 ldm_case
= 1; /* ldmia */
9711 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
9712 ldm_case
= 2; /* ldmib */
9713 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
9714 ldm_case
= 3; /* ldmda */
9715 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
9716 ldm_case
= 4; /* ldmdb */
9717 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
9718 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
9723 if (!multiple_operation_profitable_p (false, nops
,
9725 ? unsorted_offsets
[order
[0]] : 0))
9731 /* Used to determine in a peephole whether a sequence of store instructions can
9732 be changed into a store-multiple instruction.
9733 NOPS is the number of separate store instructions we are examining.
9734 NOPS_TOTAL is the total number of instructions recognized by the peephole
9736 The first NOPS entries in OPERANDS are the source registers, the next
9737 NOPS entries are memory operands. If this function is successful, *BASE is
9738 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9739 to the first memory location's offset from that base register. REGS is an
9740 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9741 likewise filled with the corresponding rtx's.
9742 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
9743 numbers to an ascending order of stores.
9744 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9745 from ascending memory locations, and the function verifies that the register
9746 numbers are themselves ascending. If CHECK_REGS is false, the register
9747 numbers are stored in the order they are found in the operands. */
9749 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
9750 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
9751 HOST_WIDE_INT
*load_offset
, bool check_regs
)
9753 int unsorted_regs
[MAX_LDM_STM_OPS
];
9754 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
9755 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
9756 int order
[MAX_LDM_STM_OPS
];
9758 rtx base_reg_rtx
= NULL
;
9761 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9762 easily extended if required. */
9763 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
9765 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
9767 /* Loop over the operands and check that the memory references are
9768 suitable (i.e. immediate offsets from the same base register). At
9769 the same time, extract the target register, and the memory
9771 for (i
= 0; i
< nops
; i
++)
9776 /* Convert a subreg of a mem into the mem itself. */
9777 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
9778 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
9780 gcc_assert (GET_CODE (operands
[nops
+ i
]) == MEM
);
9782 /* Don't reorder volatile memory references; it doesn't seem worth
9783 looking for the case where the order is ok anyway. */
9784 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
9787 offset
= const0_rtx
;
9789 if ((GET_CODE (reg
= XEXP (operands
[nops
+ i
], 0)) == REG
9790 || (GET_CODE (reg
) == SUBREG
9791 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9792 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
9793 && ((GET_CODE (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0))
9795 || (GET_CODE (reg
) == SUBREG
9796 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9797 && (GET_CODE (offset
= XEXP (XEXP (operands
[nops
+ i
], 0), 1))
9800 unsorted_reg_rtxs
[i
] = (GET_CODE (operands
[i
]) == REG
9801 ? operands
[i
] : SUBREG_REG (operands
[i
]));
9802 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
9806 base_reg
= REGNO (reg
);
9808 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
9811 else if (base_reg
!= (int) REGNO (reg
))
9812 /* Not addressed from the same base register. */
9815 /* If it isn't an integer register, then we can't do this. */
9816 if (unsorted_regs
[i
] < 0
9817 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
9818 || (TARGET_THUMB2
&& unsorted_regs
[i
] == base_reg
)
9819 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
9820 || unsorted_regs
[i
] > 14)
9823 unsorted_offsets
[i
] = INTVAL (offset
);
9824 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
9828 /* Not a suitable memory address. */
9832 /* All the useful information has now been extracted from the
9833 operands into unsorted_regs and unsorted_offsets; additionally,
9834 order[0] has been set to the lowest offset in the list. Sort
9835 the offsets into order, verifying that they are adjacent, and
9836 check that the register numbers are ascending. */
9837 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
9838 check_regs
? unsorted_regs
: NULL
))
9842 memcpy (saved_order
, order
, sizeof order
);
9848 for (i
= 0; i
< nops
; i
++)
9850 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
9852 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
9855 *load_offset
= unsorted_offsets
[order
[0]];
9859 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
9862 if (unsorted_offsets
[order
[0]] == 0)
9863 stm_case
= 1; /* stmia */
9864 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
9865 stm_case
= 2; /* stmib */
9866 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
9867 stm_case
= 3; /* stmda */
9868 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
9869 stm_case
= 4; /* stmdb */
9873 if (!multiple_operation_profitable_p (false, nops
, 0))
9879 /* Routines for use in generating RTL. */
9881 /* Generate a load-multiple instruction. COUNT is the number of loads in
9882 the instruction; REGS and MEMS are arrays containing the operands.
9883 BASEREG is the base register to be used in addressing the memory operands.
9884 WBACK_OFFSET is nonzero if the instruction should update the base
9888 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
9889 HOST_WIDE_INT wback_offset
)
9894 if (!multiple_operation_profitable_p (false, count
, 0))
9900 for (i
= 0; i
< count
; i
++)
9901 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
9903 if (wback_offset
!= 0)
9904 emit_move_insn (basereg
, plus_constant (basereg
, wback_offset
));
9912 result
= gen_rtx_PARALLEL (VOIDmode
,
9913 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
9914 if (wback_offset
!= 0)
9916 XVECEXP (result
, 0, 0)
9917 = gen_rtx_SET (VOIDmode
, basereg
,
9918 plus_constant (basereg
, wback_offset
));
9923 for (j
= 0; i
< count
; i
++, j
++)
9924 XVECEXP (result
, 0, i
)
9925 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
9930 /* Generate a store-multiple instruction. COUNT is the number of stores in
9931 the instruction; REGS and MEMS are arrays containing the operands.
9932 BASEREG is the base register to be used in addressing the memory operands.
9933 WBACK_OFFSET is nonzero if the instruction should update the base
9937 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
9938 HOST_WIDE_INT wback_offset
)
9943 if (GET_CODE (basereg
) == PLUS
)
9944 basereg
= XEXP (basereg
, 0);
9946 if (!multiple_operation_profitable_p (false, count
, 0))
9952 for (i
= 0; i
< count
; i
++)
9953 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
9955 if (wback_offset
!= 0)
9956 emit_move_insn (basereg
, plus_constant (basereg
, wback_offset
));
9964 result
= gen_rtx_PARALLEL (VOIDmode
,
9965 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
9966 if (wback_offset
!= 0)
9968 XVECEXP (result
, 0, 0)
9969 = gen_rtx_SET (VOIDmode
, basereg
,
9970 plus_constant (basereg
, wback_offset
));
9975 for (j
= 0; i
< count
; i
++, j
++)
9976 XVECEXP (result
, 0, i
)
9977 = gen_rtx_SET (VOIDmode
, mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
9982 /* Generate either a load-multiple or a store-multiple instruction. This
9983 function can be used in situations where we can start with a single MEM
9984 rtx and adjust its address upwards.
9985 COUNT is the number of operations in the instruction, not counting a
9986 possible update of the base register. REGS is an array containing the
9988 BASEREG is the base register to be used in addressing the memory operands,
9989 which are constructed from BASEMEM.
9990 WRITE_BACK specifies whether the generated instruction should include an
9991 update of the base register.
9992 OFFSETP is used to pass an offset to and from this function; this offset
9993 is not used when constructing the address (instead BASEMEM should have an
9994 appropriate offset in its address), it is used only for setting
9995 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
9998 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
9999 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
10001 rtx mems
[MAX_LDM_STM_OPS
];
10002 HOST_WIDE_INT offset
= *offsetp
;
10005 gcc_assert (count
<= MAX_LDM_STM_OPS
);
10007 if (GET_CODE (basereg
) == PLUS
)
10008 basereg
= XEXP (basereg
, 0);
10010 for (i
= 0; i
< count
; i
++)
10012 rtx addr
= plus_constant (basereg
, i
* 4);
10013 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
10021 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
10022 write_back
? 4 * count
: 0);
10024 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
10025 write_back
? 4 * count
: 0);
10029 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
10030 rtx basemem
, HOST_WIDE_INT
*offsetp
)
10032 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
10037 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
10038 rtx basemem
, HOST_WIDE_INT
*offsetp
)
10040 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
10044 /* Called from a peephole2 expander to turn a sequence of loads into an
10045 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10046 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10047 is true if we can reorder the registers because they are used commutatively
10049 Returns true iff we could generate a new instruction. */
10052 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
10054 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10055 rtx mems
[MAX_LDM_STM_OPS
];
10056 int i
, j
, base_reg
;
10058 HOST_WIDE_INT offset
;
10059 int write_back
= FALSE
;
10063 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
10064 &base_reg
, &offset
, !sort_regs
);
10070 for (i
= 0; i
< nops
- 1; i
++)
10071 for (j
= i
+ 1; j
< nops
; j
++)
10072 if (regs
[i
] > regs
[j
])
10078 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10082 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
10083 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
10089 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
10090 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
10092 if (!TARGET_THUMB1
)
10094 base_reg
= regs
[0];
10095 base_reg_rtx
= newbase
;
10099 for (i
= 0; i
< nops
; i
++)
10101 addr
= plus_constant (base_reg_rtx
, offset
+ i
* 4);
10102 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10105 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
10106 write_back
? offset
+ i
* 4 : 0));
10110 /* Called from a peephole2 expander to turn a sequence of stores into an
10111 STM instruction. OPERANDS are the operands found by the peephole matcher;
10112 NOPS indicates how many separate stores we are trying to combine.
10113 Returns true iff we could generate a new instruction. */
10116 gen_stm_seq (rtx
*operands
, int nops
)
10119 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10120 rtx mems
[MAX_LDM_STM_OPS
];
10123 HOST_WIDE_INT offset
;
10124 int write_back
= FALSE
;
10127 bool base_reg_dies
;
10129 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
10130 mem_order
, &base_reg
, &offset
, true);
10135 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10137 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
10140 gcc_assert (base_reg_dies
);
10146 gcc_assert (base_reg_dies
);
10147 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
10151 addr
= plus_constant (base_reg_rtx
, offset
);
10153 for (i
= 0; i
< nops
; i
++)
10155 addr
= plus_constant (base_reg_rtx
, offset
+ i
* 4);
10156 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10159 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
10160 write_back
? offset
+ i
* 4 : 0));
10164 /* Called from a peephole2 expander to turn a sequence of stores that are
10165 preceded by constant loads into an STM instruction. OPERANDS are the
10166 operands found by the peephole matcher; NOPS indicates how many
10167 separate stores we are trying to combine; there are 2 * NOPS
10168 instructions in the peephole.
10169 Returns true iff we could generate a new instruction. */
10172 gen_const_stm_seq (rtx
*operands
, int nops
)
10174 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
10175 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10176 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
10177 rtx mems
[MAX_LDM_STM_OPS
];
10180 HOST_WIDE_INT offset
;
10181 int write_back
= FALSE
;
10184 bool base_reg_dies
;
10186 HARD_REG_SET allocated
;
10188 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
10189 mem_order
, &base_reg
, &offset
, false);
10194 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
10196 /* If the same register is used more than once, try to find a free
10198 CLEAR_HARD_REG_SET (allocated
);
10199 for (i
= 0; i
< nops
; i
++)
10201 for (j
= i
+ 1; j
< nops
; j
++)
10202 if (regs
[i
] == regs
[j
])
10204 rtx t
= peep2_find_free_register (0, nops
* 2,
10205 TARGET_THUMB1
? "l" : "r",
10206 SImode
, &allocated
);
10210 regs
[i
] = REGNO (t
);
10214 /* Compute an ordering that maps the register numbers to an ascending
10217 for (i
= 0; i
< nops
; i
++)
10218 if (regs
[i
] < regs
[reg_order
[0]])
10221 for (i
= 1; i
< nops
; i
++)
10223 int this_order
= reg_order
[i
- 1];
10224 for (j
= 0; j
< nops
; j
++)
10225 if (regs
[j
] > regs
[reg_order
[i
- 1]]
10226 && (this_order
== reg_order
[i
- 1]
10227 || regs
[j
] < regs
[this_order
]))
10229 reg_order
[i
] = this_order
;
10232 /* Ensure that registers that must be live after the instruction end
10233 up with the correct value. */
10234 for (i
= 0; i
< nops
; i
++)
10236 int this_order
= reg_order
[i
];
10237 if ((this_order
!= mem_order
[i
]
10238 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
10239 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
10243 /* Load the constants. */
10244 for (i
= 0; i
< nops
; i
++)
10246 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
10247 sorted_regs
[i
] = regs
[reg_order
[i
]];
10248 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
10251 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10253 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
10256 gcc_assert (base_reg_dies
);
10262 gcc_assert (base_reg_dies
);
10263 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
10267 addr
= plus_constant (base_reg_rtx
, offset
);
10269 for (i
= 0; i
< nops
; i
++)
10271 addr
= plus_constant (base_reg_rtx
, offset
+ i
* 4);
10272 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10275 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
10276 write_back
? offset
+ i
* 4 : 0));
10281 arm_gen_movmemqi (rtx
*operands
)
10283 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
10284 HOST_WIDE_INT srcoffset
, dstoffset
;
10286 rtx src
, dst
, srcbase
, dstbase
;
10287 rtx part_bytes_reg
= NULL
;
10290 if (GET_CODE (operands
[2]) != CONST_INT
10291 || GET_CODE (operands
[3]) != CONST_INT
10292 || INTVAL (operands
[2]) > 64
10293 || INTVAL (operands
[3]) & 3)
10296 dstbase
= operands
[0];
10297 srcbase
= operands
[1];
10299 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
10300 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
10302 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
10303 out_words_to_go
= INTVAL (operands
[2]) / 4;
10304 last_bytes
= INTVAL (operands
[2]) & 3;
10305 dstoffset
= srcoffset
= 0;
10307 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
10308 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
10310 for (i
= 0; in_words_to_go
>= 2; i
+=4)
10312 if (in_words_to_go
> 4)
10313 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
10314 TRUE
, srcbase
, &srcoffset
));
10316 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
10317 src
, FALSE
, srcbase
,
10320 if (out_words_to_go
)
10322 if (out_words_to_go
> 4)
10323 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
10324 TRUE
, dstbase
, &dstoffset
));
10325 else if (out_words_to_go
!= 1)
10326 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
10327 out_words_to_go
, dst
,
10330 dstbase
, &dstoffset
));
10333 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
10334 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
10335 if (last_bytes
!= 0)
10337 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
10343 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
10344 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
10347 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10348 if (out_words_to_go
)
10352 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
10353 sreg
= copy_to_reg (mem
);
10355 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
10356 emit_move_insn (mem
, sreg
);
10359 gcc_assert (!in_words_to_go
); /* Sanity check */
10362 if (in_words_to_go
)
10364 gcc_assert (in_words_to_go
> 0);
10366 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
10367 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
10370 gcc_assert (!last_bytes
|| part_bytes_reg
);
10372 if (BYTES_BIG_ENDIAN
&& last_bytes
)
10374 rtx tmp
= gen_reg_rtx (SImode
);
10376 /* The bytes we want are in the top end of the word. */
10377 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
10378 GEN_INT (8 * (4 - last_bytes
))));
10379 part_bytes_reg
= tmp
;
10383 mem
= adjust_automodify_address (dstbase
, QImode
,
10384 plus_constant (dst
, last_bytes
- 1),
10385 dstoffset
+ last_bytes
- 1);
10386 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
10390 tmp
= gen_reg_rtx (SImode
);
10391 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
10392 part_bytes_reg
= tmp
;
10399 if (last_bytes
> 1)
10401 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
10402 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
10406 rtx tmp
= gen_reg_rtx (SImode
);
10407 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
10408 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
10409 part_bytes_reg
= tmp
;
10416 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
10417 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
10424 /* Select a dominance comparison mode if possible for a test of the general
10425 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10426 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10427 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10428 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10429 In all cases OP will be either EQ or NE, but we don't need to know which
10430 here. If we are unable to support a dominance comparison we return
10431 CC mode. This will then fail to match for the RTL expressions that
10432 generate this call. */
10434 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
10436 enum rtx_code cond1
, cond2
;
10439 /* Currently we will probably get the wrong result if the individual
10440 comparisons are not simple. This also ensures that it is safe to
10441 reverse a comparison if necessary. */
10442 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
10444 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
10448 /* The if_then_else variant of this tests the second condition if the
10449 first passes, but is true if the first fails. Reverse the first
10450 condition to get a true "inclusive-or" expression. */
10451 if (cond_or
== DOM_CC_NX_OR_Y
)
10452 cond1
= reverse_condition (cond1
);
10454 /* If the comparisons are not equal, and one doesn't dominate the other,
10455 then we can't do this. */
10457 && !comparison_dominates_p (cond1
, cond2
)
10458 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
10463 enum rtx_code temp
= cond1
;
10471 if (cond_or
== DOM_CC_X_AND_Y
)
10476 case EQ
: return CC_DEQmode
;
10477 case LE
: return CC_DLEmode
;
10478 case LEU
: return CC_DLEUmode
;
10479 case GE
: return CC_DGEmode
;
10480 case GEU
: return CC_DGEUmode
;
10481 default: gcc_unreachable ();
10485 if (cond_or
== DOM_CC_X_AND_Y
)
10497 gcc_unreachable ();
10501 if (cond_or
== DOM_CC_X_AND_Y
)
10513 gcc_unreachable ();
10517 if (cond_or
== DOM_CC_X_AND_Y
)
10518 return CC_DLTUmode
;
10523 return CC_DLTUmode
;
10525 return CC_DLEUmode
;
10529 gcc_unreachable ();
10533 if (cond_or
== DOM_CC_X_AND_Y
)
10534 return CC_DGTUmode
;
10539 return CC_DGTUmode
;
10541 return CC_DGEUmode
;
10545 gcc_unreachable ();
10548 /* The remaining cases only occur when both comparisons are the
10551 gcc_assert (cond1
== cond2
);
10555 gcc_assert (cond1
== cond2
);
10559 gcc_assert (cond1
== cond2
);
10563 gcc_assert (cond1
== cond2
);
10564 return CC_DLEUmode
;
10567 gcc_assert (cond1
== cond2
);
10568 return CC_DGEUmode
;
10571 gcc_unreachable ();
10576 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
10578 /* All floating point compares return CCFP if it is an equality
10579 comparison, and CCFPE otherwise. */
10580 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
10600 if (TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
10605 gcc_unreachable ();
10609 /* A compare with a shifted operand. Because of canonicalization, the
10610 comparison will have to be swapped when we emit the assembler. */
10611 if (GET_MODE (y
) == SImode
10612 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
10613 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
10614 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
10615 || GET_CODE (x
) == ROTATERT
))
10618 /* This operation is performed swapped, but since we only rely on the Z
10619 flag we don't need an additional mode. */
10620 if (GET_MODE (y
) == SImode
10621 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
10622 && GET_CODE (x
) == NEG
10623 && (op
== EQ
|| op
== NE
))
10626 /* This is a special case that is used by combine to allow a
10627 comparison of a shifted byte load to be split into a zero-extend
10628 followed by a comparison of the shifted integer (only valid for
10629 equalities and unsigned inequalities). */
10630 if (GET_MODE (x
) == SImode
10631 && GET_CODE (x
) == ASHIFT
10632 && GET_CODE (XEXP (x
, 1)) == CONST_INT
&& INTVAL (XEXP (x
, 1)) == 24
10633 && GET_CODE (XEXP (x
, 0)) == SUBREG
10634 && GET_CODE (SUBREG_REG (XEXP (x
, 0))) == MEM
10635 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
10636 && (op
== EQ
|| op
== NE
10637 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
10638 && GET_CODE (y
) == CONST_INT
)
10641 /* A construct for a conditional compare, if the false arm contains
10642 0, then both conditions must be true, otherwise either condition
10643 must be true. Not all conditions are possible, so CCmode is
10644 returned if it can't be done. */
10645 if (GET_CODE (x
) == IF_THEN_ELSE
10646 && (XEXP (x
, 2) == const0_rtx
10647 || XEXP (x
, 2) == const1_rtx
)
10648 && COMPARISON_P (XEXP (x
, 0))
10649 && COMPARISON_P (XEXP (x
, 1)))
10650 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
10651 INTVAL (XEXP (x
, 2)));
10653 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10654 if (GET_CODE (x
) == AND
10655 && (op
== EQ
|| op
== NE
)
10656 && COMPARISON_P (XEXP (x
, 0))
10657 && COMPARISON_P (XEXP (x
, 1)))
10658 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
10661 if (GET_CODE (x
) == IOR
10662 && (op
== EQ
|| op
== NE
)
10663 && COMPARISON_P (XEXP (x
, 0))
10664 && COMPARISON_P (XEXP (x
, 1)))
10665 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
10668 /* An operation (on Thumb) where we want to test for a single bit.
10669 This is done by shifting that bit up into the top bit of a
10670 scratch register; we can then branch on the sign bit. */
10672 && GET_MODE (x
) == SImode
10673 && (op
== EQ
|| op
== NE
)
10674 && GET_CODE (x
) == ZERO_EXTRACT
10675 && XEXP (x
, 1) == const1_rtx
)
10678 /* An operation that sets the condition codes as a side-effect, the
10679 V flag is not set correctly, so we can only use comparisons where
10680 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10682 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10683 if (GET_MODE (x
) == SImode
10685 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
10686 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
10687 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
10688 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
10689 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
10690 || GET_CODE (x
) == LSHIFTRT
10691 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
10692 || GET_CODE (x
) == ROTATERT
10693 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
10694 return CC_NOOVmode
;
10696 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
10699 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
10700 && GET_CODE (x
) == PLUS
10701 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
10704 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
10706 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10708 if (TARGET_ARM
&& TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
10715 /* A DImode comparison against zero can be implemented by
10716 or'ing the two halves together. */
10717 if (y
== const0_rtx
)
10720 /* We can do an equality test in three Thumb instructions. */
10730 /* DImode unsigned comparisons can be implemented by cmp +
10731 cmpeq without a scratch register. Not worth doing in
10742 /* DImode signed and unsigned comparisons can be implemented
10743 by cmp + sbcs with a scratch register, but that does not
10744 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10745 gcc_assert (op
!= EQ
&& op
!= NE
);
10749 gcc_unreachable ();
10756 /* X and Y are two things to compare using CODE. Emit the compare insn and
10757 return the rtx for register 0 in the proper mode. FP means this is a
10758 floating point compare: I don't think that it is needed on the arm. */
10760 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
)
10762 enum machine_mode mode
;
10764 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
10766 /* We might have X as a constant, Y as a register because of the predicates
10767 used for cmpdi. If so, force X to a register here. */
10768 if (dimode_comparison
&& !REG_P (x
))
10769 x
= force_reg (DImode
, x
);
10771 mode
= SELECT_CC_MODE (code
, x
, y
);
10772 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
10774 if (dimode_comparison
10775 && !(TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
10776 && mode
!= CC_CZmode
)
10780 /* To compare two non-zero values for equality, XOR them and
10781 then compare against zero. Not used for ARM mode; there
10782 CC_CZmode is cheaper. */
10783 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
10785 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
10788 /* A scratch register is required. */
10789 clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (SImode
));
10790 set
= gen_rtx_SET (VOIDmode
, cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
10791 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
10794 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
10799 /* Generate a sequence of insns that will generate the correct return
10800 address mask depending on the physical architecture that the program
10803 arm_gen_return_addr_mask (void)
10805 rtx reg
= gen_reg_rtx (Pmode
);
10807 emit_insn (gen_return_addr_mask (reg
));
10812 arm_reload_in_hi (rtx
*operands
)
10814 rtx ref
= operands
[1];
10816 HOST_WIDE_INT offset
= 0;
10818 if (GET_CODE (ref
) == SUBREG
)
10820 offset
= SUBREG_BYTE (ref
);
10821 ref
= SUBREG_REG (ref
);
10824 if (GET_CODE (ref
) == REG
)
10826 /* We have a pseudo which has been spilt onto the stack; there
10827 are two cases here: the first where there is a simple
10828 stack-slot replacement and a second where the stack-slot is
10829 out of range, or is used as a subreg. */
10830 if (reg_equiv_mem (REGNO (ref
)))
10832 ref
= reg_equiv_mem (REGNO (ref
));
10833 base
= find_replacement (&XEXP (ref
, 0));
10836 /* The slot is out of range, or was dressed up in a SUBREG. */
10837 base
= reg_equiv_address (REGNO (ref
));
10840 base
= find_replacement (&XEXP (ref
, 0));
10842 /* Handle the case where the address is too complex to be offset by 1. */
10843 if (GET_CODE (base
) == MINUS
10844 || (GET_CODE (base
) == PLUS
&& GET_CODE (XEXP (base
, 1)) != CONST_INT
))
10846 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10848 emit_set_insn (base_plus
, base
);
10851 else if (GET_CODE (base
) == PLUS
)
10853 /* The addend must be CONST_INT, or we would have dealt with it above. */
10854 HOST_WIDE_INT hi
, lo
;
10856 offset
+= INTVAL (XEXP (base
, 1));
10857 base
= XEXP (base
, 0);
10859 /* Rework the address into a legal sequence of insns. */
10860 /* Valid range for lo is -4095 -> 4095 */
10863 : -((-offset
) & 0xfff));
10865 /* Corner case, if lo is the max offset then we would be out of range
10866 once we have added the additional 1 below, so bump the msb into the
10867 pre-loading insn(s). */
10871 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
10872 ^ (HOST_WIDE_INT
) 0x80000000)
10873 - (HOST_WIDE_INT
) 0x80000000);
10875 gcc_assert (hi
+ lo
== offset
);
10879 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10881 /* Get the base address; addsi3 knows how to handle constants
10882 that require more than one insn. */
10883 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
10889 /* Operands[2] may overlap operands[0] (though it won't overlap
10890 operands[1]), that's why we asked for a DImode reg -- so we can
10891 use the bit that does not overlap. */
10892 if (REGNO (operands
[2]) == REGNO (operands
[0]))
10893 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10895 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
10897 emit_insn (gen_zero_extendqisi2 (scratch
,
10898 gen_rtx_MEM (QImode
,
10899 plus_constant (base
,
10901 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
10902 gen_rtx_MEM (QImode
,
10903 plus_constant (base
,
10905 if (!BYTES_BIG_ENDIAN
)
10906 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
10907 gen_rtx_IOR (SImode
,
10910 gen_rtx_SUBREG (SImode
, operands
[0], 0),
10914 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
10915 gen_rtx_IOR (SImode
,
10916 gen_rtx_ASHIFT (SImode
, scratch
,
10918 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
10921 /* Handle storing a half-word to memory during reload by synthesizing as two
10922 byte stores. Take care not to clobber the input values until after we
10923 have moved them somewhere safe. This code assumes that if the DImode
10924 scratch in operands[2] overlaps either the input value or output address
10925 in some way, then that value must die in this insn (we absolutely need
10926 two scratch registers for some corner cases). */
10928 arm_reload_out_hi (rtx
*operands
)
10930 rtx ref
= operands
[0];
10931 rtx outval
= operands
[1];
10933 HOST_WIDE_INT offset
= 0;
10935 if (GET_CODE (ref
) == SUBREG
)
10937 offset
= SUBREG_BYTE (ref
);
10938 ref
= SUBREG_REG (ref
);
10941 if (GET_CODE (ref
) == REG
)
10943 /* We have a pseudo which has been spilt onto the stack; there
10944 are two cases here: the first where there is a simple
10945 stack-slot replacement and a second where the stack-slot is
10946 out of range, or is used as a subreg. */
10947 if (reg_equiv_mem (REGNO (ref
)))
10949 ref
= reg_equiv_mem (REGNO (ref
));
10950 base
= find_replacement (&XEXP (ref
, 0));
10953 /* The slot is out of range, or was dressed up in a SUBREG. */
10954 base
= reg_equiv_address (REGNO (ref
));
10957 base
= find_replacement (&XEXP (ref
, 0));
10959 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
10961 /* Handle the case where the address is too complex to be offset by 1. */
10962 if (GET_CODE (base
) == MINUS
10963 || (GET_CODE (base
) == PLUS
&& GET_CODE (XEXP (base
, 1)) != CONST_INT
))
10965 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10967 /* Be careful not to destroy OUTVAL. */
10968 if (reg_overlap_mentioned_p (base_plus
, outval
))
10970 /* Updating base_plus might destroy outval, see if we can
10971 swap the scratch and base_plus. */
10972 if (!reg_overlap_mentioned_p (scratch
, outval
))
10975 scratch
= base_plus
;
10980 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
10982 /* Be conservative and copy OUTVAL into the scratch now,
10983 this should only be necessary if outval is a subreg
10984 of something larger than a word. */
10985 /* XXX Might this clobber base? I can't see how it can,
10986 since scratch is known to overlap with OUTVAL, and
10987 must be wider than a word. */
10988 emit_insn (gen_movhi (scratch_hi
, outval
));
10989 outval
= scratch_hi
;
10993 emit_set_insn (base_plus
, base
);
10996 else if (GET_CODE (base
) == PLUS
)
10998 /* The addend must be CONST_INT, or we would have dealt with it above. */
10999 HOST_WIDE_INT hi
, lo
;
11001 offset
+= INTVAL (XEXP (base
, 1));
11002 base
= XEXP (base
, 0);
11004 /* Rework the address into a legal sequence of insns. */
11005 /* Valid range for lo is -4095 -> 4095 */
11008 : -((-offset
) & 0xfff));
11010 /* Corner case, if lo is the max offset then we would be out of range
11011 once we have added the additional 1 below, so bump the msb into the
11012 pre-loading insn(s). */
11016 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
11017 ^ (HOST_WIDE_INT
) 0x80000000)
11018 - (HOST_WIDE_INT
) 0x80000000);
11020 gcc_assert (hi
+ lo
== offset
);
11024 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
11026 /* Be careful not to destroy OUTVAL. */
11027 if (reg_overlap_mentioned_p (base_plus
, outval
))
11029 /* Updating base_plus might destroy outval, see if we
11030 can swap the scratch and base_plus. */
11031 if (!reg_overlap_mentioned_p (scratch
, outval
))
11034 scratch
= base_plus
;
11039 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
11041 /* Be conservative and copy outval into scratch now,
11042 this should only be necessary if outval is a
11043 subreg of something larger than a word. */
11044 /* XXX Might this clobber base? I can't see how it
11045 can, since scratch is known to overlap with
11047 emit_insn (gen_movhi (scratch_hi
, outval
));
11048 outval
= scratch_hi
;
11052 /* Get the base address; addsi3 knows how to handle constants
11053 that require more than one insn. */
11054 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
11060 if (BYTES_BIG_ENDIAN
)
11062 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
11063 plus_constant (base
, offset
+ 1)),
11064 gen_lowpart (QImode
, outval
)));
11065 emit_insn (gen_lshrsi3 (scratch
,
11066 gen_rtx_SUBREG (SImode
, outval
, 0),
11068 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (base
, offset
)),
11069 gen_lowpart (QImode
, scratch
)));
11073 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (base
, offset
)),
11074 gen_lowpart (QImode
, outval
)));
11075 emit_insn (gen_lshrsi3 (scratch
,
11076 gen_rtx_SUBREG (SImode
, outval
, 0),
11078 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
11079 plus_constant (base
, offset
+ 1)),
11080 gen_lowpart (QImode
, scratch
)));
11084 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11085 (padded to the size of a word) should be passed in a register. */
11088 arm_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
11090 if (TARGET_AAPCS_BASED
)
11091 return must_pass_in_stack_var_size (mode
, type
);
11093 return must_pass_in_stack_var_size_or_pad (mode
, type
);
11097 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11098 Return true if an argument passed on the stack should be padded upwards,
11099 i.e. if the least-significant byte has useful data.
11100 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11101 aggregate types are placed in the lowest memory address. */
11104 arm_pad_arg_upward (enum machine_mode mode
, const_tree type
)
11106 if (!TARGET_AAPCS_BASED
)
11107 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
11109 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
11116 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11117 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11118 byte of the register has useful data, and return the opposite if the
11119 most significant byte does.
11120 For AAPCS, small aggregates and small complex types are always padded
11124 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED
,
11125 tree type
, int first ATTRIBUTE_UNUSED
)
11127 if (TARGET_AAPCS_BASED
11128 && BYTES_BIG_ENDIAN
11129 && (AGGREGATE_TYPE_P (type
) || TREE_CODE (type
) == COMPLEX_TYPE
)
11130 && int_size_in_bytes (type
) <= 4)
11133 /* Otherwise, use default padding. */
11134 return !BYTES_BIG_ENDIAN
;
11138 /* Print a symbolic form of X to the debug file, F. */
11140 arm_print_value (FILE *f
, rtx x
)
11142 switch (GET_CODE (x
))
11145 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
11149 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
11157 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
11159 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
11160 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
11168 fprintf (f
, "\"%s\"", XSTR (x
, 0));
11172 fprintf (f
, "`%s'", XSTR (x
, 0));
11176 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
11180 arm_print_value (f
, XEXP (x
, 0));
11184 arm_print_value (f
, XEXP (x
, 0));
11186 arm_print_value (f
, XEXP (x
, 1));
11194 fprintf (f
, "????");
11199 /* Routines for manipulation of the constant pool. */
11201 /* Arm instructions cannot load a large constant directly into a
11202 register; they have to come from a pc relative load. The constant
11203 must therefore be placed in the addressable range of the pc
11204 relative load. Depending on the precise pc relative load
11205 instruction the range is somewhere between 256 bytes and 4k. This
11206 means that we often have to dump a constant inside a function, and
11207 generate code to branch around it.
11209 It is important to minimize this, since the branches will slow
11210 things down and make the code larger.
11212 Normally we can hide the table after an existing unconditional
11213 branch so that there is no interruption of the flow, but in the
11214 worst case the code looks like this:
11232 We fix this by performing a scan after scheduling, which notices
11233 which instructions need to have their operands fetched from the
11234 constant table and builds the table.
11236 The algorithm starts by building a table of all the constants that
11237 need fixing up and all the natural barriers in the function (places
11238 where a constant table can be dropped without breaking the flow).
11239 For each fixup we note how far the pc-relative replacement will be
11240 able to reach and the offset of the instruction into the function.
11242 Having built the table we then group the fixes together to form
11243 tables that are as large as possible (subject to addressing
11244 constraints) and emit each table of constants after the last
11245 barrier that is within range of all the instructions in the group.
11246 If a group does not contain a barrier, then we forcibly create one
11247 by inserting a jump instruction into the flow. Once the table has
11248 been inserted, the insns are then modified to reference the
11249 relevant entry in the pool.
11251 Possible enhancements to the algorithm (not implemented) are:
11253 1) For some processors and object formats, there may be benefit in
11254 aligning the pools to the start of cache lines; this alignment
11255 would need to be taken into account when calculating addressability
11258 /* These typedefs are located at the start of this file, so that
11259 they can be used in the prototypes there. This comment is to
11260 remind readers of that fact so that the following structures
11261 can be understood more easily.
11263 typedef struct minipool_node Mnode;
11264 typedef struct minipool_fixup Mfix; */
11266 struct minipool_node
11268 /* Doubly linked chain of entries. */
11271 /* The maximum offset into the code that this entry can be placed. While
11272 pushing fixes for forward references, all entries are sorted in order
11273 of increasing max_address. */
11274 HOST_WIDE_INT max_address
;
11275 /* Similarly for an entry inserted for a backwards ref. */
11276 HOST_WIDE_INT min_address
;
11277 /* The number of fixes referencing this entry. This can become zero
11278 if we "unpush" an entry. In this case we ignore the entry when we
11279 come to emit the code. */
11281 /* The offset from the start of the minipool. */
11282 HOST_WIDE_INT offset
;
11283 /* The value in table. */
11285 /* The mode of value. */
11286 enum machine_mode mode
;
11287 /* The size of the value. With iWMMXt enabled
11288 sizes > 4 also imply an alignment of 8-bytes. */
11292 struct minipool_fixup
11296 HOST_WIDE_INT address
;
11298 enum machine_mode mode
;
11302 HOST_WIDE_INT forwards
;
11303 HOST_WIDE_INT backwards
;
11306 /* Fixes less than a word need padding out to a word boundary. */
11307 #define MINIPOOL_FIX_SIZE(mode) \
11308 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
11310 static Mnode
* minipool_vector_head
;
11311 static Mnode
* minipool_vector_tail
;
11312 static rtx minipool_vector_label
;
11313 static int minipool_pad
;
11315 /* The linked list of all minipool fixes required for this function. */
11316 Mfix
* minipool_fix_head
;
11317 Mfix
* minipool_fix_tail
;
11318 /* The fix entry for the current minipool, once it has been placed. */
11319 Mfix
* minipool_barrier
;
11321 /* Determines if INSN is the start of a jump table. Returns the end
11322 of the TABLE or NULL_RTX. */
11324 is_jump_table (rtx insn
)
11328 if (GET_CODE (insn
) == JUMP_INSN
11329 && JUMP_LABEL (insn
) != NULL
11330 && ((table
= next_real_insn (JUMP_LABEL (insn
)))
11331 == next_real_insn (insn
))
11333 && GET_CODE (table
) == JUMP_INSN
11334 && (GET_CODE (PATTERN (table
)) == ADDR_VEC
11335 || GET_CODE (PATTERN (table
)) == ADDR_DIFF_VEC
))
11341 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11342 #define JUMP_TABLES_IN_TEXT_SECTION 0
11345 static HOST_WIDE_INT
11346 get_jump_table_size (rtx insn
)
11348 /* ADDR_VECs only take room if read-only data does into the text
11350 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
11352 rtx body
= PATTERN (insn
);
11353 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
11354 HOST_WIDE_INT size
;
11355 HOST_WIDE_INT modesize
;
11357 modesize
= GET_MODE_SIZE (GET_MODE (body
));
11358 size
= modesize
* XVECLEN (body
, elt
);
11362 /* Round up size of TBB table to a halfword boundary. */
11363 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
11366 /* No padding necessary for TBH. */
11369 /* Add two bytes for alignment on Thumb. */
11374 gcc_unreachable ();
11382 /* Move a minipool fix MP from its current location to before MAX_MP.
11383 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11384 constraints may need updating. */
11386 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
11387 HOST_WIDE_INT max_address
)
11389 /* The code below assumes these are different. */
11390 gcc_assert (mp
!= max_mp
);
11392 if (max_mp
== NULL
)
11394 if (max_address
< mp
->max_address
)
11395 mp
->max_address
= max_address
;
11399 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
11400 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
11402 mp
->max_address
= max_address
;
11404 /* Unlink MP from its current position. Since max_mp is non-null,
11405 mp->prev must be non-null. */
11406 mp
->prev
->next
= mp
->next
;
11407 if (mp
->next
!= NULL
)
11408 mp
->next
->prev
= mp
->prev
;
11410 minipool_vector_tail
= mp
->prev
;
11412 /* Re-insert it before MAX_MP. */
11414 mp
->prev
= max_mp
->prev
;
11417 if (mp
->prev
!= NULL
)
11418 mp
->prev
->next
= mp
;
11420 minipool_vector_head
= mp
;
11423 /* Save the new entry. */
11426 /* Scan over the preceding entries and adjust their addresses as
11428 while (mp
->prev
!= NULL
11429 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
11431 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
11438 /* Add a constant to the minipool for a forward reference. Returns the
11439 node added or NULL if the constant will not fit in this pool. */
11441 add_minipool_forward_ref (Mfix
*fix
)
11443 /* If set, max_mp is the first pool_entry that has a lower
11444 constraint than the one we are trying to add. */
11445 Mnode
* max_mp
= NULL
;
11446 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
11449 /* If the minipool starts before the end of FIX->INSN then this FIX
11450 can not be placed into the current pool. Furthermore, adding the
11451 new constant pool entry may cause the pool to start FIX_SIZE bytes
11453 if (minipool_vector_head
&&
11454 (fix
->address
+ get_attr_length (fix
->insn
)
11455 >= minipool_vector_head
->max_address
- fix
->fix_size
))
11458 /* Scan the pool to see if a constant with the same value has
11459 already been added. While we are doing this, also note the
11460 location where we must insert the constant if it doesn't already
11462 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11464 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
11465 && fix
->mode
== mp
->mode
11466 && (GET_CODE (fix
->value
) != CODE_LABEL
11467 || (CODE_LABEL_NUMBER (fix
->value
)
11468 == CODE_LABEL_NUMBER (mp
->value
)))
11469 && rtx_equal_p (fix
->value
, mp
->value
))
11471 /* More than one fix references this entry. */
11473 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
11476 /* Note the insertion point if necessary. */
11478 && mp
->max_address
> max_address
)
11481 /* If we are inserting an 8-bytes aligned quantity and
11482 we have not already found an insertion point, then
11483 make sure that all such 8-byte aligned quantities are
11484 placed at the start of the pool. */
11485 if (ARM_DOUBLEWORD_ALIGN
11487 && fix
->fix_size
>= 8
11488 && mp
->fix_size
< 8)
11491 max_address
= mp
->max_address
;
11495 /* The value is not currently in the minipool, so we need to create
11496 a new entry for it. If MAX_MP is NULL, the entry will be put on
11497 the end of the list since the placement is less constrained than
11498 any existing entry. Otherwise, we insert the new fix before
11499 MAX_MP and, if necessary, adjust the constraints on the other
11502 mp
->fix_size
= fix
->fix_size
;
11503 mp
->mode
= fix
->mode
;
11504 mp
->value
= fix
->value
;
11506 /* Not yet required for a backwards ref. */
11507 mp
->min_address
= -65536;
11509 if (max_mp
== NULL
)
11511 mp
->max_address
= max_address
;
11513 mp
->prev
= minipool_vector_tail
;
11515 if (mp
->prev
== NULL
)
11517 minipool_vector_head
= mp
;
11518 minipool_vector_label
= gen_label_rtx ();
11521 mp
->prev
->next
= mp
;
11523 minipool_vector_tail
= mp
;
11527 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
11528 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
11530 mp
->max_address
= max_address
;
11533 mp
->prev
= max_mp
->prev
;
11535 if (mp
->prev
!= NULL
)
11536 mp
->prev
->next
= mp
;
11538 minipool_vector_head
= mp
;
11541 /* Save the new entry. */
11544 /* Scan over the preceding entries and adjust their addresses as
11546 while (mp
->prev
!= NULL
11547 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
11549 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
11557 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
11558 HOST_WIDE_INT min_address
)
11560 HOST_WIDE_INT offset
;
11562 /* The code below assumes these are different. */
11563 gcc_assert (mp
!= min_mp
);
11565 if (min_mp
== NULL
)
11567 if (min_address
> mp
->min_address
)
11568 mp
->min_address
= min_address
;
11572 /* We will adjust this below if it is too loose. */
11573 mp
->min_address
= min_address
;
11575 /* Unlink MP from its current position. Since min_mp is non-null,
11576 mp->next must be non-null. */
11577 mp
->next
->prev
= mp
->prev
;
11578 if (mp
->prev
!= NULL
)
11579 mp
->prev
->next
= mp
->next
;
11581 minipool_vector_head
= mp
->next
;
11583 /* Reinsert it after MIN_MP. */
11585 mp
->next
= min_mp
->next
;
11587 if (mp
->next
!= NULL
)
11588 mp
->next
->prev
= mp
;
11590 minipool_vector_tail
= mp
;
11596 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11598 mp
->offset
= offset
;
11599 if (mp
->refcount
> 0)
11600 offset
+= mp
->fix_size
;
11602 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
11603 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
11609 /* Add a constant to the minipool for a backward reference. Returns the
11610 node added or NULL if the constant will not fit in this pool.
11612 Note that the code for insertion for a backwards reference can be
11613 somewhat confusing because the calculated offsets for each fix do
11614 not take into account the size of the pool (which is still under
11617 add_minipool_backward_ref (Mfix
*fix
)
11619 /* If set, min_mp is the last pool_entry that has a lower constraint
11620 than the one we are trying to add. */
11621 Mnode
*min_mp
= NULL
;
11622 /* This can be negative, since it is only a constraint. */
11623 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
11626 /* If we can't reach the current pool from this insn, or if we can't
11627 insert this entry at the end of the pool without pushing other
11628 fixes out of range, then we don't try. This ensures that we
11629 can't fail later on. */
11630 if (min_address
>= minipool_barrier
->address
11631 || (minipool_vector_tail
->min_address
+ fix
->fix_size
11632 >= minipool_barrier
->address
))
11635 /* Scan the pool to see if a constant with the same value has
11636 already been added. While we are doing this, also note the
11637 location where we must insert the constant if it doesn't already
11639 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
11641 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
11642 && fix
->mode
== mp
->mode
11643 && (GET_CODE (fix
->value
) != CODE_LABEL
11644 || (CODE_LABEL_NUMBER (fix
->value
)
11645 == CODE_LABEL_NUMBER (mp
->value
)))
11646 && rtx_equal_p (fix
->value
, mp
->value
)
11647 /* Check that there is enough slack to move this entry to the
11648 end of the table (this is conservative). */
11649 && (mp
->max_address
11650 > (minipool_barrier
->address
11651 + minipool_vector_tail
->offset
11652 + minipool_vector_tail
->fix_size
)))
11655 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
11658 if (min_mp
!= NULL
)
11659 mp
->min_address
+= fix
->fix_size
;
11662 /* Note the insertion point if necessary. */
11663 if (mp
->min_address
< min_address
)
11665 /* For now, we do not allow the insertion of 8-byte alignment
11666 requiring nodes anywhere but at the start of the pool. */
11667 if (ARM_DOUBLEWORD_ALIGN
11668 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
11673 else if (mp
->max_address
11674 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
11676 /* Inserting before this entry would push the fix beyond
11677 its maximum address (which can happen if we have
11678 re-located a forwards fix); force the new fix to come
11680 if (ARM_DOUBLEWORD_ALIGN
11681 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
11686 min_address
= mp
->min_address
+ fix
->fix_size
;
11689 /* Do not insert a non-8-byte aligned quantity before 8-byte
11690 aligned quantities. */
11691 else if (ARM_DOUBLEWORD_ALIGN
11692 && fix
->fix_size
< 8
11693 && mp
->fix_size
>= 8)
11696 min_address
= mp
->min_address
+ fix
->fix_size
;
11701 /* We need to create a new entry. */
11703 mp
->fix_size
= fix
->fix_size
;
11704 mp
->mode
= fix
->mode
;
11705 mp
->value
= fix
->value
;
11707 mp
->max_address
= minipool_barrier
->address
+ 65536;
11709 mp
->min_address
= min_address
;
11711 if (min_mp
== NULL
)
11714 mp
->next
= minipool_vector_head
;
11716 if (mp
->next
== NULL
)
11718 minipool_vector_tail
= mp
;
11719 minipool_vector_label
= gen_label_rtx ();
11722 mp
->next
->prev
= mp
;
11724 minipool_vector_head
= mp
;
11728 mp
->next
= min_mp
->next
;
11732 if (mp
->next
!= NULL
)
11733 mp
->next
->prev
= mp
;
11735 minipool_vector_tail
= mp
;
11738 /* Save the new entry. */
11746 /* Scan over the following entries and adjust their offsets. */
11747 while (mp
->next
!= NULL
)
11749 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
11750 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
11753 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
11755 mp
->next
->offset
= mp
->offset
;
11764 assign_minipool_offsets (Mfix
*barrier
)
11766 HOST_WIDE_INT offset
= 0;
11769 minipool_barrier
= barrier
;
11771 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11773 mp
->offset
= offset
;
11775 if (mp
->refcount
> 0)
11776 offset
+= mp
->fix_size
;
11780 /* Output the literal table */
11782 dump_minipool (rtx scan
)
11788 if (ARM_DOUBLEWORD_ALIGN
)
11789 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11790 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
11797 fprintf (dump_file
,
11798 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11799 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
11801 scan
= emit_label_after (gen_label_rtx (), scan
);
11802 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
11803 scan
= emit_label_after (minipool_vector_label
, scan
);
11805 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
11807 if (mp
->refcount
> 0)
11811 fprintf (dump_file
,
11812 ";; Offset %u, min %ld, max %ld ",
11813 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
11814 (unsigned long) mp
->max_address
);
11815 arm_print_value (dump_file
, mp
->value
);
11816 fputc ('\n', dump_file
);
11819 switch (mp
->fix_size
)
11821 #ifdef HAVE_consttable_1
11823 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
11827 #ifdef HAVE_consttable_2
11829 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
11833 #ifdef HAVE_consttable_4
11835 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
11839 #ifdef HAVE_consttable_8
11841 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
11845 #ifdef HAVE_consttable_16
11847 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
11852 gcc_unreachable ();
11860 minipool_vector_head
= minipool_vector_tail
= NULL
;
11861 scan
= emit_insn_after (gen_consttable_end (), scan
);
11862 scan
= emit_barrier_after (scan
);
11865 /* Return the cost of forcibly inserting a barrier after INSN. */
11867 arm_barrier_cost (rtx insn
)
11869 /* Basing the location of the pool on the loop depth is preferable,
11870 but at the moment, the basic block information seems to be
11871 corrupt by this stage of the compilation. */
11872 int base_cost
= 50;
11873 rtx next
= next_nonnote_insn (insn
);
11875 if (next
!= NULL
&& GET_CODE (next
) == CODE_LABEL
)
11878 switch (GET_CODE (insn
))
11881 /* It will always be better to place the table before the label, rather
11890 return base_cost
- 10;
11893 return base_cost
+ 10;
11897 /* Find the best place in the insn stream in the range
11898 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11899 Create the barrier by inserting a jump and add a new fix entry for
11902 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
11904 HOST_WIDE_INT count
= 0;
11906 rtx from
= fix
->insn
;
11907 /* The instruction after which we will insert the jump. */
11908 rtx selected
= NULL
;
11910 /* The address at which the jump instruction will be placed. */
11911 HOST_WIDE_INT selected_address
;
11913 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
11914 rtx label
= gen_label_rtx ();
11916 selected_cost
= arm_barrier_cost (from
);
11917 selected_address
= fix
->address
;
11919 while (from
&& count
< max_count
)
11924 /* This code shouldn't have been called if there was a natural barrier
11926 gcc_assert (GET_CODE (from
) != BARRIER
);
11928 /* Count the length of this insn. */
11929 count
+= get_attr_length (from
);
11931 /* If there is a jump table, add its length. */
11932 tmp
= is_jump_table (from
);
11935 count
+= get_jump_table_size (tmp
);
11937 /* Jump tables aren't in a basic block, so base the cost on
11938 the dispatch insn. If we select this location, we will
11939 still put the pool after the table. */
11940 new_cost
= arm_barrier_cost (from
);
11942 if (count
< max_count
11943 && (!selected
|| new_cost
<= selected_cost
))
11946 selected_cost
= new_cost
;
11947 selected_address
= fix
->address
+ count
;
11950 /* Continue after the dispatch table. */
11951 from
= NEXT_INSN (tmp
);
11955 new_cost
= arm_barrier_cost (from
);
11957 if (count
< max_count
11958 && (!selected
|| new_cost
<= selected_cost
))
11961 selected_cost
= new_cost
;
11962 selected_address
= fix
->address
+ count
;
11965 from
= NEXT_INSN (from
);
11968 /* Make sure that we found a place to insert the jump. */
11969 gcc_assert (selected
);
11971 /* Make sure we do not split a call and its corresponding
11972 CALL_ARG_LOCATION note. */
11973 if (CALL_P (selected
))
11975 rtx next
= NEXT_INSN (selected
);
11976 if (next
&& NOTE_P (next
)
11977 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
11981 /* Create a new JUMP_INSN that branches around a barrier. */
11982 from
= emit_jump_insn_after (gen_jump (label
), selected
);
11983 JUMP_LABEL (from
) = label
;
11984 barrier
= emit_barrier_after (from
);
11985 emit_label_after (label
, barrier
);
11987 /* Create a minipool barrier entry for the new barrier. */
11988 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
11989 new_fix
->insn
= barrier
;
11990 new_fix
->address
= selected_address
;
11991 new_fix
->next
= fix
->next
;
11992 fix
->next
= new_fix
;
11997 /* Record that there is a natural barrier in the insn stream at
12000 push_minipool_barrier (rtx insn
, HOST_WIDE_INT address
)
12002 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
12005 fix
->address
= address
;
12008 if (minipool_fix_head
!= NULL
)
12009 minipool_fix_tail
->next
= fix
;
12011 minipool_fix_head
= fix
;
12013 minipool_fix_tail
= fix
;
12016 /* Record INSN, which will need fixing up to load a value from the
12017 minipool. ADDRESS is the offset of the insn since the start of the
12018 function; LOC is a pointer to the part of the insn which requires
12019 fixing; VALUE is the constant that must be loaded, which is of type
12022 push_minipool_fix (rtx insn
, HOST_WIDE_INT address
, rtx
*loc
,
12023 enum machine_mode mode
, rtx value
)
12025 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
12028 fix
->address
= address
;
12031 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
12032 fix
->value
= value
;
12033 fix
->forwards
= get_attr_pool_range (insn
);
12034 fix
->backwards
= get_attr_neg_pool_range (insn
);
12035 fix
->minipool
= NULL
;
12037 /* If an insn doesn't have a range defined for it, then it isn't
12038 expecting to be reworked by this code. Better to stop now than
12039 to generate duff assembly code. */
12040 gcc_assert (fix
->forwards
|| fix
->backwards
);
12042 /* If an entry requires 8-byte alignment then assume all constant pools
12043 require 4 bytes of padding. Trying to do this later on a per-pool
12044 basis is awkward because existing pool entries have to be modified. */
12045 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
12050 fprintf (dump_file
,
12051 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12052 GET_MODE_NAME (mode
),
12053 INSN_UID (insn
), (unsigned long) address
,
12054 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
12055 arm_print_value (dump_file
, fix
->value
);
12056 fprintf (dump_file
, "\n");
12059 /* Add it to the chain of fixes. */
12062 if (minipool_fix_head
!= NULL
)
12063 minipool_fix_tail
->next
= fix
;
12065 minipool_fix_head
= fix
;
12067 minipool_fix_tail
= fix
;
12070 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12071 Returns the number of insns needed, or 99 if we don't know how to
12074 arm_const_double_inline_cost (rtx val
)
12076 rtx lowpart
, highpart
;
12077 enum machine_mode mode
;
12079 mode
= GET_MODE (val
);
12081 if (mode
== VOIDmode
)
12084 gcc_assert (GET_MODE_SIZE (mode
) == 8);
12086 lowpart
= gen_lowpart (SImode
, val
);
12087 highpart
= gen_highpart_mode (SImode
, mode
, val
);
12089 gcc_assert (GET_CODE (lowpart
) == CONST_INT
);
12090 gcc_assert (GET_CODE (highpart
) == CONST_INT
);
12092 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
12093 NULL_RTX
, NULL_RTX
, 0, 0)
12094 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
12095 NULL_RTX
, NULL_RTX
, 0, 0));
12098 /* Return true if it is worthwhile to split a 64-bit constant into two
12099 32-bit operations. This is the case if optimizing for size, or
12100 if we have load delay slots, or if one 32-bit part can be done with
12101 a single data operation. */
12103 arm_const_double_by_parts (rtx val
)
12105 enum machine_mode mode
= GET_MODE (val
);
12108 if (optimize_size
|| arm_ld_sched
)
12111 if (mode
== VOIDmode
)
12114 part
= gen_highpart_mode (SImode
, mode
, val
);
12116 gcc_assert (GET_CODE (part
) == CONST_INT
);
12118 if (const_ok_for_arm (INTVAL (part
))
12119 || const_ok_for_arm (~INTVAL (part
)))
12122 part
= gen_lowpart (SImode
, val
);
12124 gcc_assert (GET_CODE (part
) == CONST_INT
);
12126 if (const_ok_for_arm (INTVAL (part
))
12127 || const_ok_for_arm (~INTVAL (part
)))
12133 /* Return true if it is possible to inline both the high and low parts
12134 of a 64-bit constant into 32-bit data processing instructions. */
12136 arm_const_double_by_immediates (rtx val
)
12138 enum machine_mode mode
= GET_MODE (val
);
12141 if (mode
== VOIDmode
)
12144 part
= gen_highpart_mode (SImode
, mode
, val
);
12146 gcc_assert (GET_CODE (part
) == CONST_INT
);
12148 if (!const_ok_for_arm (INTVAL (part
)))
12151 part
= gen_lowpart (SImode
, val
);
12153 gcc_assert (GET_CODE (part
) == CONST_INT
);
12155 if (!const_ok_for_arm (INTVAL (part
)))
12161 /* Scan INSN and note any of its operands that need fixing.
12162 If DO_PUSHES is false we do not actually push any of the fixups
12163 needed. The function returns TRUE if any fixups were needed/pushed.
12164 This is used by arm_memory_load_p() which needs to know about loads
12165 of constants that will be converted into minipool loads. */
12167 note_invalid_constants (rtx insn
, HOST_WIDE_INT address
, int do_pushes
)
12169 bool result
= false;
12172 extract_insn (insn
);
12174 if (!constrain_operands (1))
12175 fatal_insn_not_found (insn
);
12177 if (recog_data
.n_alternatives
== 0)
12180 /* Fill in recog_op_alt with information about the constraints of
12182 preprocess_constraints ();
12184 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
12186 /* Things we need to fix can only occur in inputs. */
12187 if (recog_data
.operand_type
[opno
] != OP_IN
)
12190 /* If this alternative is a memory reference, then any mention
12191 of constants in this alternative is really to fool reload
12192 into allowing us to accept one there. We need to fix them up
12193 now so that we output the right code. */
12194 if (recog_op_alt
[opno
][which_alternative
].memory_ok
)
12196 rtx op
= recog_data
.operand
[opno
];
12198 if (CONSTANT_P (op
))
12201 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
12202 recog_data
.operand_mode
[opno
], op
);
12205 else if (GET_CODE (op
) == MEM
12206 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
12207 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
12211 rtx cop
= avoid_constant_pool_reference (op
);
12213 /* Casting the address of something to a mode narrower
12214 than a word can cause avoid_constant_pool_reference()
12215 to return the pool reference itself. That's no good to
12216 us here. Lets just hope that we can use the
12217 constant pool value directly. */
12219 cop
= get_pool_constant (XEXP (op
, 0));
12221 push_minipool_fix (insn
, address
,
12222 recog_data
.operand_loc
[opno
],
12223 recog_data
.operand_mode
[opno
], cop
);
12234 /* Convert instructions to their cc-clobbering variant if possible, since
12235 that allows us to use smaller encodings. */
/* NOTE(review): text is extraction-garbled; several original lines (the
   `static void` header, local declarations, loop/brace lines) are elided.
   What remains shows a backwards dataflow walk per basic block: where the
   condition-code register is dead, a 2-operand low-register SET is wrapped
   in a PARALLEL with a CC clobber so a 16-bit Thumb-2 encoding can be
   chosen later.  */
12238 thumb2_reorg (void)
12243 INIT_REG_SET (&live
);
12245 /* We are freeing block_for_insn in the toplev to keep compatibility
12246 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12247 compute_bb_for_insn ();
/* Seed the live set from the block's live-out registers and simulate
   backwards so CC_REGNUM liveness is exact at each insn.  */
12254 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
12255 df_simulate_initialize_backwards (bb
, &live
);
12256 FOR_BB_INSNS_REVERSE (bb
, insn
)
12258 if (NONJUMP_INSN_P (insn
)
12259 && !REGNO_REG_SET_P (&live
, CC_REGNUM
))
12261 rtx pat
= PATTERN (insn
);
12262 if (GET_CODE (pat
) == SET
12263 && low_register_operand (XEXP (pat
, 0), SImode
)
12264 && thumb_16bit_operator (XEXP (pat
, 1), SImode
)
12265 && low_register_operand (XEXP (XEXP (pat
, 1), 0), SImode
)
12266 && low_register_operand (XEXP (XEXP (pat
, 1), 1), SImode
))
12268 rtx dst
= XEXP (pat
, 0);
12269 rtx src
= XEXP (pat
, 1);
12270 rtx op0
= XEXP (src
, 0);
/* op1 is only meaningful for commutative arithmetic codes.  */
12271 rtx op1
= (GET_RTX_CLASS (GET_CODE (src
)) == RTX_COMM_ARITH
12272 ? XEXP (src
, 1) : NULL
);
/* Case 1: destination matches the first operand, or the operation is
   PLUS/MINUS — attach the CC clobber directly.  */
12274 if (rtx_equal_p (dst
, op0
)
12275 || GET_CODE (src
) == PLUS
|| GET_CODE (src
) == MINUS
)
12277 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
12278 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
12279 rtvec vec
= gen_rtvec (2, pat
, clobber
);
12281 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
/* Force re-recognition with the new pattern.  */
12282 INSN_CODE (insn
) = -1;
12284 /* We can also handle a commutative operation where the
12285 second operand matches the destination. */
12286 else if (op1
&& rtx_equal_p (dst
, op1
))
12288 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
12289 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
/* Swap the commutative operands on a copy so DST matches operand 0.  */
12292 src
= copy_rtx (src
);
12293 XEXP (src
, 0) = op1
;
12294 XEXP (src
, 1) = op0
;
12295 pat
= gen_rtx_SET (VOIDmode
, dst
, src
);
12296 vec
= gen_rtvec (2, pat
, clobber
);
12297 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
12298 INSN_CODE (insn
) = -1;
12303 if (NONDEBUG_INSN_P (insn
))
12304 df_simulate_one_insn_backwards (bb
, insn
, &live
);
12308 CLEAR_REG_SET (&live
);
12311 /* Gcc puts the pool in the wrong place for ARM, since we can only
12312 load addresses a limited distance around the pc. We do some
12313 special munging to move the constant pool values to the correct
12314 point in the code. */
/* NOTE(review): the function header line is elided by the extraction
   garbling (presumably `arm_reorg` — TODO confirm against the original
   file), as are several declarations and braces.  The visible body:
   (1) scan all insns recording constants that need minipool placement,
   (2) repeatedly gather in-range fixes, pick or create a barrier, and
   (3) dump each minipool and rewrite the fixed operands to label+offset
   references.  */
12319 HOST_WIDE_INT address
= 0;
12325 minipool_fix_head
= minipool_fix_tail
= NULL
;
12327 /* The first insn must always be a note, or the code below won't
12328 scan it properly. */
12329 insn
= get_insns ();
12330 gcc_assert (GET_CODE (insn
) == NOTE
);
12333 /* Scan all the insns and record the operands that will need fixing. */
12334 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
12336 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12337 && (arm_cirrus_insn_p (insn
)
12338 || GET_CODE (insn
) == JUMP_INSN
12339 || arm_memory_load_p (insn
)))
12340 cirrus_reorg (insn
);
12342 if (GET_CODE (insn
) == BARRIER
)
12343 push_minipool_barrier (insn
, address
);
12344 else if (INSN_P (insn
))
12348 note_invalid_constants (insn
, address
, true);
12349 address
+= get_attr_length (insn
);
12351 /* If the insn is a vector jump, add the size of the table
12352 and skip the table. */
12353 if ((table
= is_jump_table (insn
)) != NULL
)
12355 address
+= get_jump_table_size (table
);
12361 fix
= minipool_fix_head
;
12363 /* Now scan the fixups and perform the required changes. */
12368 Mfix
* last_added_fix
;
12369 Mfix
* last_barrier
= NULL
;
12372 /* Skip any further barriers before the next fix. */
12373 while (fix
&& GET_CODE (fix
->insn
) == BARRIER
)
12376 /* No more fixes. */
12380 last_added_fix
= NULL
;
/* Collect fixes into the current pool until one no longer fits.  */
12382 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
12384 if (GET_CODE (ftmp
->insn
) == BARRIER
)
12386 if (ftmp
->address
>= minipool_vector_head
->max_address
)
12389 last_barrier
= ftmp
;
12391 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
12394 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
12397 /* If we found a barrier, drop back to that; any fixes that we
12398 could have reached but come after the barrier will now go in
12399 the next mini-pool. */
12400 if (last_barrier
!= NULL
)
12402 /* Reduce the refcount for those fixes that won't go into this
12404 for (fdel
= last_barrier
->next
;
12405 fdel
&& fdel
!= ftmp
;
12408 fdel
->minipool
->refcount
--;
12409 fdel
->minipool
= NULL
;
12412 ftmp
= last_barrier
;
12416 /* ftmp is first fix that we can't fit into this pool and
12417 there no natural barriers that we could use. Insert a
12418 new barrier in the code somewhere between the previous
12419 fix and this one, and arrange to jump around it. */
12420 HOST_WIDE_INT max_address
;
12422 /* The last item on the list of fixes must be a barrier, so
12423 we can never run off the end of the list of fixes without
12424 last_barrier being set. */
12427 max_address
= minipool_vector_head
->max_address
;
12428 /* Check that there isn't another fix that is in range that
12429 we couldn't fit into this pool because the pool was
12430 already too large: we need to put the pool before such an
12431 instruction. The pool itself may come just after the
12432 fix because create_fix_barrier also allows space for a
12433 jump instruction. */
12434 if (ftmp
->address
< max_address
)
12435 max_address
= ftmp
->address
+ 1;
12437 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
12440 assign_minipool_offsets (last_barrier
);
12444 if (GET_CODE (ftmp
->insn
) != BARRIER
12445 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
12452 /* Scan over the fixes we have identified for this pool, fixing them
12453 up and adding the constants to the pool itself. */
12454 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
12455 this_fix
= this_fix
->next
)
12456 if (GET_CODE (this_fix
->insn
) != BARRIER
)
/* Rewrite the operand to a pc-relative load from the pool label plus the
   constant's offset within the pool.  */
12459 = plus_constant (gen_rtx_LABEL_REF (VOIDmode
,
12460 minipool_vector_label
),
12461 this_fix
->minipool
->offset
);
12462 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
12465 dump_minipool (last_barrier
->insn
);
12469 /* From now on we must synthesize any constants that we can't handle
12470 directly. This can happen if the RTL gets split during final
12471 instruction generation. */
12472 after_arm_reorg
= 1;
12474 /* Free the minipool memory. */
12475 obstack_free (&minipool_obstack
, minipool_startobj
);
12478 /* Routines to output assembly language. */
12480 /* If the rtx is the correct value then return the string of the number.
12481 In this way we can ensure that valid double constants are generated even
12482 when cross compiling. */
/* NOTE(review): extraction-garbled; the return-type line and the lazy
   table-init call guarded by !fp_consts_inited are elided.  Looks up X
   among the 8 FPA immediate constants and returns its string form;
   anything else is unreachable.  */
12484 fp_immediate_constant (rtx x
)
12489 if (!fp_consts_inited
)
12492 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
12493 for (i
= 0; i
< 8; i
++)
12494 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
12495 return strings_fp
[i
];
/* Caller must only pass one of the eight representable constants.  */
12497 gcc_unreachable ();
12500 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12501 static const char *
12502 fp_const_from_val (REAL_VALUE_TYPE
*r
)
12506 if (!fp_consts_inited
)
/* Same 8-entry table lookup as fp_immediate_constant, but on an already
   decoded REAL_VALUE_TYPE.  */
12509 for (i
= 0; i
< 8; i
++)
12510 if (REAL_VALUES_EQUAL (*r
, values_fp
[i
]))
12511 return strings_fp
[i
];
12513 gcc_unreachable ();
12516 /* Output the operands of a LDM/STM instruction to STREAM.
12517 MASK is the ARM register set mask of which only bits 0-15 are important.
12518 REG is the base register, either the frame pointer or the stack pointer,
12519 INSTR is the possibly suffixed load or store instruction.
12520 RFE is nonzero if the instruction should also copy spsr to cpsr. */
/* NOTE(review): extraction-garbled; some lines (not_first updates, the
   rfe test before the closing brace) are elided.  Emits
   "\t<instr> <reg>, {r..., r...}" with "^" appended when RFE.  */
12523 print_multi_reg (FILE *stream
, const char *instr
, unsigned reg
,
12524 unsigned long mask
, int rfe
)
12527 bool not_first
= FALSE
;
/* RFE only makes sense when PC is in the transfer list.  */
12529 gcc_assert (!rfe
|| (mask
& (1 << PC_REGNUM
)));
12530 fputc ('\t', stream
);
12531 asm_fprintf (stream
, instr
, reg
);
12532 fputc ('{', stream
);
12534 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
12535 if (mask
& (1 << i
))
12538 fprintf (stream
, ", ");
12540 asm_fprintf (stream
, "%r", i
);
12545 fprintf (stream
, "}^\n");
12547 fprintf (stream
, "}\n");
12551 /* Output a FLDMD instruction to STREAM.
12552 BASE if the register containing the address.
12553 REG and COUNT specify the register range.
12554 Extra registers may be added to avoid hardware bugs.
12556 We output FLDMD even for ARMv5 VFP implementations. Although
12557 FLDMD is technically not supported until ARMv6, it is believed
12558 that all VFP implementations support its use in this context. */
/* NOTE(review): extraction-garbled; the body of the ARM10 VFPr1
   workaround (widening the 2-register case) is elided.  */
12561 vfp_output_fldmd (FILE * stream
, unsigned int base
, int reg
, int count
)
12565 /* Workaround ARM10 VFPr1 bug. */
12566 if (count
== 2 && !arm_arch6
)
12573 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12574 load into multiple parts if we have to handle more than 16 registers. */
/* Recursive split: first 16 registers, then the remainder.  */
12577 vfp_output_fldmd (stream
, base
, reg
, 16);
12578 vfp_output_fldmd (stream
, base
, reg
+ 16, count
- 16);
12582 fputc ('\t', stream
);
12583 asm_fprintf (stream
, "fldmfdd\t%r!, {", base
);
12585 for (i
= reg
; i
< reg
+ count
; i
++)
12588 fputs (", ", stream
);
12589 asm_fprintf (stream
, "d%d", i
);
12591 fputs ("}\n", stream
);
12596 /* Output the assembly for a store multiple. */
/* Builds an "fstmfdd sp!, {dN, ...}" template into a local buffer from
   operands[1] (first D register) and operands[2] (a PARALLEL whose length
   gives the register count), then emits it.
   NOTE(review): extraction-garbled; buffer declaration and return are
   elided.  */
12599 vfp_output_fstmd (rtx
* operands
)
12606 strcpy (pattern
, "fstmfdd\t%m0!, {%P1");
12607 p
= strlen (pattern
);
12609 gcc_assert (GET_CODE (operands
[1]) == REG
)
;
/* Convert the VFP register number to a D-register index.  */
12611 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
12612 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
12614 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
12616 strcpy (&pattern
[p
], "}");
12618 output_asm_insn (pattern
, operands
);
12623 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12624 number of bytes pushed. */
/* NOTE(review): extraction-garbled; several lines (locals, the PRE_MODIFY
   plus expression, loop-body offsets, the return) are elided.  Builds a
   PARALLEL push pattern plus a parallel REG_FRAME_RELATED_EXPR "dwarf"
   sequence describing the stack adjustment and each saved D register.  */
12627 vfp_emit_fstmd (int base_reg
, int count
)
12634 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12635 register pairs are stored by a store multiple insn. We avoid this
12636 by pushing an extra pair. */
12637 if (count
== 2 && !arm_arch6
)
12639 if (base_reg
== LAST_VFP_REGNUM
- 3)
12644 /* FSTMD may not store more than 16 doubleword registers at once. Split
12645 larger stores into multiple parts (up to a maximum of two, in
12650 /* NOTE: base_reg is an internal register number, so each D register
12652 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
12653 saved
+= vfp_emit_fstmd (base_reg
, 16);
12657 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
12658 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
12660 reg
= gen_rtx_REG (DFmode
, base_reg
);
/* Element 0 of the PARALLEL is the actual multi-register push.  */
12663 XVECEXP (par
, 0, 0)
12664 = gen_rtx_SET (VOIDmode
,
12667 gen_rtx_PRE_MODIFY (Pmode
,
12670 (stack_pointer_rtx
,
12673 gen_rtx_UNSPEC (BLKmode
,
12674 gen_rtvec (1, reg
),
12675 UNSPEC_PUSH_MULT
));
/* Dwarf entry 0: sp := sp - 8*count.  */
12677 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
12678 plus_constant (stack_pointer_rtx
, -(count
* 8)));
12679 RTX_FRAME_RELATED_P (tmp
) = 1;
12680 XVECEXP (dwarf
, 0, 0) = tmp
;
12682 tmp
= gen_rtx_SET (VOIDmode
,
12683 gen_frame_mem (DFmode
, stack_pointer_rtx
),
12685 RTX_FRAME_RELATED_P (tmp
) = 1;
12686 XVECEXP (dwarf
, 0, 1) = tmp
;
/* Remaining registers: USE entries in the push, frame-store entries in
   the dwarf sequence.  */
12688 for (i
= 1; i
< count
; i
++)
12690 reg
= gen_rtx_REG (DFmode
, base_reg
);
12692 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
12694 tmp
= gen_rtx_SET (VOIDmode
,
12695 gen_frame_mem (DFmode
,
12696 plus_constant (stack_pointer_rtx
,
12699 RTX_FRAME_RELATED_P (tmp
) = 1;
12700 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
12703 par
= emit_insn (par
);
12704 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
12705 RTX_FRAME_RELATED_P (par
) = 1;
12710 /* Emit a call instruction with pattern PAT. ADDR is the address of
12711 the call target. */
12714 arm_emit_call_insn (rtx pat
, rtx addr
)
12718 insn
= emit_call_insn (pat
);
12720 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12721 If the call might use such an entry, add a use of the PIC register
12722 to the instruction's CALL_INSN_FUNCTION_USAGE. */
/* Only non-local symbols can go through a PLT entry.  */
12723 if (TARGET_VXWORKS_RTP
12725 && GET_CODE (addr
) == SYMBOL_REF
12726 && (SYMBOL_REF_DECL (addr
)
12727 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
12728 : !SYMBOL_REF_LOCAL_P (addr
)))
12730 require_pic_register ();
12731 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
12735 /* Output a 'call' insn. */
/* Emits a pre-ARMv5 indirect call: save return address into lr via pc,
   then branch to the target register (bx when interworking/ARMv4T,
   otherwise mov pc).  NOTE(review): extraction-garbled; return-type line
   and final return are elided.  */
12737 output_call (rtx
*operands
)
12739 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
12741 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12742 if (REGNO (operands
[0]) == LR_REGNUM
)
12744 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
12745 output_asm_insn ("mov%?\t%0, %|lr", operands
);
/* Set lr to the return address (pc reads as current insn + 8 here).  */
12748 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
12750 if (TARGET_INTERWORK
|| arm_arch4t
)
12751 output_asm_insn ("bx%?\t%0", operands
);
12753 output_asm_insn ("mov%?\t%|pc, %0", operands
);
12758 /* Output a 'call' insn that is a reference in memory. This is
12759 disabled for ARMv5 and we prefer a blx instead because otherwise
12760 there's a significant performance overhead. */
/* NOTE(review): extraction-garbled; return-type line, some braces and the
   return are elided.  Three strategies: interworking (via ip + bx), an
   address that uses lr (load target first, then save lr), or the direct
   form (save lr, then load pc straight from memory).  */
12762 output_call_mem (rtx
*operands
)
12764 gcc_assert (!arm_arch5
);
12765 if (TARGET_INTERWORK
)
12767 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
12768 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
12769 output_asm_insn ("bx%?\t%|ip", operands
);
12771 else if (regno_use_in (LR_REGNUM
, operands
[0]))
12773 /* LR is used in the memory address. We load the address in the
12774 first instruction. It's safe to use IP as the target of the
12775 load since the call will kill it anyway. */
12776 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
12777 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
12779 output_asm_insn ("bx%?\t%|ip", operands
);
12781 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
12785 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
12786 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
12793 /* Output a move from arm registers to an fpa registers.
12794 OPERANDS[0] is an fpa register.
12795 OPERANDS[1] is the first registers of an arm register pair. */
/* Pushes the three ARM core registers holding the extended value, then
   pops them into the FPA register with ldfe.  */
12797 output_mov_long_double_fpa_from_arm (rtx
*operands
)
12799 int arm_reg0
= REGNO (operands
[1]);
12802 gcc_assert (arm_reg0
!= IP_REGNUM
);
12804 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
12805 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
12806 ops
[2] = gen_rtx_REG (SImode
, 2 + arm_reg0
);
12808 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops
);
12809 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands
);
12814 /* Output a move from an fpa register to arm registers.
12815 OPERANDS[0] is the first registers of an arm register pair.
12816 OPERANDS[1] is an fpa register. */
/* Mirror of output_mov_long_double_fpa_from_arm: store the FPA value to
   the stack with stfe, then pop it into three core registers.  */
12818 output_mov_long_double_arm_from_fpa (rtx
*operands
)
12820 int arm_reg0
= REGNO (operands
[0]);
12823 gcc_assert (arm_reg0
!= IP_REGNUM
);
12825 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
12826 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
12827 ops
[2] = gen_rtx_REG (SImode
, 2 + arm_reg0
);
12829 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands
);
12830 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops
);
12834 /* Output a move from arm registers to arm registers of a long double
12835 OPERANDS[0] is the destination.
12836 OPERANDS[1] is the source. */
12838 output_mov_long_double_arm_from_arm (rtx
*operands
)
12840 /* We have to be careful here because the two might overlap. */
12841 int dest_start
= REGNO (operands
[0]);
12842 int src_start
= REGNO (operands
[1]);
/* Copy low-to-high when moving downwards, high-to-low otherwise, so an
   overlapping source is never clobbered before it is read.  */
12846 if (dest_start
< src_start
)
12848 for (i
= 0; i
< 3; i
++)
12850 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
12851 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
12852 output_asm_insn ("mov%?\t%0, %1", ops
);
12857 for (i
= 2; i
>= 0; i
--)
12859 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
12860 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
12861 output_asm_insn ("mov%?\t%0, %1", ops
);
/* Emit a movw/movt-style pair setting DEST to SRC: for an immediate,
   set the low 16 bits then (only if nonzero) insert the high 16 bits;
   otherwise emit HIGH and LO_SUM halves.
   NOTE(review): extraction-garbled; return-type line is elided.  */
12869 arm_emit_movpair (rtx dest
, rtx src
)
12871 /* If the src is an immediate, simplify it. */
12872 if (CONST_INT_P (src
))
12874 HOST_WIDE_INT val
= INTVAL (src
);
12875 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
/* Skip the high-half insert when the top 16 bits are all zero.  */
12876 if ((val
>> 16) & 0x0000ffff)
12877 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
12879 GEN_INT ((val
>> 16) & 0x0000ffff));
12882 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
12883 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
12886 /* Output a move from arm registers to an fpa registers.
12887 OPERANDS[0] is an fpa register.
12888 OPERANDS[1] is the first registers of an arm register pair. */
/* Double-word variant: push two core registers, pop into FPA with ldfd. */
12890 output_mov_double_fpa_from_arm (rtx
*operands
)
12892 int arm_reg0
= REGNO (operands
[1]);
12895 gcc_assert (arm_reg0
!= IP_REGNUM
);
12897 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
12898 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
12899 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops
);
12900 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands
);
12904 /* Output a move from an fpa register to arm registers.
12905 OPERANDS[0] is the first registers of an arm register pair.
12906 OPERANDS[1] is an fpa register. */
/* Double-word variant: store FPA value with stfd, pop into two core
   registers.  */
12908 output_mov_double_arm_from_fpa (rtx
*operands
)
12910 int arm_reg0
= REGNO (operands
[0]);
12913 gcc_assert (arm_reg0
!= IP_REGNUM
);
12915 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
12916 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
12917 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands
);
12918 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops
);
12922 /* Output a move between double words. It must be REG<-MEM
/* NOTE(review): heavily extraction-garbled — many case labels, braces and
   conditions are elided, so the visible switch arms cannot be matched to
   their address-form cases with certainty.  The shape that remains:
   a load branch (REG <- MEM) and a store branch (MEM <- REG), each
   switching on the address form (REG, PRE/POST INC/DEC, PRE/POST_MODIFY,
   LABEL_REF, PLUS, fallback) and choosing between ldrd/strd, ldm/stm
   variants, or a pair of single ldr/str — with explicit care for
   base/destination register overlap and the Cortex-M3 ldrd errata
   (fix_cm3_ldrd).  */
12925 output_move_double (rtx
*operands
)
12927 enum rtx_code code0
= GET_CODE (operands
[0]);
12928 enum rtx_code code1
= GET_CODE (operands
[1]);
12933 unsigned int reg0
= REGNO (operands
[0]);
12935 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
12937 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
12939 switch (GET_CODE (XEXP (operands
[1], 0)))
12943 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
12944 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
12946 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
12950 gcc_assert (TARGET_LDRD
);
12951 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
12956 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
12958 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
12963 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
12965 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
12969 gcc_assert (TARGET_LDRD
);
12970 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
12975 /* Autoicrement addressing modes should never have overlapping
12976 base and destination registers, and overlapping index registers
12977 are already prohibited, so this doesn't need to worry about
12979 otherops
[0] = operands
[0];
12980 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
12981 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
12983 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
12985 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
12987 /* Registers overlap so split out the increment. */
12988 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
12989 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
12993 /* Use a single insn if we can.
12994 FIXME: IWMMXT allows offsets larger than ldrd can
12995 handle, fix these up with a pair of ldr. */
12997 || GET_CODE (otherops
[2]) != CONST_INT
12998 || (INTVAL (otherops
[2]) > -256
12999 && INTVAL (otherops
[2]) < 256))
13000 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
13003 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
13004 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
13010 /* Use a single insn if we can.
13011 FIXME: IWMMXT allows offsets larger than ldrd can handle,
13012 fix these up with a pair of ldr. */
13014 || GET_CODE (otherops
[2]) != CONST_INT
13015 || (INTVAL (otherops
[2]) > -256
13016 && INTVAL (otherops
[2]) < 256))
13017 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
/* POST_MODIFY split: load the high word first so the base (still needed
   for the second access) is not clobbered early.  */
13020 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
13021 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
13028 /* We might be able to use ldrd %0, %1 here. However the range is
13029 different to ldr/adr, and it is broken on some ARMv7-M
13030 implementations. */
13031 /* Use the second register of the pair to avoid problematic
13033 otherops
[1] = operands
[1];
13034 output_asm_insn ("adr%?\t%0, %1", otherops
);
13035 operands
[1] = otherops
[0];
13037 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
13039 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
13042 /* ??? This needs checking for thumb2. */
13044 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
13045 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
13047 otherops
[0] = operands
[0];
13048 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
13049 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
13051 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
13053 if (GET_CODE (otherops
[2]) == CONST_INT
&& !TARGET_LDRD
)
/* Small fixed offsets map to ldm addressing variants.  */
13055 switch ((int) INTVAL (otherops
[2]))
13058 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
13063 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
13068 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
13072 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
13073 operands
[1] = otherops
[0];
13075 && (GET_CODE (otherops
[2]) == REG
13077 || (GET_CODE (otherops
[2]) == CONST_INT
13078 && INTVAL (otherops
[2]) > -256
13079 && INTVAL (otherops
[2]) < 256)))
13081 if (reg_overlap_mentioned_p (operands
[0],
13085 /* Swap base and index registers over to
13086 avoid a conflict. */
13088 otherops
[1] = otherops
[2];
13091 /* If both registers conflict, it will usually
13092 have been fixed by a splitter. */
13093 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
13094 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
13096 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
13097 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
13101 otherops
[0] = operands
[0];
13102 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
13107 if (GET_CODE (otherops
[2]) == CONST_INT
)
13109 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
13110 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
13112 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
13115 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
13118 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
13121 return "ldr%(d%)\t%0, [%1]";
13123 return "ldm%(ia%)\t%1, %M0";
13127 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
13128 /* Take care of overlapping base/data reg. */
13129 if (reg_mentioned_p (operands
[0], operands
[1]))
13131 output_asm_insn ("ldr%?\t%0, %1", otherops
);
13132 output_asm_insn ("ldr%?\t%0, %1", operands
);
13136 output_asm_insn ("ldr%?\t%0, %1", operands
);
13137 output_asm_insn ("ldr%?\t%0, %1", otherops
);
13144 /* Constraints should ensure this. */
13145 gcc_assert (code0
== MEM
&& code1
== REG
);
13146 gcc_assert (REGNO (operands
[1]) != IP_REGNUM
);
/* Store branch (MEM <- REG): same address-form dispatch as the load
   branch above, using strd/stm/str.  */
13148 switch (GET_CODE (XEXP (operands
[0], 0)))
13152 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
13154 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
13158 gcc_assert (TARGET_LDRD
);
13159 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
13164 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
13166 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
13171 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
13173 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
13177 gcc_assert (TARGET_LDRD
);
13178 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
13183 otherops
[0] = operands
[1];
13184 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
13185 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
13187 /* IWMMXT allows offsets larger than ldrd can handle,
13188 fix these up with a pair of ldr. */
13190 && GET_CODE (otherops
[2]) == CONST_INT
13191 && (INTVAL(otherops
[2]) <= -256
13192 || INTVAL(otherops
[2]) >= 256))
13194 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
13196 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
13197 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
13201 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
13202 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
13205 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
13206 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
13208 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
13212 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
13213 if (GET_CODE (otherops
[2]) == CONST_INT
&& !TARGET_LDRD
)
13215 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
13218 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
13224 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
13230 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
13235 && (GET_CODE (otherops
[2]) == REG
13237 || (GET_CODE (otherops
[2]) == CONST_INT
13238 && INTVAL (otherops
[2]) > -256
13239 && INTVAL (otherops
[2]) < 256)))
13241 otherops
[0] = operands
[1];
13242 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
13243 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
/* Fallback: two single-word stores at offsets 0 and 4.  */
13249 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
13250 otherops
[1] = operands
[1];
13251 output_asm_insn ("str%?\t%1, %0", operands
);
13252 output_asm_insn ("str%?\t%H1, %0", otherops
);
13259 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13260 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
/* NOTE(review): extraction-garbled; declarations, case labels and braces
   are elided.  Loads use ldmia (or adr + ldmia for a label address);
   reg->reg copies move four SImode registers in whichever direction
   avoids clobbering an overlapping source; stores use stmia.  */
13263 output_move_quad (rtx
*operands
)
13265 if (REG_P (operands
[0]))
13267 /* Load, or reg->reg move. */
13269 if (MEM_P (operands
[1]))
13271 switch (GET_CODE (XEXP (operands
[1], 0)))
13274 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
13279 output_asm_insn ("adr%?\t%0, %1", operands
);
13280 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
13284 gcc_unreachable ();
13292 gcc_assert (REG_P (operands
[1]));
13294 dest
= REGNO (operands
[0]);
13295 src
= REGNO (operands
[1]);
13297 /* This seems pretty dumb, but hopefully GCC won't try to do it
/* Ascending copy when dest < src, descending otherwise, to cope with
   overlapping register ranges.  */
13300 for (i
= 0; i
< 4; i
++)
13302 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
13303 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
13304 output_asm_insn ("mov%?\t%0, %1", ops
);
13307 for (i
= 3; i
>= 0; i
--)
13309 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
13310 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
13311 output_asm_insn ("mov%?\t%0, %1", ops
);
13317 gcc_assert (MEM_P (operands
[0]));
13318 gcc_assert (REG_P (operands
[1]));
13319 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
13321 switch (GET_CODE (XEXP (operands
[0], 0)))
13324 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
13328 gcc_unreachable ();
13335 /* Output a VFP load or store instruction. */
/* NOTE(review): extraction-garbled; case labels, buffer declaration and
   parts of the sprintf argument list are elided.  Direction is inferred
   from which operand is a REG; the template is chosen by address form
   (pre-decrement writeback, post-increment writeback, or plain), then
   instantiated with ld/st, single/double width, and an "@ int" comment
   marker for integer-mode data.  */
13338 output_move_vfp (rtx
*operands
)
13340 rtx reg
, mem
, addr
, ops
[2];
13341 int load
= REG_P (operands
[0]);
13342 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
13343 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
13346 enum machine_mode mode
;
/* operands[!load] is the register, operands[load] the memory operand.  */
13348 reg
= operands
[!load
];
13349 mem
= operands
[load
];
13351 mode
= GET_MODE (reg
);
13353 gcc_assert (REG_P (reg
));
13354 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
13355 gcc_assert (mode
== SFmode
13359 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
13360 gcc_assert (MEM_P (mem
));
13362 addr
= XEXP (mem
, 0);
13364 switch (GET_CODE (addr
))
13367 templ
= "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13368 ops
[0] = XEXP (addr
, 0);
13373 templ
= "f%smia%c%%?\t%%0!, {%%%s1}%s";
13374 ops
[0] = XEXP (addr
, 0);
13379 templ
= "f%s%c%%?\t%%%s0, %%1%s";
13385 sprintf (buff
, templ
,
13386 load
? "ld" : "st",
13389 integer_p
? "\t%@ int" : "");
13390 output_asm_insn (buff
, ops
);
13395 /* Output a Neon quad-word load or store, or a load or store for
13396 larger structure modes.
13398 WARNING: The ordering of elements is weird in big-endian mode,
13399 because we use VSTM, as required by the EABI. GCC RTL defines
13400 element ordering based on in-memory order. This can be differ
13401 from the architectural ordering of elements within a NEON register.
13402 The intrinsics defined in arm_neon.h use the NEON register element
13403 ordering, not the GCC RTL element ordering.
13405 For example, the in-memory ordering of a big-endian a quadword
13406 vector with 16-bit elements when stored from register pair {d0,d1}
13407 will be (lowest address first, d0[N] is NEON register element N):
13409 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13411 When necessary, quadword registers (dN, dN+1) are moved to ARM
13412 registers from rN in the order:
13414 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13416 So that STM/LDM can be used on vectors in ARM registers, and the
13417 same memory layout will result as if VSTM/VLDM were used. */
/* NOTE(review): extraction-garbled; case labels, some declarations and
   braces are elided.  Chooses a vldm/vstm template by address form; for
   label/plus addresses it falls back to per-D-register vldr/vstr pairs,
   deferring at most one register that overlaps the address until last.  */
13420 output_move_neon (rtx
*operands
)
13422 rtx reg
, mem
, addr
, ops
[2];
13423 int regno
, load
= REG_P (operands
[0]);
13426 enum machine_mode mode
;
13428 reg
= operands
[!load
];
13429 mem
= operands
[load
];
13431 mode
= GET_MODE (reg
);
13433 gcc_assert (REG_P (reg
));
13434 regno
= REGNO (reg
);
13435 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
13436 || NEON_REGNO_OK_FOR_QUAD (regno
));
13437 gcc_assert (VALID_NEON_DREG_MODE (mode
)
13438 || VALID_NEON_QREG_MODE (mode
)
13439 || VALID_NEON_STRUCT_MODE (mode
));
13440 gcc_assert (MEM_P (mem
));
13442 addr
= XEXP (mem
, 0);
13444 /* Strip off const from addresses like (const (plus (...))). */
13445 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
13446 addr
= XEXP (addr
, 0);
13448 switch (GET_CODE (addr
))
13451 templ
= "v%smia%%?\t%%0!, %%h1";
13452 ops
[0] = XEXP (addr
, 0);
13457 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13458 templ
= "v%smdb%%?\t%%0!, %%h1";
13459 ops
[0] = XEXP (addr
, 0);
13464 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13465 gcc_unreachable ();
/* Label/PLUS addresses: emit one vldr/vstr per D-register pair.  */
13470 int nregs
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
13473 for (i
= 0; i
< nregs
; i
++)
13475 /* We're only using DImode here because it's a convenient size. */
13476 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
13477 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
/* At most one transfer may overlap the address registers; remember it
   and emit it after all the others.  */
13478 if (reg_overlap_mentioned_p (ops
[0], mem
))
13480 gcc_assert (overlap
== -1);
13485 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
13486 output_asm_insn (buff
, ops
);
13491 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
13492 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
13493 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
13494 output_asm_insn (buff
, ops
);
13501 templ
= "v%smia%%?\t%%m0, %%h1";
13506 sprintf (buff
, templ
, load
? "ld" : "st");
13507 output_asm_insn (buff
, ops
);
13512 /* Compute and return the length of neon_mov<mode>, where <mode> is
13513 one of VSTRUCT modes: EI, OI, CI or XI. */
/* NOTE(review): extraction-garbled; the per-mode length switch for the
   reg-reg case and the return statements are elided.  For memory
   operands at a label or reg+offset address the length is one 4-byte
   insn per D-register pair; presumably the other address forms are a
   single insn — TODO confirm against the original file.  */
13515 arm_attr_length_move_neon (rtx insn
)
13517 rtx reg
, mem
, addr
;
13519 enum machine_mode mode
;
13521 extract_insn_cached (insn
);
/* Pure register-to-register moves: length depends only on the mode.  */
13523 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
13525 mode
= GET_MODE (recog_data
.operand
[0]);
13536 gcc_unreachable ();
13540 load
= REG_P (recog_data
.operand
[0]);
13541 reg
= recog_data
.operand
[!load
];
13542 mem
= recog_data
.operand
[load
];
13544 gcc_assert (MEM_P (mem
));
13546 mode
= GET_MODE (reg
);
13547 addr
= XEXP (mem
, 0);
13549 /* Strip off const from addresses like (const (plus (...))). */
13550 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
13551 addr
= XEXP (addr
, 0);
13553 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
13555 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
13562 /* Return nonzero if the offset in the address is an immediate. Otherwise,
/* NOTE(review): extraction-garbled; the return statements and the
   early-out for a pure register operand 0 are elided.  Tests whether the
   memory operand's address is REG or REG + CONST_INT.  */
13566 arm_address_offset_is_imm (rtx insn
)
13570 extract_insn_cached (insn
);
13572 if (REG_P (recog_data
.operand
[0]))
13575 mem
= recog_data
.operand
[0];
13577 gcc_assert (MEM_P (mem
));
13579 addr
= XEXP (mem
, 0);
13581 if (GET_CODE (addr
) == REG
13582 || (GET_CODE (addr
) == PLUS
13583 && GET_CODE (XEXP (addr
, 0)) == REG
13584 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
))
13590 /* Output an ADD r, s, #n where n may be too big for one instruction.
13591 If adding zero to one register, output nothing. */
13593 output_add_immediate (rtx
*operands
)
13595 HOST_WIDE_INT n
= INTVAL (operands
[2]);
/* A zero add to the same register is a no-op and emits nothing.  */
13597 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
/* Negative constants are emitted as a SUB sequence of their negation.  */
13600 output_multi_immediate (operands
,
13601 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13604 output_multi_immediate (operands
,
13605 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13612 /* Output a multiple immediate operation.
13613 OPERANDS is the vector of operands referred to in the output patterns.
13614 INSTR1 is the output pattern to use for the first constant.
13615 INSTR2 is the output pattern to use for subsequent constants.
13616 IMMED_OP is the index of the constant slot in OPERANDS.
13617 N is the constant value. */
13618 static const char *
13619 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
13620 int immed_op
, HOST_WIDE_INT n
)
13622 #if HOST_BITS_PER_WIDE_INT > 32
13628 /* Quick and easy output. */
13629 operands
[immed_op
] = const0_rtx
;
13630 output_asm_insn (instr1
, operands
);
13635 const char * instr
= instr1
;
13637 /* Note that n is never zero here (which would give no output). */
13638 for (i
= 0; i
< 32; i
+= 2)
13642 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
13643 output_asm_insn (instr
, operands
);
13653 /* Return the name of a shifter operation. */
13654 static const char *
13655 arm_shift_nmem(enum rtx_code code
)
13660 return ARM_LSL_NAME
;
13676 /* Return the appropriate ARM instruction for the operation code.
13677 The returned result should not be overwritten. OP is the rtx of the
13678 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13681 arithmetic_instr (rtx op
, int shift_first_arg
)
13683 switch (GET_CODE (op
))
13689 return shift_first_arg
? "rsb" : "sub";
13704 return arm_shift_nmem(GET_CODE(op
));
13707 gcc_unreachable ();
13711 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13712 for the operation code. The returned result should not be overwritten.
13713 OP is the rtx code of the shift.
13714 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
13716 static const char *
13717 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
13720 enum rtx_code code
= GET_CODE (op
);
13722 switch (GET_CODE (XEXP (op
, 1)))
13730 *amountp
= INTVAL (XEXP (op
, 1));
13734 gcc_unreachable ();
13740 gcc_assert (*amountp
!= -1);
13741 *amountp
= 32 - *amountp
;
13744 /* Fall through. */
13750 mnem
= arm_shift_nmem(code
);
13754 /* We never have to worry about the amount being other than a
13755 power of 2, since this case can never be reloaded from a reg. */
13756 gcc_assert (*amountp
!= -1);
13757 *amountp
= int_log2 (*amountp
);
13758 return ARM_LSL_NAME
;
13761 gcc_unreachable ();
13764 if (*amountp
!= -1)
13766 /* This is not 100% correct, but follows from the desire to merge
13767 multiplication by a power of 2 with the recognizer for a
13768 shift. >=32 is not a valid shift for "lsl", so we must try and
13769 output a shift that produces the correct arithmetical result.
13770 Using lsr #32 is identical except for the fact that the carry bit
13771 is not set correctly if we set the flags; but we never use the
13772 carry bit from such an operation, so we can ignore that. */
13773 if (code
== ROTATERT
)
13774 /* Rotate is just modulo 32. */
13776 else if (*amountp
!= (*amountp
& 31))
13778 if (code
== ASHIFT
)
13783 /* Shifts of 0 are no-ops. */
13791 /* Obtain the shift from the POWER of two. */
13793 static HOST_WIDE_INT
13794 int_log2 (HOST_WIDE_INT power
)
13796 HOST_WIDE_INT shift
= 0;
13798 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
13800 gcc_assert (shift
<= 31);
13807 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13808 because /bin/as is horribly restrictive. The judgement about
13809 whether or not each character is 'printable' (and can be output as
13810 is) or not (and must be printed with an octal escape) must be made
13811 with reference to the *host* character set -- the situation is
13812 similar to that discussed in the comments above pp_c_char in
13813 c-pretty-print.c. */
13815 #define MAX_ASCII_LEN 51
13818 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
13821 int len_so_far
= 0;
13823 fputs ("\t.ascii\t\"", stream
);
13825 for (i
= 0; i
< len
; i
++)
13829 if (len_so_far
>= MAX_ASCII_LEN
)
13831 fputs ("\"\n\t.ascii\t\"", stream
);
13837 if (c
== '\\' || c
== '\"')
13839 putc ('\\', stream
);
13847 fprintf (stream
, "\\%03o", c
);
13852 fputs ("\"\n", stream
);
13855 /* Compute the register save mask for registers 0 through 12
13856 inclusive. This code is used by arm_compute_save_reg_mask. */
13858 static unsigned long
13859 arm_compute_save_reg0_reg12_mask (void)
13861 unsigned long func_type
= arm_current_func_type ();
13862 unsigned long save_reg_mask
= 0;
13865 if (IS_INTERRUPT (func_type
))
13867 unsigned int max_reg
;
13868 /* Interrupt functions must not corrupt any registers,
13869 even call clobbered ones. If this is a leaf function
13870 we can just examine the registers used by the RTL, but
13871 otherwise we have to assume that whatever function is
13872 called might clobber anything, and so we have to save
13873 all the call-clobbered registers as well. */
13874 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
13875 /* FIQ handlers have registers r8 - r12 banked, so
13876 we only need to check r0 - r7, Normal ISRs only
13877 bank r14 and r15, so we must check up to r12.
13878 r13 is the stack pointer which is always preserved,
13879 so we do not need to consider it here. */
13884 for (reg
= 0; reg
<= max_reg
; reg
++)
13885 if (df_regs_ever_live_p (reg
)
13886 || (! current_function_is_leaf
&& call_used_regs
[reg
]))
13887 save_reg_mask
|= (1 << reg
);
13889 /* Also save the pic base register if necessary. */
13891 && !TARGET_SINGLE_PIC_BASE
13892 && arm_pic_register
!= INVALID_REGNUM
13893 && crtl
->uses_pic_offset_table
)
13894 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
13896 else if (IS_VOLATILE(func_type
))
13898 /* For noreturn functions we historically omitted register saves
13899 altogether. However this really messes up debugging. As a
13900 compromise save just the frame pointers. Combined with the link
13901 register saved elsewhere this should be sufficient to get
13903 if (frame_pointer_needed
)
13904 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
13905 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
13906 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
13907 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
13908 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
13912 /* In the normal case we only need to save those registers
13913 which are call saved and which are used by this function. */
13914 for (reg
= 0; reg
<= 11; reg
++)
13915 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
13916 save_reg_mask
|= (1 << reg
);
13918 /* Handle the frame pointer as a special case. */
13919 if (frame_pointer_needed
)
13920 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
13922 /* If we aren't loading the PIC register,
13923 don't stack it even though it may be live. */
13925 && !TARGET_SINGLE_PIC_BASE
13926 && arm_pic_register
!= INVALID_REGNUM
13927 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
13928 || crtl
->uses_pic_offset_table
))
13929 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
13931 /* The prologue will copy SP into R0, so save it. */
13932 if (IS_STACKALIGN (func_type
))
13933 save_reg_mask
|= 1;
13936 /* Save registers so the exception handler can modify them. */
13937 if (crtl
->calls_eh_return
)
13943 reg
= EH_RETURN_DATA_REGNO (i
);
13944 if (reg
== INVALID_REGNUM
)
13946 save_reg_mask
|= 1 << reg
;
13950 return save_reg_mask
;
13954 /* Compute the number of bytes used to store the static chain register on the
13955 stack, above the stack frame. We need to know this accurately to get the
13956 alignment of the rest of the stack frame correct. */
13958 static int arm_compute_static_chain_stack_bytes (void)
13960 unsigned long func_type
= arm_current_func_type ();
13961 int static_chain_stack_bytes
= 0;
13963 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
&&
13964 IS_NESTED (func_type
) &&
13965 df_regs_ever_live_p (3) && crtl
->args
.pretend_args_size
== 0)
13966 static_chain_stack_bytes
= 4;
13968 return static_chain_stack_bytes
;
13972 /* Compute a bit mask of which registers need to be
13973 saved on the stack for the current function.
13974 This is used by arm_get_frame_offsets, which may add extra registers. */
13976 static unsigned long
13977 arm_compute_save_reg_mask (void)
13979 unsigned int save_reg_mask
= 0;
13980 unsigned long func_type
= arm_current_func_type ();
13983 if (IS_NAKED (func_type
))
13984 /* This should never really happen. */
13987 /* If we are creating a stack frame, then we must save the frame pointer,
13988 IP (which will hold the old stack pointer), LR and the PC. */
13989 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
13991 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
13994 | (1 << PC_REGNUM
);
13996 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
13998 /* Decide if we need to save the link register.
13999 Interrupt routines have their own banked link register,
14000 so they never need to save it.
14001 Otherwise if we do not use the link register we do not need to save
14002 it. If we are pushing other registers onto the stack however, we
14003 can save an instruction in the epilogue by pushing the link register
14004 now and then popping it back into the PC. This incurs extra memory
14005 accesses though, so we only do it when optimizing for size, and only
14006 if we know that we will not need a fancy return sequence. */
14007 if (df_regs_ever_live_p (LR_REGNUM
)
14010 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
14011 && !crtl
->calls_eh_return
))
14012 save_reg_mask
|= 1 << LR_REGNUM
;
14014 if (cfun
->machine
->lr_save_eliminated
)
14015 save_reg_mask
&= ~ (1 << LR_REGNUM
);
14017 if (TARGET_REALLY_IWMMXT
14018 && ((bit_count (save_reg_mask
)
14019 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
14020 arm_compute_static_chain_stack_bytes())
14023 /* The total number of registers that are going to be pushed
14024 onto the stack is odd. We need to ensure that the stack
14025 is 64-bit aligned before we start to save iWMMXt registers,
14026 and also before we start to create locals. (A local variable
14027 might be a double or long long which we will load/store using
14028 an iWMMXt instruction). Therefore we need to push another
14029 ARM register, so that the stack will be 64-bit aligned. We
14030 try to avoid using the arg registers (r0 -r3) as they might be
14031 used to pass values in a tail call. */
14032 for (reg
= 4; reg
<= 12; reg
++)
14033 if ((save_reg_mask
& (1 << reg
)) == 0)
14037 save_reg_mask
|= (1 << reg
);
14040 cfun
->machine
->sibcall_blocked
= 1;
14041 save_reg_mask
|= (1 << 3);
14045 /* We may need to push an additional register for use initializing the
14046 PIC base register. */
14047 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
14048 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
14050 reg
= thumb_find_work_register (1 << 4);
14051 if (!call_used_regs
[reg
])
14052 save_reg_mask
|= (1 << reg
);
14055 return save_reg_mask
;
14059 /* Compute a bit mask of which registers need to be
14060 saved on the stack for the current function. */
14061 static unsigned long
14062 thumb1_compute_save_reg_mask (void)
14064 unsigned long mask
;
14068 for (reg
= 0; reg
< 12; reg
++)
14069 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14073 && !TARGET_SINGLE_PIC_BASE
14074 && arm_pic_register
!= INVALID_REGNUM
14075 && crtl
->uses_pic_offset_table
)
14076 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
14078 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14079 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
14080 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
14082 /* LR will also be pushed if any lo regs are pushed. */
14083 if (mask
& 0xff || thumb_force_lr_save ())
14084 mask
|= (1 << LR_REGNUM
);
14086 /* Make sure we have a low work register if we need one.
14087 We will need one if we are going to push a high register,
14088 but we are not currently intending to push a low register. */
14089 if ((mask
& 0xff) == 0
14090 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
14092 /* Use thumb_find_work_register to choose which register
14093 we will use. If the register is live then we will
14094 have to push it. Use LAST_LO_REGNUM as our fallback
14095 choice for the register to select. */
14096 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
14097 /* Make sure the register returned by thumb_find_work_register is
14098 not part of the return value. */
14099 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
14100 reg
= LAST_LO_REGNUM
;
14102 if (! call_used_regs
[reg
])
14106 /* The 504 below is 8 bytes less than 512 because there are two possible
14107 alignment words. We can't tell here if they will be present or not so we
14108 have to play it safe and assume that they are. */
14109 if ((CALLER_INTERWORKING_SLOT_SIZE
+
14110 ROUND_UP_WORD (get_frame_size ()) +
14111 crtl
->outgoing_args_size
) >= 504)
14113 /* This is the same as the code in thumb1_expand_prologue() which
14114 determines which register to use for stack decrement. */
14115 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
14116 if (mask
& (1 << reg
))
14119 if (reg
> LAST_LO_REGNUM
)
14121 /* Make sure we have a register available for stack decrement. */
14122 mask
|= 1 << LAST_LO_REGNUM
;
14130 /* Return the number of bytes required to save VFP registers. */
14132 arm_get_vfp_saved_size (void)
14134 unsigned int regno
;
14139 /* Space for saved VFP registers. */
14140 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
14143 for (regno
= FIRST_VFP_REGNUM
;
14144 regno
< LAST_VFP_REGNUM
;
14147 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
14148 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
14152 /* Workaround ARM10 VFPr1 bug. */
14153 if (count
== 2 && !arm_arch6
)
14155 saved
+= count
* 8;
14164 if (count
== 2 && !arm_arch6
)
14166 saved
+= count
* 8;
14173 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14174 everything bar the final return instruction. */
14176 output_return_instruction (rtx operand
, int really_return
, int reverse
)
14178 char conditional
[10];
14181 unsigned long live_regs_mask
;
14182 unsigned long func_type
;
14183 arm_stack_offsets
*offsets
;
14185 func_type
= arm_current_func_type ();
14187 if (IS_NAKED (func_type
))
14190 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
14192 /* If this function was declared non-returning, and we have
14193 found a tail call, then we have to trust that the called
14194 function won't return. */
14199 /* Otherwise, trap an attempted return by aborting. */
14201 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
14203 assemble_external_libcall (ops
[1]);
14204 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
14210 gcc_assert (!cfun
->calls_alloca
|| really_return
);
14212 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
14214 cfun
->machine
->return_used_this_function
= 1;
14216 offsets
= arm_get_frame_offsets ();
14217 live_regs_mask
= offsets
->saved_regs_mask
;
14219 if (live_regs_mask
)
14221 const char * return_reg
;
14223 /* If we do not have any special requirements for function exit
14224 (e.g. interworking) then we can load the return address
14225 directly into the PC. Otherwise we must load it into LR. */
14227 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
14228 return_reg
= reg_names
[PC_REGNUM
];
14230 return_reg
= reg_names
[LR_REGNUM
];
14232 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
14234 /* There are three possible reasons for the IP register
14235 being saved. 1) a stack frame was created, in which case
14236 IP contains the old stack pointer, or 2) an ISR routine
14237 corrupted it, or 3) it was saved to align the stack on
14238 iWMMXt. In case 1, restore IP into SP, otherwise just
14240 if (frame_pointer_needed
)
14242 live_regs_mask
&= ~ (1 << IP_REGNUM
);
14243 live_regs_mask
|= (1 << SP_REGNUM
);
14246 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
14249 /* On some ARM architectures it is faster to use LDR rather than
14250 LDM to load a single register. On other architectures, the
14251 cost is the same. In 26 bit mode, or for exception handlers,
14252 we have to use LDM to load the PC so that the CPSR is also
14254 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
14255 if (live_regs_mask
== (1U << reg
))
14258 if (reg
<= LAST_ARM_REGNUM
14259 && (reg
!= LR_REGNUM
14261 || ! IS_INTERRUPT (func_type
)))
14263 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
14264 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
14271 /* Generate the load multiple instruction to restore the
14272 registers. Note we can get here, even if
14273 frame_pointer_needed is true, but only if sp already
14274 points to the base of the saved core registers. */
14275 if (live_regs_mask
& (1 << SP_REGNUM
))
14277 unsigned HOST_WIDE_INT stack_adjust
;
14279 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
14280 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
14282 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
14283 if (TARGET_UNIFIED_ASM
)
14284 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
14286 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
14289 /* If we can't use ldmib (SA110 bug),
14290 then try to pop r3 instead. */
14292 live_regs_mask
|= 1 << 3;
14294 if (TARGET_UNIFIED_ASM
)
14295 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
14297 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
14301 if (TARGET_UNIFIED_ASM
)
14302 sprintf (instr
, "pop%s\t{", conditional
);
14304 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
14306 p
= instr
+ strlen (instr
);
14308 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
14309 if (live_regs_mask
& (1 << reg
))
14311 int l
= strlen (reg_names
[reg
]);
14317 memcpy (p
, ", ", 2);
14321 memcpy (p
, "%|", 2);
14322 memcpy (p
+ 2, reg_names
[reg
], l
);
14326 if (live_regs_mask
& (1 << LR_REGNUM
))
14328 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
14329 /* If returning from an interrupt, restore the CPSR. */
14330 if (IS_INTERRUPT (func_type
))
14337 output_asm_insn (instr
, & operand
);
14339 /* See if we need to generate an extra instruction to
14340 perform the actual function return. */
14342 && func_type
!= ARM_FT_INTERWORKED
14343 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
14345 /* The return has already been handled
14346 by loading the LR into the PC. */
14353 switch ((int) ARM_FUNC_TYPE (func_type
))
14357 /* ??? This is wrong for unified assembly syntax. */
14358 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
14361 case ARM_FT_INTERWORKED
:
14362 sprintf (instr
, "bx%s\t%%|lr", conditional
);
14365 case ARM_FT_EXCEPTION
:
14366 /* ??? This is wrong for unified assembly syntax. */
14367 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
14371 /* Use bx if it's available. */
14372 if (arm_arch5
|| arm_arch4t
)
14373 sprintf (instr
, "bx%s\t%%|lr", conditional
);
14375 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
14379 output_asm_insn (instr
, & operand
);
14385 /* Write the function name into the code section, directly preceding
14386 the function prologue.
14388 Code will be output similar to this:
14390 .ascii "arm_poke_function_name", 0
14393 .word 0xff000000 + (t1 - t0)
14394 arm_poke_function_name
14396 stmfd sp!, {fp, ip, lr, pc}
14399 When performing a stack backtrace, code can inspect the value
14400 of 'pc' stored at 'fp' + 0. If the trace function then looks
14401 at location pc - 12 and the top 8 bits are set, then we know
14402 that there is a function name embedded immediately preceding this
14403 location and has length ((pc[-3]) & 0xff000000).
14405 We assume that pc is declared as a pointer to an unsigned long.
14407 It is of no benefit to output the function name if we are assembling
14408 a leaf function. These function types will not contain a stack
14409 backtrace structure, therefore it is not possible to determine the
14412 arm_poke_function_name (FILE *stream
, const char *name
)
14414 unsigned long alignlength
;
14415 unsigned long length
;
14418 length
= strlen (name
) + 1;
14419 alignlength
= ROUND_UP_WORD (length
);
14421 ASM_OUTPUT_ASCII (stream
, name
, length
);
14422 ASM_OUTPUT_ALIGN (stream
, 2);
14423 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
14424 assemble_aligned_integer (UNITS_PER_WORD
, x
);
14427 /* Place some comments into the assembler stream
14428 describing the current function. */
14430 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
14432 unsigned long func_type
;
14436 thumb1_output_function_prologue (f
, frame_size
);
14440 /* Sanity check. */
14441 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
14443 func_type
= arm_current_func_type ();
14445 switch ((int) ARM_FUNC_TYPE (func_type
))
14448 case ARM_FT_NORMAL
:
14450 case ARM_FT_INTERWORKED
:
14451 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
14454 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
14457 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
14459 case ARM_FT_EXCEPTION
:
14460 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
14464 if (IS_NAKED (func_type
))
14465 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14467 if (IS_VOLATILE (func_type
))
14468 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
14470 if (IS_NESTED (func_type
))
14471 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
14472 if (IS_STACKALIGN (func_type
))
14473 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14475 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14477 crtl
->args
.pretend_args_size
, frame_size
);
14479 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14480 frame_pointer_needed
,
14481 cfun
->machine
->uses_anonymous_args
);
14483 if (cfun
->machine
->lr_save_eliminated
)
14484 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
14486 if (crtl
->calls_eh_return
)
14487 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
14492 arm_output_epilogue (rtx sibling
)
14495 unsigned long saved_regs_mask
;
14496 unsigned long func_type
;
14497 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14498 frame that is $fp + 4 for a non-variadic function. */
14499 int floats_offset
= 0;
14501 FILE * f
= asm_out_file
;
14502 unsigned int lrm_count
= 0;
14503 int really_return
= (sibling
== NULL
);
14505 arm_stack_offsets
*offsets
;
14507 /* If we have already generated the return instruction
14508 then it is futile to generate anything else. */
14509 if (use_return_insn (FALSE
, sibling
) &&
14510 (cfun
->machine
->return_used_this_function
!= 0))
14513 func_type
= arm_current_func_type ();
14515 if (IS_NAKED (func_type
))
14516 /* Naked functions don't have epilogues. */
14519 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
14523 /* A volatile function should never return. Call abort. */
14524 op
= gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)" : "abort");
14525 assemble_external_libcall (op
);
14526 output_asm_insn ("bl\t%a0", &op
);
14531 /* If we are throwing an exception, then we really must be doing a
14532 return, so we can't tail-call. */
14533 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
14535 offsets
= arm_get_frame_offsets ();
14536 saved_regs_mask
= offsets
->saved_regs_mask
;
14539 lrm_count
= bit_count (saved_regs_mask
);
14541 floats_offset
= offsets
->saved_args
;
14542 /* Compute how far away the floats will be. */
14543 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
14544 if (saved_regs_mask
& (1 << reg
))
14545 floats_offset
+= 4;
14547 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
14549 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14550 int vfp_offset
= offsets
->frame
;
14552 if (TARGET_FPA_EMU2
)
14554 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
14555 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14557 floats_offset
+= 12;
14558 asm_fprintf (f
, "\tldfe\t%r, [%r, #-%d]\n",
14559 reg
, FP_REGNUM
, floats_offset
- vfp_offset
);
14564 start_reg
= LAST_FPA_REGNUM
;
14566 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
14568 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14570 floats_offset
+= 12;
14572 /* We can't unstack more than four registers at once. */
14573 if (start_reg
- reg
== 3)
14575 asm_fprintf (f
, "\tlfm\t%r, 4, [%r, #-%d]\n",
14576 reg
, FP_REGNUM
, floats_offset
- vfp_offset
);
14577 start_reg
= reg
- 1;
14582 if (reg
!= start_reg
)
14583 asm_fprintf (f
, "\tlfm\t%r, %d, [%r, #-%d]\n",
14584 reg
+ 1, start_reg
- reg
,
14585 FP_REGNUM
, floats_offset
- vfp_offset
);
14586 start_reg
= reg
- 1;
14590 /* Just in case the last register checked also needs unstacking. */
14591 if (reg
!= start_reg
)
14592 asm_fprintf (f
, "\tlfm\t%r, %d, [%r, #-%d]\n",
14593 reg
+ 1, start_reg
- reg
,
14594 FP_REGNUM
, floats_offset
- vfp_offset
);
14597 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
14601 /* The fldmd insns do not have base+offset addressing
14602 modes, so we use IP to hold the address. */
14603 saved_size
= arm_get_vfp_saved_size ();
14605 if (saved_size
> 0)
14607 floats_offset
+= saved_size
;
14608 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n", IP_REGNUM
,
14609 FP_REGNUM
, floats_offset
- vfp_offset
);
14611 start_reg
= FIRST_VFP_REGNUM
;
14612 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
14614 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
14615 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
14617 if (start_reg
!= reg
)
14618 vfp_output_fldmd (f
, IP_REGNUM
,
14619 (start_reg
- FIRST_VFP_REGNUM
) / 2,
14620 (reg
- start_reg
) / 2);
14621 start_reg
= reg
+ 2;
14624 if (start_reg
!= reg
)
14625 vfp_output_fldmd (f
, IP_REGNUM
,
14626 (start_reg
- FIRST_VFP_REGNUM
) / 2,
14627 (reg
- start_reg
) / 2);
14632 /* The frame pointer is guaranteed to be non-double-word aligned.
14633 This is because it is set to (old_stack_pointer - 4) and the
14634 old_stack_pointer was double word aligned. Thus the offset to
14635 the iWMMXt registers to be loaded must also be non-double-word
14636 sized, so that the resultant address *is* double-word aligned.
14637 We can ignore floats_offset since that was already included in
14638 the live_regs_mask. */
14639 lrm_count
+= (lrm_count
% 2 ? 2 : 1);
14641 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
14642 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14644 asm_fprintf (f
, "\twldrd\t%r, [%r, #-%d]\n",
14645 reg
, FP_REGNUM
, lrm_count
* 4);
14650 /* saved_regs_mask should contain the IP, which at the time of stack
14651 frame generation actually contains the old stack pointer. So a
14652 quick way to unwind the stack is just pop the IP register directly
14653 into the stack pointer. */
14654 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
14655 saved_regs_mask
&= ~ (1 << IP_REGNUM
);
14656 saved_regs_mask
|= (1 << SP_REGNUM
);
14658 /* There are two registers left in saved_regs_mask - LR and PC. We
14659 only need to restore the LR register (the return address), but to
14660 save time we can load it directly into the PC, unless we need a
14661 special function exit sequence, or we are not really returning. */
14663 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
14664 && !crtl
->calls_eh_return
)
14665 /* Delete the LR from the register mask, so that the LR on
14666 the stack is loaded into the PC in the register mask. */
14667 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
14669 saved_regs_mask
&= ~ (1 << PC_REGNUM
);
14671 /* We must use SP as the base register, because SP is one of the
14672 registers being restored. If an interrupt or page fault
14673 happens in the ldm instruction, the SP might or might not
14674 have been restored. That would be bad, as then SP will no
14675 longer indicate the safe area of stack, and we can get stack
14676 corruption. Using SP as the base register means that it will
14677 be reset correctly to the original value, should an interrupt
14678 occur. If the stack pointer already points at the right
14679 place, then omit the subtraction. */
14680 if (offsets
->outgoing_args
!= (1 + (int) bit_count (saved_regs_mask
))
14681 || cfun
->calls_alloca
)
14682 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n", SP_REGNUM
, FP_REGNUM
,
14683 4 * bit_count (saved_regs_mask
));
14684 print_multi_reg (f
, "ldmfd\t%r, ", SP_REGNUM
, saved_regs_mask
, 0);
14686 if (IS_INTERRUPT (func_type
))
14687 /* Interrupt handlers will have pushed the
14688 IP onto the stack, so restore it now. */
14689 print_multi_reg (f
, "ldmfd\t%r!, ", SP_REGNUM
, 1 << IP_REGNUM
, 0);
14693 /* This branch is executed for ARM mode (non-apcs frames) and
14694 Thumb-2 mode. Frame layout is essentially the same for those
14695 cases, except that in ARM mode frame pointer points to the
14696 first saved register, while in Thumb-2 mode the frame pointer points
14697 to the last saved register.
14699 It is possible to make frame pointer point to last saved
14700 register in both cases, and remove some conditionals below.
14701 That means that fp setup in prologue would be just "mov fp, sp"
14702 and sp restore in epilogue would be just "mov sp, fp", whereas
14703 now we have to use add/sub in those cases. However, the value
14704 of that would be marginal, as both mov and add/sub are 32-bit
14705 in ARM mode, and it would require extra conditionals
14706 in arm_expand_prologue to distingish ARM-apcs-frame case
14707 (where frame pointer is required to point at first register)
14708 and ARM-non-apcs-frame. Therefore, such change is postponed
14709 until real need arise. */
14710 unsigned HOST_WIDE_INT amount
;
14712 /* Restore stack pointer if necessary. */
14713 if (TARGET_ARM
&& frame_pointer_needed
)
14715 operands
[0] = stack_pointer_rtx
;
14716 operands
[1] = hard_frame_pointer_rtx
;
14718 operands
[2] = GEN_INT (offsets
->frame
- offsets
->saved_regs
);
14719 output_add_immediate (operands
);
14723 if (frame_pointer_needed
)
14725 /* For Thumb-2 restore sp from the frame pointer.
14726 Operand restrictions mean we have to incrememnt FP, then copy
14728 amount
= offsets
->locals_base
- offsets
->saved_regs
;
14729 operands
[0] = hard_frame_pointer_rtx
;
14733 unsigned long count
;
14734 operands
[0] = stack_pointer_rtx
;
14735 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
14736 /* pop call clobbered registers if it avoids a
14737 separate stack adjustment. */
14738 count
= offsets
->saved_regs
- offsets
->saved_args
;
14741 && !crtl
->calls_eh_return
14742 && bit_count(saved_regs_mask
) * 4 == count
14743 && !IS_INTERRUPT (func_type
)
14744 && !crtl
->tail_call_emit
)
14746 unsigned long mask
;
14747 /* Preserve return values, of any size. */
14748 mask
= (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
14750 mask
&= ~saved_regs_mask
;
14752 while (bit_count (mask
) * 4 > amount
)
14754 while ((mask
& (1 << reg
)) == 0)
14756 mask
&= ~(1 << reg
);
14758 if (bit_count (mask
) * 4 == amount
) {
14760 saved_regs_mask
|= mask
;
14767 operands
[1] = operands
[0];
14768 operands
[2] = GEN_INT (amount
);
14769 output_add_immediate (operands
);
14771 if (frame_pointer_needed
)
14772 asm_fprintf (f
, "\tmov\t%r, %r\n",
14773 SP_REGNUM
, HARD_FRAME_POINTER_REGNUM
);
14776 if (TARGET_FPA_EMU2
)
14778 for (reg
= FIRST_FPA_REGNUM
; reg
<= LAST_FPA_REGNUM
; reg
++)
14779 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14780 asm_fprintf (f
, "\tldfe\t%r, [%r], #12\n",
14785 start_reg
= FIRST_FPA_REGNUM
;
14787 for (reg
= FIRST_FPA_REGNUM
; reg
<= LAST_FPA_REGNUM
; reg
++)
14789 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14791 if (reg
- start_reg
== 3)
14793 asm_fprintf (f
, "\tlfmfd\t%r, 4, [%r]!\n",
14794 start_reg
, SP_REGNUM
);
14795 start_reg
= reg
+ 1;
14800 if (reg
!= start_reg
)
14801 asm_fprintf (f
, "\tlfmfd\t%r, %d, [%r]!\n",
14802 start_reg
, reg
- start_reg
,
14805 start_reg
= reg
+ 1;
14809 /* Just in case the last register checked also needs unstacking. */
14810 if (reg
!= start_reg
)
14811 asm_fprintf (f
, "\tlfmfd\t%r, %d, [%r]!\n",
14812 start_reg
, reg
- start_reg
, SP_REGNUM
);
14815 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
14817 int end_reg
= LAST_VFP_REGNUM
+ 1;
14819 /* Scan the registers in reverse order. We need to match
14820 any groupings made in the prologue and generate matching
14822 for (reg
= LAST_VFP_REGNUM
- 1; reg
>= FIRST_VFP_REGNUM
; reg
-= 2)
14824 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
14825 && (!df_regs_ever_live_p (reg
+ 1)
14826 || call_used_regs
[reg
+ 1]))
14828 if (end_reg
> reg
+ 2)
14829 vfp_output_fldmd (f
, SP_REGNUM
,
14830 (reg
+ 2 - FIRST_VFP_REGNUM
) / 2,
14831 (end_reg
- (reg
+ 2)) / 2);
14835 if (end_reg
> reg
+ 2)
14836 vfp_output_fldmd (f
, SP_REGNUM
, 0,
14837 (end_reg
- (reg
+ 2)) / 2);
14841 for (reg
= FIRST_IWMMXT_REGNUM
; reg
<= LAST_IWMMXT_REGNUM
; reg
++)
14842 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14843 asm_fprintf (f
, "\twldrd\t%r, [%r], #8\n", reg
, SP_REGNUM
);
14845 /* If we can, restore the LR into the PC. */
14846 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
14847 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
14848 && !IS_STACKALIGN (func_type
)
14850 && crtl
->args
.pretend_args_size
== 0
14851 && saved_regs_mask
& (1 << LR_REGNUM
)
14852 && !crtl
->calls_eh_return
)
14854 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
14855 saved_regs_mask
|= (1 << PC_REGNUM
);
14856 rfe
= IS_INTERRUPT (func_type
);
14861 /* Load the registers off the stack. If we only have one register
14862 to load use the LDR instruction - it is faster. For Thumb-2
14863 always use pop and the assembler will pick the best instruction.*/
14864 if (TARGET_ARM
&& saved_regs_mask
== (1 << LR_REGNUM
)
14865 && !IS_INTERRUPT(func_type
))
14867 asm_fprintf (f
, "\tldr\t%r, [%r], #4\n", LR_REGNUM
, SP_REGNUM
);
14869 else if (saved_regs_mask
)
14871 if (saved_regs_mask
& (1 << SP_REGNUM
))
14872 /* Note - write back to the stack register is not enabled
14873 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14874 in the list of registers and if we add writeback the
14875 instruction becomes UNPREDICTABLE. */
14876 print_multi_reg (f
, "ldmfd\t%r, ", SP_REGNUM
, saved_regs_mask
,
14878 else if (TARGET_ARM
)
14879 print_multi_reg (f
, "ldmfd\t%r!, ", SP_REGNUM
, saved_regs_mask
,
14882 print_multi_reg (f
, "pop\t", SP_REGNUM
, saved_regs_mask
, 0);
14885 if (crtl
->args
.pretend_args_size
)
14887 /* Unwind the pre-pushed regs. */
14888 operands
[0] = operands
[1] = stack_pointer_rtx
;
14889 operands
[2] = GEN_INT (crtl
->args
.pretend_args_size
);
14890 output_add_immediate (operands
);
14894 /* We may have already restored PC directly from the stack. */
14895 if (!really_return
|| saved_regs_mask
& (1 << PC_REGNUM
))
14898 /* Stack adjustment for exception handler. */
14899 if (crtl
->calls_eh_return
)
14900 asm_fprintf (f
, "\tadd\t%r, %r, %r\n", SP_REGNUM
, SP_REGNUM
,
14901 ARM_EH_STACKADJ_REGNUM
);
14903 /* Generate the return instruction. */
14904 switch ((int) ARM_FUNC_TYPE (func_type
))
14908 asm_fprintf (f
, "\tsubs\t%r, %r, #4\n", PC_REGNUM
, LR_REGNUM
);
14911 case ARM_FT_EXCEPTION
:
14912 asm_fprintf (f
, "\tmovs\t%r, %r\n", PC_REGNUM
, LR_REGNUM
);
14915 case ARM_FT_INTERWORKED
:
14916 asm_fprintf (f
, "\tbx\t%r\n", LR_REGNUM
);
14920 if (IS_STACKALIGN (func_type
))
14922 /* See comment in arm_expand_prologue. */
14923 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, 0);
14925 if (arm_arch5
|| arm_arch4t
)
14926 asm_fprintf (f
, "\tbx\t%r\n", LR_REGNUM
);
14928 asm_fprintf (f
, "\tmov\t%r, %r\n", PC_REGNUM
, LR_REGNUM
);
14936 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
14937 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
14939 arm_stack_offsets
*offsets
;
14945 /* Emit any call-via-reg trampolines that are needed for v4t support
14946 of call_reg and call_value_reg type insns. */
14947 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
14949 rtx label
= cfun
->machine
->call_via
[regno
];
14953 switch_to_section (function_section (current_function_decl
));
14954 targetm
.asm_out
.internal_label (asm_out_file
, "L",
14955 CODE_LABEL_NUMBER (label
));
14956 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
14960 /* ??? Probably not safe to set this here, since it assumes that a
14961 function will be emitted as assembly immediately after we generate
14962 RTL for it. This does not happen for inline functions. */
14963 cfun
->machine
->return_used_this_function
= 0;
14965 else /* TARGET_32BIT */
14967 /* We need to take into account any stack-frame rounding. */
14968 offsets
= arm_get_frame_offsets ();
14970 gcc_assert (!use_return_insn (FALSE
, NULL
)
14971 || (cfun
->machine
->return_used_this_function
!= 0)
14972 || offsets
->saved_regs
== offsets
->outgoing_args
14973 || frame_pointer_needed
);
14975 /* Reset the ARM-specific per-function variables. */
14976 after_arm_reorg
= 0;
14980 /* Generate and emit an insn that we will recognize as a push_multi.
14981 Unfortunately, since this insn does not reflect very well the actual
14982 semantics of the operation, we need to annotate the insn for the benefit
14983 of DWARF2 frame unwind information. */
14985 emit_multi_reg_push (unsigned long mask
)
14988 int num_dwarf_regs
;
14992 int dwarf_par_index
;
14995 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
14996 if (mask
& (1 << i
))
14999 gcc_assert (num_regs
&& num_regs
<= 16);
15001 /* We don't record the PC in the dwarf frame information. */
15002 num_dwarf_regs
= num_regs
;
15003 if (mask
& (1 << PC_REGNUM
))
15006 /* For the body of the insn we are going to generate an UNSPEC in
15007 parallel with several USEs. This allows the insn to be recognized
15008 by the push_multi pattern in the arm.md file.
15010 The body of the insn looks something like this:
15013 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15014 (const_int:SI <num>)))
15015 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15021 For the frame note however, we try to be more explicit and actually
15022 show each register being stored into the stack frame, plus a (single)
15023 decrement of the stack pointer. We do it this way in order to be
15024 friendly to the stack unwinding code, which only wants to see a single
15025 stack decrement per instruction. The RTL we generate for the note looks
15026 something like this:
15029 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15030 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15031 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15032 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15036 FIXME:: In an ideal world the PRE_MODIFY would not exist and
15037 instead we'd have a parallel expression detailing all
15038 the stores to the various memory addresses so that debug
15039 information is more up-to-date. Remember however while writing
15040 this to take care of the constraints with the push instruction.
15042 Note also that this has to be taken care of for the VFP registers.
15044 For more see PR43399. */
15046 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
15047 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
15048 dwarf_par_index
= 1;
15050 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
15052 if (mask
& (1 << i
))
15054 reg
= gen_rtx_REG (SImode
, i
);
15056 XVECEXP (par
, 0, 0)
15057 = gen_rtx_SET (VOIDmode
,
15060 gen_rtx_PRE_MODIFY (Pmode
,
15063 (stack_pointer_rtx
,
15066 gen_rtx_UNSPEC (BLKmode
,
15067 gen_rtvec (1, reg
),
15068 UNSPEC_PUSH_MULT
));
15070 if (i
!= PC_REGNUM
)
15072 tmp
= gen_rtx_SET (VOIDmode
,
15073 gen_frame_mem (SImode
, stack_pointer_rtx
),
15075 RTX_FRAME_RELATED_P (tmp
) = 1;
15076 XVECEXP (dwarf
, 0, dwarf_par_index
) = tmp
;
15084 for (j
= 1, i
++; j
< num_regs
; i
++)
15086 if (mask
& (1 << i
))
15088 reg
= gen_rtx_REG (SImode
, i
);
15090 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
15092 if (i
!= PC_REGNUM
)
15095 = gen_rtx_SET (VOIDmode
,
15098 plus_constant (stack_pointer_rtx
,
15101 RTX_FRAME_RELATED_P (tmp
) = 1;
15102 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
15109 par
= emit_insn (par
);
15111 tmp
= gen_rtx_SET (VOIDmode
,
15113 plus_constant (stack_pointer_rtx
, -4 * num_regs
));
15114 RTX_FRAME_RELATED_P (tmp
) = 1;
15115 XVECEXP (dwarf
, 0, 0) = tmp
;
15117 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
15122 /* Calculate the size of the return value that is passed in registers. */
15124 arm_size_return_regs (void)
15126 enum machine_mode mode
;
15128 if (crtl
->return_rtx
!= 0)
15129 mode
= GET_MODE (crtl
->return_rtx
);
15131 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
15133 return GET_MODE_SIZE (mode
);
15137 emit_sfm (int base_reg
, int count
)
15144 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
15145 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
15147 reg
= gen_rtx_REG (XFmode
, base_reg
++);
15149 XVECEXP (par
, 0, 0)
15150 = gen_rtx_SET (VOIDmode
,
15153 gen_rtx_PRE_MODIFY (Pmode
,
15156 (stack_pointer_rtx
,
15159 gen_rtx_UNSPEC (BLKmode
,
15160 gen_rtvec (1, reg
),
15161 UNSPEC_PUSH_MULT
));
15162 tmp
= gen_rtx_SET (VOIDmode
,
15163 gen_frame_mem (XFmode
, stack_pointer_rtx
), reg
);
15164 RTX_FRAME_RELATED_P (tmp
) = 1;
15165 XVECEXP (dwarf
, 0, 1) = tmp
;
15167 for (i
= 1; i
< count
; i
++)
15169 reg
= gen_rtx_REG (XFmode
, base_reg
++);
15170 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
15172 tmp
= gen_rtx_SET (VOIDmode
,
15173 gen_frame_mem (XFmode
,
15174 plus_constant (stack_pointer_rtx
,
15177 RTX_FRAME_RELATED_P (tmp
) = 1;
15178 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
15181 tmp
= gen_rtx_SET (VOIDmode
,
15183 plus_constant (stack_pointer_rtx
, -12 * count
));
15185 RTX_FRAME_RELATED_P (tmp
) = 1;
15186 XVECEXP (dwarf
, 0, 0) = tmp
;
15188 par
= emit_insn (par
);
15189 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
15195 /* Return true if the current function needs to save/restore LR. */
15198 thumb_force_lr_save (void)
15200 return !cfun
->machine
->lr_save_eliminated
15201 && (!leaf_function_p ()
15202 || thumb_far_jump_used_p ()
15203 || df_regs_ever_live_p (LR_REGNUM
));
15207 /* Return true if r3 is used by any of the tail call insns in the
15208 current function. */
15211 any_sibcall_uses_r3 (void)
15216 if (!crtl
->tail_call_emit
)
15218 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
15219 if (e
->flags
& EDGE_SIBCALL
)
15221 rtx call
= BB_END (e
->src
);
15222 if (!CALL_P (call
))
15223 call
= prev_nonnote_nondebug_insn (call
);
15224 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
15225 if (find_regno_fusage (call
, USE
, 3))
15232 /* Compute the distance from register FROM to register TO.
15233 These can be the arg pointer (26), the soft frame pointer (25),
15234 the stack pointer (13) or the hard frame pointer (11).
15235 In thumb mode r7 is used as the soft frame pointer, if needed.
15236 Typical stack layout looks like this:
15238 old stack pointer -> | |
15241 | | saved arguments for
15242 | | vararg functions
15245 hard FP & arg pointer -> | | \
15253 soft frame pointer -> | | /
15258 locals base pointer -> | | /
15263 current stack pointer -> | | /
15266 For a given function some or all of these stack components
15267 may not be needed, giving rise to the possibility of
15268 eliminating some of the registers.
15270 The values returned by this function must reflect the behavior
15271 of arm_expand_prologue() and arm_compute_save_reg_mask().
15273 The sign of the number returned reflects the direction of stack
15274 growth, so the values are positive for all eliminations except
15275 from the soft frame pointer to the hard frame pointer.
15277 SFP may point just inside the local variables block to ensure correct
15281 /* Calculate stack offsets. These are used to calculate register elimination
15282 offsets and in prologue/epilogue code. Also calculates which registers
15283 should be saved. */
15285 static arm_stack_offsets
*
15286 arm_get_frame_offsets (void)
15288 struct arm_stack_offsets
*offsets
;
15289 unsigned long func_type
;
15293 HOST_WIDE_INT frame_size
;
15296 offsets
= &cfun
->machine
->stack_offsets
;
15298 /* We need to know if we are a leaf function. Unfortunately, it
15299 is possible to be called after start_sequence has been called,
15300 which causes get_insns to return the insns for the sequence,
15301 not the function, which will cause leaf_function_p to return
15302 the incorrect result.
15304 to know about leaf functions once reload has completed, and the
15305 frame size cannot be changed after that time, so we can safely
15306 use the cached value. */
15308 if (reload_completed
)
15311 /* Initially this is the size of the local variables. It will translated
15312 into an offset once we have determined the size of preceding data. */
15313 frame_size
= ROUND_UP_WORD (get_frame_size ());
15315 leaf
= leaf_function_p ();
15317 /* Space for variadic functions. */
15318 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
15320 /* In Thumb mode this is incorrect, but never used. */
15321 offsets
->frame
= offsets
->saved_args
+ (frame_pointer_needed
? 4 : 0) +
15322 arm_compute_static_chain_stack_bytes();
15326 unsigned int regno
;
15328 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
15329 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
15330 saved
= core_saved
;
15332 /* We know that SP will be doubleword aligned on entry, and we must
15333 preserve that condition at any subroutine call. We also require the
15334 soft frame pointer to be doubleword aligned. */
15336 if (TARGET_REALLY_IWMMXT
)
15338 /* Check for the call-saved iWMMXt registers. */
15339 for (regno
= FIRST_IWMMXT_REGNUM
;
15340 regno
<= LAST_IWMMXT_REGNUM
;
15342 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
15346 func_type
= arm_current_func_type ();
15347 if (! IS_VOLATILE (func_type
))
15349 /* Space for saved FPA registers. */
15350 for (regno
= FIRST_FPA_REGNUM
; regno
<= LAST_FPA_REGNUM
; regno
++)
15351 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
15354 /* Space for saved VFP registers. */
15355 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
15356 saved
+= arm_get_vfp_saved_size ();
15359 else /* TARGET_THUMB1 */
15361 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
15362 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
15363 saved
= core_saved
;
15364 if (TARGET_BACKTRACE
)
15368 /* Saved registers include the stack frame. */
15369 offsets
->saved_regs
= offsets
->saved_args
+ saved
+
15370 arm_compute_static_chain_stack_bytes();
15371 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
15372 /* A leaf function does not need any stack alignment if it has nothing
15374 if (leaf
&& frame_size
== 0
15375 /* However if it calls alloca(), we have a dynamically allocated
15376 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
15377 && ! cfun
->calls_alloca
)
15379 offsets
->outgoing_args
= offsets
->soft_frame
;
15380 offsets
->locals_base
= offsets
->soft_frame
;
15384 /* Ensure SFP has the correct alignment. */
15385 if (ARM_DOUBLEWORD_ALIGN
15386 && (offsets
->soft_frame
& 7))
15388 offsets
->soft_frame
+= 4;
15389 /* Try to align stack by pushing an extra reg. Don't bother doing this
15390 when there is a stack frame as the alignment will be rolled into
15391 the normal stack adjustment. */
15392 if (frame_size
+ crtl
->outgoing_args_size
== 0)
15396 /* If it is safe to use r3, then do so. This sometimes
15397 generates better code on Thumb-2 by avoiding the need to
15398 use 32-bit push/pop instructions. */
15399 if (! any_sibcall_uses_r3 ()
15400 && arm_size_return_regs () <= 12
15401 && (offsets
->saved_regs_mask
& (1 << 3)) == 0)
15406 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
15408 if ((offsets
->saved_regs_mask
& (1 << i
)) == 0)
15417 offsets
->saved_regs
+= 4;
15418 offsets
->saved_regs_mask
|= (1 << reg
);
15423 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
15424 offsets
->outgoing_args
= (offsets
->locals_base
15425 + crtl
->outgoing_args_size
);
15427 if (ARM_DOUBLEWORD_ALIGN
)
15429 /* Ensure SP remains doubleword aligned. */
15430 if (offsets
->outgoing_args
& 7)
15431 offsets
->outgoing_args
+= 4;
15432 gcc_assert (!(offsets
->outgoing_args
& 7));
15439 /* Calculate the relative offsets for the different stack pointers. Positive
15440 offsets are in the direction of stack growth. */
15443 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
15445 arm_stack_offsets
*offsets
;
15447 offsets
= arm_get_frame_offsets ();
15449 /* OK, now we have enough information to compute the distances.
15450 There must be an entry in these switch tables for each pair
15451 of registers in ELIMINABLE_REGS, even if some of the entries
15452 seem to be redundant or useless. */
15455 case ARG_POINTER_REGNUM
:
15458 case THUMB_HARD_FRAME_POINTER_REGNUM
:
15461 case FRAME_POINTER_REGNUM
:
15462 /* This is the reverse of the soft frame pointer
15463 to hard frame pointer elimination below. */
15464 return offsets
->soft_frame
- offsets
->saved_args
;
15466 case ARM_HARD_FRAME_POINTER_REGNUM
:
15467 /* This is only non-zero in the case where the static chain register
15468 is stored above the frame. */
15469 return offsets
->frame
- offsets
->saved_args
- 4;
15471 case STACK_POINTER_REGNUM
:
15472 /* If nothing has been pushed on the stack at all
15473 then this will return -4. This *is* correct! */
15474 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
15477 gcc_unreachable ();
15479 gcc_unreachable ();
15481 case FRAME_POINTER_REGNUM
:
15484 case THUMB_HARD_FRAME_POINTER_REGNUM
:
15487 case ARM_HARD_FRAME_POINTER_REGNUM
:
15488 /* The hard frame pointer points to the top entry in the
15489 stack frame. The soft frame pointer to the bottom entry
15490 in the stack frame. If there is no stack frame at all,
15491 then they are identical. */
15493 return offsets
->frame
- offsets
->soft_frame
;
15495 case STACK_POINTER_REGNUM
:
15496 return offsets
->outgoing_args
- offsets
->soft_frame
;
15499 gcc_unreachable ();
15501 gcc_unreachable ();
15504 /* You cannot eliminate from the stack pointer.
15505 In theory you could eliminate from the hard frame
15506 pointer to the stack pointer, but this will never
15507 happen, since if a stack frame is not needed the
15508 hard frame pointer will never be used. */
15509 gcc_unreachable ();
15513 /* Given FROM and TO register numbers, say whether this elimination is
15514 allowed. Frame pointer elimination is automatically handled.
15516 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15517 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15518 pointer, we must eliminate FRAME_POINTER_REGNUM into
15519 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15520 ARG_POINTER_REGNUM. */
15523 arm_can_eliminate (const int from
, const int to
)
15525 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
15526 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
15527 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
15528 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
15532 /* Emit RTL to save coprocessor registers on function entry. Returns the
15533 number of bytes pushed. */
15536 arm_save_coproc_regs(void)
15538 int saved_size
= 0;
15540 unsigned start_reg
;
15543 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
15544 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
15546 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
15547 insn
= gen_rtx_MEM (V2SImode
, insn
);
15548 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
15549 RTX_FRAME_RELATED_P (insn
) = 1;
15553 /* Save any floating point call-saved registers used by this
15555 if (TARGET_FPA_EMU2
)
15557 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
15558 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
15560 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
15561 insn
= gen_rtx_MEM (XFmode
, insn
);
15562 insn
= emit_set_insn (insn
, gen_rtx_REG (XFmode
, reg
));
15563 RTX_FRAME_RELATED_P (insn
) = 1;
15569 start_reg
= LAST_FPA_REGNUM
;
15571 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
15573 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
15575 if (start_reg
- reg
== 3)
15577 insn
= emit_sfm (reg
, 4);
15578 RTX_FRAME_RELATED_P (insn
) = 1;
15580 start_reg
= reg
- 1;
15585 if (start_reg
!= reg
)
15587 insn
= emit_sfm (reg
+ 1, start_reg
- reg
);
15588 RTX_FRAME_RELATED_P (insn
) = 1;
15589 saved_size
+= (start_reg
- reg
) * 12;
15591 start_reg
= reg
- 1;
15595 if (start_reg
!= reg
)
15597 insn
= emit_sfm (reg
+ 1, start_reg
- reg
);
15598 saved_size
+= (start_reg
- reg
) * 12;
15599 RTX_FRAME_RELATED_P (insn
) = 1;
15602 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
15604 start_reg
= FIRST_VFP_REGNUM
;
15606 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
15608 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
15609 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
15611 if (start_reg
!= reg
)
15612 saved_size
+= vfp_emit_fstmd (start_reg
,
15613 (reg
- start_reg
) / 2);
15614 start_reg
= reg
+ 2;
15617 if (start_reg
!= reg
)
15618 saved_size
+= vfp_emit_fstmd (start_reg
,
15619 (reg
- start_reg
) / 2);
15625 /* Set the Thumb frame pointer from the stack pointer. */
15628 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
15630 HOST_WIDE_INT amount
;
15633 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
15635 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
15636 stack_pointer_rtx
, GEN_INT (amount
)));
15639 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
15640 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15641 expects the first two operands to be the same. */
15644 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
15646 hard_frame_pointer_rtx
));
15650 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
15651 hard_frame_pointer_rtx
,
15652 stack_pointer_rtx
));
15654 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
15655 plus_constant (stack_pointer_rtx
, amount
));
15656 RTX_FRAME_RELATED_P (dwarf
) = 1;
15657 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
15660 RTX_FRAME_RELATED_P (insn
) = 1;
15663 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15666 arm_expand_prologue (void)
15671 unsigned long live_regs_mask
;
15672 unsigned long func_type
;
15674 int saved_pretend_args
= 0;
15675 int saved_regs
= 0;
15676 unsigned HOST_WIDE_INT args_to_push
;
15677 arm_stack_offsets
*offsets
;
15679 func_type
= arm_current_func_type ();
15681 /* Naked functions don't have prologues. */
15682 if (IS_NAKED (func_type
))
15685 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15686 args_to_push
= crtl
->args
.pretend_args_size
;
15688 /* Compute which register we will have to save onto the stack. */
15689 offsets
= arm_get_frame_offsets ();
15690 live_regs_mask
= offsets
->saved_regs_mask
;
15692 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
15694 if (IS_STACKALIGN (func_type
))
15699 /* Handle a word-aligned stack pointer. We generate the following:
15704 <save and restore r0 in normal prologue/epilogue>
15708 The unwinder doesn't need to know about the stack realignment.
15709 Just tell it we saved SP in r0. */
15710 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
15712 r0
= gen_rtx_REG (SImode
, 0);
15713 r1
= gen_rtx_REG (SImode
, 1);
15714 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15715 compiler won't choke. */
15716 dwarf
= gen_rtx_UNSPEC (SImode
, rtvec_alloc (0), UNSPEC_STACK_ALIGN
);
15717 dwarf
= gen_rtx_SET (VOIDmode
, r0
, dwarf
);
15718 insn
= gen_movsi (r0
, stack_pointer_rtx
);
15719 RTX_FRAME_RELATED_P (insn
) = 1;
15720 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
15722 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
15723 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
15726 /* For APCS frames, if IP register is clobbered
15727 when creating frame, save that register in a special
15729 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
15731 if (IS_INTERRUPT (func_type
))
15733 /* Interrupt functions must not corrupt any registers.
15734 Creating a frame pointer however, corrupts the IP
15735 register, so we must push it first. */
15736 insn
= emit_multi_reg_push (1 << IP_REGNUM
);
15738 /* Do not set RTX_FRAME_RELATED_P on this insn.
15739 The dwarf stack unwinding code only wants to see one
15740 stack decrement per function, and this is not it. If
15741 this instruction is labeled as being part of the frame
15742 creation sequence then dwarf2out_frame_debug_expr will
15743 die when it encounters the assignment of IP to FP
15744 later on, since the use of SP here establishes SP as
15745 the CFA register and not IP.
15747 Anyway this instruction is not really part of the stack
15748 frame creation although it is part of the prologue. */
15750 else if (IS_NESTED (func_type
))
15752 /* The Static chain register is the same as the IP register
15753 used as a scratch register during stack frame creation.
15754 To get around this need to find somewhere to store IP
15755 whilst the frame is being created. We try the following
15758 1. The last argument register.
15759 2. A slot on the stack above the frame. (This only
15760 works if the function is not a varargs function).
15761 3. Register r3, after pushing the argument registers
15764 Note - we only need to tell the dwarf2 backend about the SP
15765 adjustment in the second variant; the static chain register
15766 doesn't need to be unwound, as it doesn't contain a value
15767 inherited from the caller. */
15769 if (df_regs_ever_live_p (3) == false)
15770 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
15771 else if (args_to_push
== 0)
15775 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15778 insn
= gen_rtx_PRE_DEC (SImode
, stack_pointer_rtx
);
15779 insn
= emit_set_insn (gen_frame_mem (SImode
, insn
), ip_rtx
);
15782 /* Just tell the dwarf backend that we adjusted SP. */
15783 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
15784 plus_constant (stack_pointer_rtx
,
15786 RTX_FRAME_RELATED_P (insn
) = 1;
15787 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
15791 /* Store the args on the stack. */
15792 if (cfun
->machine
->uses_anonymous_args
)
15793 insn
= emit_multi_reg_push
15794 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
15797 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
15798 GEN_INT (- args_to_push
)));
15800 RTX_FRAME_RELATED_P (insn
) = 1;
15802 saved_pretend_args
= 1;
15803 fp_offset
= args_to_push
;
15806 /* Now reuse r3 to preserve IP. */
15807 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
15811 insn
= emit_set_insn (ip_rtx
,
15812 plus_constant (stack_pointer_rtx
, fp_offset
));
15813 RTX_FRAME_RELATED_P (insn
) = 1;
15818 /* Push the argument registers, or reserve space for them. */
15819 if (cfun
->machine
->uses_anonymous_args
)
15820 insn
= emit_multi_reg_push
15821 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
15824 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
15825 GEN_INT (- args_to_push
)));
15826 RTX_FRAME_RELATED_P (insn
) = 1;
15829 /* If this is an interrupt service routine, and the link register
15830 is going to be pushed, and we're not generating extra
15831 push of IP (needed when frame is needed and frame layout if apcs),
15832 subtracting four from LR now will mean that the function return
15833 can be done with a single instruction. */
15834 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
15835 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
15836 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
15839 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
15841 emit_set_insn (lr
, plus_constant (lr
, -4));
15844 if (live_regs_mask
)
15846 saved_regs
+= bit_count (live_regs_mask
) * 4;
15847 if (optimize_size
&& !frame_pointer_needed
15848 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
15850 /* If no coprocessor registers are being pushed and we don't have
15851 to worry about a frame pointer then push extra registers to
15852 create the stack frame. This is done is a way that does not
15853 alter the frame layout, so is independent of the epilogue. */
15857 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
15859 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
15860 if (frame
&& n
* 4 >= frame
)
15863 live_regs_mask
|= (1 << n
) - 1;
15864 saved_regs
+= frame
;
15867 insn
= emit_multi_reg_push (live_regs_mask
);
15868 RTX_FRAME_RELATED_P (insn
) = 1;
15871 if (! IS_VOLATILE (func_type
))
15872 saved_regs
+= arm_save_coproc_regs ();
15874 if (frame_pointer_needed
&& TARGET_ARM
)
15876 /* Create the new frame pointer. */
15877 if (TARGET_APCS_FRAME
)
15879 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
15880 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
15881 RTX_FRAME_RELATED_P (insn
) = 1;
15883 if (IS_NESTED (func_type
))
15885 /* Recover the static chain register. */
15886 if (!df_regs_ever_live_p (3)
15887 || saved_pretend_args
)
15888 insn
= gen_rtx_REG (SImode
, 3);
15889 else /* if (crtl->args.pretend_args_size == 0) */
15891 insn
= plus_constant (hard_frame_pointer_rtx
, 4);
15892 insn
= gen_frame_mem (SImode
, insn
);
15894 emit_set_insn (ip_rtx
, insn
);
15895 /* Add a USE to stop propagate_one_insn() from barfing. */
15896 emit_insn (gen_prologue_use (ip_rtx
));
15901 insn
= GEN_INT (saved_regs
- 4);
15902 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
15903 stack_pointer_rtx
, insn
));
15904 RTX_FRAME_RELATED_P (insn
) = 1;
15908 if (flag_stack_usage_info
)
15909 current_function_static_stack_size
15910 = offsets
->outgoing_args
- offsets
->saved_args
;
15912 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
15914 /* This add can produce multiple insns for a large constant, so we
15915 need to get tricky. */
15916 rtx last
= get_last_insn ();
15918 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
15919 - offsets
->outgoing_args
);
15921 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
15925 last
= last
? NEXT_INSN (last
) : get_insns ();
15926 RTX_FRAME_RELATED_P (last
) = 1;
15928 while (last
!= insn
);
15930 /* If the frame pointer is needed, emit a special barrier that
15931 will prevent the scheduler from moving stores to the frame
15932 before the stack adjustment. */
15933 if (frame_pointer_needed
)
15934 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
15935 hard_frame_pointer_rtx
));
15939 if (frame_pointer_needed
&& TARGET_THUMB2
)
15940 thumb_set_frame_pointer (offsets
);
15942 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
15944 unsigned long mask
;
15946 mask
= live_regs_mask
;
15947 mask
&= THUMB2_WORK_REGS
;
15948 if (!IS_NESTED (func_type
))
15949 mask
|= (1 << IP_REGNUM
);
15950 arm_load_pic_register (mask
);
15953 /* If we are profiling, make sure no instructions are scheduled before
15954 the call to mcount. Similarly if the user has requested no
15955 scheduling in the prolog. Similarly if we want non-call exceptions
15956 using the EABI unwinder, to prevent faulting instructions from being
15957 swapped with a stack adjustment. */
15958 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
15959 || (arm_except_unwind_info (&global_options
) == UI_TARGET
15960 && cfun
->can_throw_non_call_exceptions
))
15961 emit_insn (gen_blockage ());
15963 /* If the link register is being kept alive, with the return address in it,
15964 then make sure that it does not get reused by the ce2 pass. */
15965 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
15966 cfun
->machine
->lr_save_eliminated
= 1;
15969 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15971 arm_print_condition (FILE *stream
)
15973 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
15975 /* Branch conversion is not implemented for Thumb-2. */
15978 output_operand_lossage ("predicated Thumb instruction");
15981 if (current_insn_predicate
!= NULL
)
15983 output_operand_lossage
15984 ("predicated instruction in conditional sequence");
15988 fputs (arm_condition_codes
[arm_current_cc
], stream
);
15990 else if (current_insn_predicate
)
15992 enum arm_cond_code code
;
15996 output_operand_lossage ("predicated Thumb instruction");
16000 code
= get_arm_condition_code (current_insn_predicate
);
16001 fputs (arm_condition_codes
[code
], stream
);
/* If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
16020 arm_print_operand (FILE *stream
, rtx x
, int code
)
16025 fputs (ASM_COMMENT_START
, stream
);
16029 fputs (user_label_prefix
, stream
);
16033 fputs (REGISTER_PREFIX
, stream
);
16037 arm_print_condition (stream
);
16041 /* Nothing in unified syntax, otherwise the current condition code. */
16042 if (!TARGET_UNIFIED_ASM
)
16043 arm_print_condition (stream
);
16047 /* The current condition code in unified syntax, otherwise nothing. */
16048 if (TARGET_UNIFIED_ASM
)
16049 arm_print_condition (stream
);
16053 /* The current condition code for a condition code setting instruction.
16054 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16055 if (TARGET_UNIFIED_ASM
)
16057 fputc('s', stream
);
16058 arm_print_condition (stream
);
16062 arm_print_condition (stream
);
16063 fputc('s', stream
);
16068 /* If the instruction is conditionally executed then print
16069 the current condition code, otherwise print 's'. */
16070 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
16071 if (current_insn_predicate
)
16072 arm_print_condition (stream
);
16074 fputc('s', stream
);
16077 /* %# is a "break" sequence. It doesn't output anything, but is used to
16078 separate e.g. operand numbers from following text, if that text consists
16079 of further digits which we don't want to be part of the operand
16087 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
16088 r
= real_value_negate (&r
);
16089 fprintf (stream
, "%s", fp_const_from_val (&r
));
16093 /* An integer or symbol address without a preceding # sign. */
16095 switch (GET_CODE (x
))
16098 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
16102 output_addr_const (stream
, x
);
16106 gcc_unreachable ();
16111 if (GET_CODE (x
) == CONST_INT
)
16114 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
16115 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
16119 putc ('~', stream
);
16120 output_addr_const (stream
, x
);
16125 /* The low 16 bits of an immediate constant. */
16126 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
16130 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
16133 /* Truncate Cirrus shift counts. */
16135 if (GET_CODE (x
) == CONST_INT
)
16137 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 0x3f);
16140 arm_print_operand (stream
, x
, 0);
16144 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
16152 if (!shift_operator (x
, SImode
))
16154 output_operand_lossage ("invalid shift operand");
16158 shift
= shift_op (x
, &val
);
16162 fprintf (stream
, ", %s ", shift
);
16164 arm_print_operand (stream
, XEXP (x
, 1), 0);
16166 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
      /* An explanation of the 'Q', 'R' and 'H' register operands:

	 In a pair of registers containing a DI or DF value the 'Q'
	 operand returns the register number of the register containing
	 the least significant part of the value.  The 'R' operand returns
	 the register number of the register containing the most
	 significant part of the value.

	 The 'H' operand returns the higher of the two register numbers.
	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
	 same as the 'Q' operand, since the most significant part of the
	 value is held in the lower number register.  The reverse is true
	 on systems where WORDS_BIG_ENDIAN is false.

	 The purpose of these operands is to distinguish between cases
	 where the endian-ness of the values is important (for example
	 when they are added together), and cases where the endian-ness
	 is irrelevant, but the order of register operations is important.
	 For example when loading a value from memory into a register
	 pair, the endian-ness does not matter.  Provided that the value
	 from the lower memory address is put into the lower numbered
	 register, and the value from the higher address is put into the
	 higher numbered register, the load will work regardless of whether
	 the value being loaded is big-wordian or little-wordian.  The
	 order of the two register loads can matter however, if the address
	 of the memory location is actually held in one of the registers
	 being overwritten by the load.

	 The 'Q' and 'R' constraints are also available for 64-bit
	 values.  */
16202 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
16204 rtx part
= gen_lowpart (SImode
, x
);
16205 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
16209 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16211 output_operand_lossage ("invalid operand for code '%c'", code
);
16215 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
16219 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
16221 enum machine_mode mode
= GET_MODE (x
);
16224 if (mode
== VOIDmode
)
16226 part
= gen_highpart_mode (SImode
, mode
, x
);
16227 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
16231 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16233 output_operand_lossage ("invalid operand for code '%c'", code
);
16237 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
16241 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16243 output_operand_lossage ("invalid operand for code '%c'", code
);
16247 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
16251 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16253 output_operand_lossage ("invalid operand for code '%c'", code
);
16257 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
16261 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16263 output_operand_lossage ("invalid operand for code '%c'", code
);
16267 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
16271 asm_fprintf (stream
, "%r",
16272 GET_CODE (XEXP (x
, 0)) == REG
16273 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
16277 asm_fprintf (stream
, "{%r-%r}",
16279 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
16282 /* Like 'M', but writing doubleword vector registers, for use by Neon
16286 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
16287 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
16289 asm_fprintf (stream
, "{d%d}", regno
);
16291 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
16296 /* CONST_TRUE_RTX means always -- that's the default. */
16297 if (x
== const_true_rtx
)
16300 if (!COMPARISON_P (x
))
16302 output_operand_lossage ("invalid operand for code '%c'", code
);
16306 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
16311 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16312 want to do that. */
16313 if (x
== const_true_rtx
)
16315 output_operand_lossage ("instruction never executed");
16318 if (!COMPARISON_P (x
))
16320 output_operand_lossage ("invalid operand for code '%c'", code
);
16324 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
16325 (get_arm_condition_code (x
))],
16329 /* Cirrus registers can be accessed in a variety of ways:
16330 single floating point (f)
16331 double floating point (d)
16333 64bit integer (dx). */
16334 case 'W': /* Cirrus register in F mode. */
16335 case 'X': /* Cirrus register in D mode. */
16336 case 'Y': /* Cirrus register in FX mode. */
16337 case 'Z': /* Cirrus register in DX mode. */
16338 gcc_assert (GET_CODE (x
) == REG
16339 && REGNO_REG_CLASS (REGNO (x
)) == CIRRUS_REGS
);
16341 fprintf (stream
, "mv%s%s",
16343 : code
== 'X' ? "d"
16344 : code
== 'Y' ? "fx" : "dx", reg_names
[REGNO (x
)] + 2);
16348 /* Print cirrus register in the mode specified by the register's mode. */
16351 int mode
= GET_MODE (x
);
16353 if (GET_CODE (x
) != REG
|| REGNO_REG_CLASS (REGNO (x
)) != CIRRUS_REGS
)
16355 output_operand_lossage ("invalid operand for code '%c'", code
);
16359 fprintf (stream
, "mv%s%s",
16360 mode
== DFmode
? "d"
16361 : mode
== SImode
? "fx"
16362 : mode
== DImode
? "dx"
16363 : "f", reg_names
[REGNO (x
)] + 2);
16369 if (GET_CODE (x
) != REG
16370 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
16371 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
16372 /* Bad value for wCG register number. */
16374 output_operand_lossage ("invalid operand for code '%c'", code
);
16379 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
16382 /* Print an iWMMXt control register name. */
16384 if (GET_CODE (x
) != CONST_INT
16386 || INTVAL (x
) >= 16)
16387 /* Bad value for wC register number. */
16389 output_operand_lossage ("invalid operand for code '%c'", code
);
16395 static const char * wc_reg_names
[16] =
16397 "wCID", "wCon", "wCSSF", "wCASF",
16398 "wC4", "wC5", "wC6", "wC7",
16399 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16400 "wC12", "wC13", "wC14", "wC15"
16403 fprintf (stream
, wc_reg_names
[INTVAL (x
)]);
16407 /* Print the high single-precision register of a VFP double-precision
16411 int mode
= GET_MODE (x
);
16414 if (GET_MODE_SIZE (mode
) != 8 || GET_CODE (x
) != REG
)
16416 output_operand_lossage ("invalid operand for code '%c'", code
);
16421 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
16423 output_operand_lossage ("invalid operand for code '%c'", code
);
16427 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
16431 /* Print a VFP/Neon double precision or quad precision register name. */
16435 int mode
= GET_MODE (x
);
16436 int is_quad
= (code
== 'q');
16439 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
16441 output_operand_lossage ("invalid operand for code '%c'", code
);
16445 if (GET_CODE (x
) != REG
16446 || !IS_VFP_REGNUM (REGNO (x
)))
16448 output_operand_lossage ("invalid operand for code '%c'", code
);
16453 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
16454 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
16456 output_operand_lossage ("invalid operand for code '%c'", code
);
16460 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
16461 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
16465 /* These two codes print the low/high doubleword register of a Neon quad
16466 register, respectively. For pair-structure types, can also print
16467 low/high quadword registers. */
16471 int mode
= GET_MODE (x
);
16474 if ((GET_MODE_SIZE (mode
) != 16
16475 && GET_MODE_SIZE (mode
) != 32) || GET_CODE (x
) != REG
)
16477 output_operand_lossage ("invalid operand for code '%c'", code
);
16482 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
16484 output_operand_lossage ("invalid operand for code '%c'", code
);
16488 if (GET_MODE_SIZE (mode
) == 16)
16489 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
16490 + (code
== 'f' ? 1 : 0));
16492 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
16493 + (code
== 'f' ? 1 : 0));
16497 /* Print a VFPv3 floating-point constant, represented as an integer
16501 int index
= vfp3_const_double_index (x
);
16502 gcc_assert (index
!= -1);
16503 fprintf (stream
, "%d", index
);
16507 /* Print bits representing opcode features for Neon.
16509 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16510 and polynomials as unsigned.
16512 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16514 Bit 2 is 1 for rounding functions, 0 otherwise. */
16516 /* Identify the type as 's', 'u', 'p' or 'f'. */
16519 HOST_WIDE_INT bits
= INTVAL (x
);
16520 fputc ("uspf"[bits
& 3], stream
);
16524 /* Likewise, but signed and unsigned integers are both 'i'. */
16527 HOST_WIDE_INT bits
= INTVAL (x
);
16528 fputc ("iipf"[bits
& 3], stream
);
16532 /* As for 'T', but emit 'u' instead of 'p'. */
16535 HOST_WIDE_INT bits
= INTVAL (x
);
16536 fputc ("usuf"[bits
& 3], stream
);
16540 /* Bit 2: rounding (vs none). */
16543 HOST_WIDE_INT bits
= INTVAL (x
);
16544 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
16548 /* Memory operand for vld1/vst1 instruction. */
16552 bool postinc
= FALSE
;
16553 unsigned align
, memsize
, align_bits
;
16555 gcc_assert (GET_CODE (x
) == MEM
);
16556 addr
= XEXP (x
, 0);
16557 if (GET_CODE (addr
) == POST_INC
)
16560 addr
= XEXP (addr
, 0);
16562 asm_fprintf (stream
, "[%r", REGNO (addr
));
16564 /* We know the alignment of this access, so we can emit a hint in the
16565 instruction (for some alignments) as an aid to the memory subsystem
16567 align
= MEM_ALIGN (x
) >> 3;
16568 memsize
= INTVAL (MEM_SIZE (x
));
16570 /* Only certain alignment specifiers are supported by the hardware. */
16571 if (memsize
== 16 && (align
% 32) == 0)
16573 else if ((memsize
== 8 || memsize
== 16) && (align
% 16) == 0)
16575 else if ((align
% 8) == 0)
16580 if (align_bits
!= 0)
16581 asm_fprintf (stream
, ":%d", align_bits
);
16583 asm_fprintf (stream
, "]");
16586 fputs("!", stream
);
16594 gcc_assert (GET_CODE (x
) == MEM
);
16595 addr
= XEXP (x
, 0);
16596 gcc_assert (GET_CODE (addr
) == REG
);
16597 asm_fprintf (stream
, "[%r]", REGNO (addr
));
16601 /* Translate an S register number into a D register number and element index. */
16604 int mode
= GET_MODE (x
);
16607 if (GET_MODE_SIZE (mode
) != 4 || GET_CODE (x
) != REG
)
16609 output_operand_lossage ("invalid operand for code '%c'", code
);
16614 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
16616 output_operand_lossage ("invalid operand for code '%c'", code
);
16620 regno
= regno
- FIRST_VFP_REGNUM
;
16621 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
16625 /* Register specifier for vld1.16/vst1.16. Translate the S register
16626 number into a D register number and element index. */
16629 int mode
= GET_MODE (x
);
16632 if (GET_MODE_SIZE (mode
) != 2 || GET_CODE (x
) != REG
)
16634 output_operand_lossage ("invalid operand for code '%c'", code
);
16639 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
16641 output_operand_lossage ("invalid operand for code '%c'", code
);
16645 regno
= regno
- FIRST_VFP_REGNUM
;
16646 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
16653 output_operand_lossage ("missing operand");
16657 switch (GET_CODE (x
))
16660 asm_fprintf (stream
, "%r", REGNO (x
));
16664 output_memory_reference_mode
= GET_MODE (x
);
16665 output_address (XEXP (x
, 0));
16672 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
16673 sizeof (fpstr
), 0, 1);
16674 fprintf (stream
, "#%s", fpstr
);
16677 fprintf (stream
, "#%s", fp_immediate_constant (x
));
16681 gcc_assert (GET_CODE (x
) != NEG
);
16682 fputc ('#', stream
);
16683 if (GET_CODE (x
) == HIGH
)
16685 fputs (":lower16:", stream
);
16689 output_addr_const (stream
, x
);
16695 /* Target hook for printing a memory address. */
16697 arm_print_operand_address (FILE *stream
, rtx x
)
16701 int is_minus
= GET_CODE (x
) == MINUS
;
16703 if (GET_CODE (x
) == REG
)
16704 asm_fprintf (stream
, "[%r, #0]", REGNO (x
));
16705 else if (GET_CODE (x
) == PLUS
|| is_minus
)
16707 rtx base
= XEXP (x
, 0);
16708 rtx index
= XEXP (x
, 1);
16709 HOST_WIDE_INT offset
= 0;
16710 if (GET_CODE (base
) != REG
16711 || (GET_CODE (index
) == REG
&& REGNO (index
) == SP_REGNUM
))
16713 /* Ensure that BASE is a register. */
16714 /* (one of them must be). */
16715 /* Also ensure the SP is not used as in index register. */
16720 switch (GET_CODE (index
))
16723 offset
= INTVAL (index
);
16726 asm_fprintf (stream
, "[%r, #%wd]",
16727 REGNO (base
), offset
);
16731 asm_fprintf (stream
, "[%r, %s%r]",
16732 REGNO (base
), is_minus
? "-" : "",
16742 asm_fprintf (stream
, "[%r, %s%r",
16743 REGNO (base
), is_minus
? "-" : "",
16744 REGNO (XEXP (index
, 0)));
16745 arm_print_operand (stream
, index
, 'S');
16746 fputs ("]", stream
);
16751 gcc_unreachable ();
16754 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
16755 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
16757 extern enum machine_mode output_memory_reference_mode
;
16759 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
16761 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
16762 asm_fprintf (stream
, "[%r, #%s%d]!",
16763 REGNO (XEXP (x
, 0)),
16764 GET_CODE (x
) == PRE_DEC
? "-" : "",
16765 GET_MODE_SIZE (output_memory_reference_mode
));
16767 asm_fprintf (stream
, "[%r], #%s%d",
16768 REGNO (XEXP (x
, 0)),
16769 GET_CODE (x
) == POST_DEC
? "-" : "",
16770 GET_MODE_SIZE (output_memory_reference_mode
));
16772 else if (GET_CODE (x
) == PRE_MODIFY
)
16774 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
16775 if (GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
)
16776 asm_fprintf (stream
, "#%wd]!",
16777 INTVAL (XEXP (XEXP (x
, 1), 1)));
16779 asm_fprintf (stream
, "%r]!",
16780 REGNO (XEXP (XEXP (x
, 1), 1)));
16782 else if (GET_CODE (x
) == POST_MODIFY
)
16784 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
16785 if (GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
)
16786 asm_fprintf (stream
, "#%wd",
16787 INTVAL (XEXP (XEXP (x
, 1), 1)));
16789 asm_fprintf (stream
, "%r",
16790 REGNO (XEXP (XEXP (x
, 1), 1)));
16792 else output_addr_const (stream
, x
);
16796 if (GET_CODE (x
) == REG
)
16797 asm_fprintf (stream
, "[%r]", REGNO (x
));
16798 else if (GET_CODE (x
) == POST_INC
)
16799 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
16800 else if (GET_CODE (x
) == PLUS
)
16802 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
16803 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
16804 asm_fprintf (stream
, "[%r, #%wd]",
16805 REGNO (XEXP (x
, 0)),
16806 INTVAL (XEXP (x
, 1)));
16808 asm_fprintf (stream
, "[%r, %r]",
16809 REGNO (XEXP (x
, 0)),
16810 REGNO (XEXP (x
, 1)));
16813 output_addr_const (stream
, x
);
16817 /* Target hook for indicating whether a punctuation character for
16818 TARGET_PRINT_OPERAND is valid. */
16820 arm_print_operand_punct_valid_p (unsigned char code
)
16822 return (code
== '@' || code
== '|' || code
== '.'
16823 || code
== '(' || code
== ')' || code
== '#'
16824 || (TARGET_32BIT
&& (code
== '?'))
16825 || (TARGET_THUMB2
&& (code
== '!'))
16826 || (TARGET_THUMB
&& (code
== '_')));
16829 /* Target hook for assembling integer objects. The ARM version needs to
16830 handle word-sized values specially. */
16832 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
16834 enum machine_mode mode
;
16836 if (size
== UNITS_PER_WORD
&& aligned_p
)
16838 fputs ("\t.word\t", asm_out_file
);
16839 output_addr_const (asm_out_file
, x
);
16841 /* Mark symbols as position independent. We only do this in the
16842 .text segment, not in the .data segment. */
16843 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
16844 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
16846 /* See legitimize_pic_address for an explanation of the
16847 TARGET_VXWORKS_RTP check. */
16848 if (TARGET_VXWORKS_RTP
16849 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
16850 fputs ("(GOT)", asm_out_file
);
16852 fputs ("(GOTOFF)", asm_out_file
);
16854 fputc ('\n', asm_out_file
);
16858 mode
= GET_MODE (x
);
16860 if (arm_vector_mode_supported_p (mode
))
16864 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
16866 units
= CONST_VECTOR_NUNITS (x
);
16867 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
16869 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16870 for (i
= 0; i
< units
; i
++)
16872 rtx elt
= CONST_VECTOR_ELT (x
, i
);
16874 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
16877 for (i
= 0; i
< units
; i
++)
16879 rtx elt
= CONST_VECTOR_ELT (x
, i
);
16880 REAL_VALUE_TYPE rval
;
16882 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
16885 (rval
, GET_MODE_INNER (mode
),
16886 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
16892 return default_assemble_integer (x
, size
, aligned_p
);
16896 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
16900 if (!TARGET_AAPCS_BASED
)
16903 default_named_section_asm_out_constructor
16904 : default_named_section_asm_out_destructor
) (symbol
, priority
);
16908 /* Put these in the .init_array section, using a special relocation. */
16909 if (priority
!= DEFAULT_INIT_PRIORITY
)
16912 sprintf (buf
, "%s.%.5u",
16913 is_ctor
? ".init_array" : ".fini_array",
16915 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
16922 switch_to_section (s
);
16923 assemble_align (POINTER_SIZE
);
16924 fputs ("\t.word\t", asm_out_file
);
16925 output_addr_const (asm_out_file
, symbol
);
16926 fputs ("(target1)\n", asm_out_file
);
16929 /* Add a function to the list of static constructors. */
16932 arm_elf_asm_constructor (rtx symbol
, int priority
)
16934 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
16937 /* Add a function to the list of static destructors. */
16940 arm_elf_asm_destructor (rtx symbol
, int priority
)
16942 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
16983 /* Returns the index of the ARM condition code string in
16984 `arm_condition_codes'. COMPARISON should be an rtx like
16985 `(eq (...) (...))'. */
16986 static enum arm_cond_code
16987 get_arm_condition_code (rtx comparison
)
16989 enum machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
16990 enum arm_cond_code code
;
16991 enum rtx_code comp_code
= GET_CODE (comparison
);
16993 if (GET_MODE_CLASS (mode
) != MODE_CC
)
16994 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
16995 XEXP (comparison
, 1));
16999 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
17000 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
17001 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
17002 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
17003 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
17004 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
17005 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
17006 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
17007 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
17008 case CC_DLTUmode
: code
= ARM_CC
;
17011 gcc_assert (comp_code
== EQ
|| comp_code
== NE
);
17013 if (comp_code
== EQ
)
17014 return ARM_INVERSE_CONDITION_CODE (code
);
17020 case NE
: return ARM_NE
;
17021 case EQ
: return ARM_EQ
;
17022 case GE
: return ARM_PL
;
17023 case LT
: return ARM_MI
;
17024 default: gcc_unreachable ();
17030 case NE
: return ARM_NE
;
17031 case EQ
: return ARM_EQ
;
17032 default: gcc_unreachable ();
17038 case NE
: return ARM_MI
;
17039 case EQ
: return ARM_PL
;
17040 default: gcc_unreachable ();
17045 /* These encodings assume that AC=1 in the FPA system control
17046 byte. This allows us to handle all cases except UNEQ and
17050 case GE
: return ARM_GE
;
17051 case GT
: return ARM_GT
;
17052 case LE
: return ARM_LS
;
17053 case LT
: return ARM_MI
;
17054 case NE
: return ARM_NE
;
17055 case EQ
: return ARM_EQ
;
17056 case ORDERED
: return ARM_VC
;
17057 case UNORDERED
: return ARM_VS
;
17058 case UNLT
: return ARM_LT
;
17059 case UNLE
: return ARM_LE
;
17060 case UNGT
: return ARM_HI
;
17061 case UNGE
: return ARM_PL
;
17062 /* UNEQ and LTGT do not have a representation. */
17063 case UNEQ
: /* Fall through. */
17064 case LTGT
: /* Fall through. */
17065 default: gcc_unreachable ();
17071 case NE
: return ARM_NE
;
17072 case EQ
: return ARM_EQ
;
17073 case GE
: return ARM_LE
;
17074 case GT
: return ARM_LT
;
17075 case LE
: return ARM_GE
;
17076 case LT
: return ARM_GT
;
17077 case GEU
: return ARM_LS
;
17078 case GTU
: return ARM_CC
;
17079 case LEU
: return ARM_CS
;
17080 case LTU
: return ARM_HI
;
17081 default: gcc_unreachable ();
17087 case LTU
: return ARM_CS
;
17088 case GEU
: return ARM_CC
;
17089 default: gcc_unreachable ();
17095 case NE
: return ARM_NE
;
17096 case EQ
: return ARM_EQ
;
17097 case GEU
: return ARM_CS
;
17098 case GTU
: return ARM_HI
;
17099 case LEU
: return ARM_LS
;
17100 case LTU
: return ARM_CC
;
17101 default: gcc_unreachable ();
17107 case GE
: return ARM_GE
;
17108 case LT
: return ARM_LT
;
17109 case GEU
: return ARM_CS
;
17110 case LTU
: return ARM_CC
;
17111 default: gcc_unreachable ();
17117 case NE
: return ARM_NE
;
17118 case EQ
: return ARM_EQ
;
17119 case GE
: return ARM_GE
;
17120 case GT
: return ARM_GT
;
17121 case LE
: return ARM_LE
;
17122 case LT
: return ARM_LT
;
17123 case GEU
: return ARM_CS
;
17124 case GTU
: return ARM_HI
;
17125 case LEU
: return ARM_LS
;
17126 case LTU
: return ARM_CC
;
17127 default: gcc_unreachable ();
17130 default: gcc_unreachable ();
17134 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17137 thumb2_final_prescan_insn (rtx insn
)
17139 rtx first_insn
= insn
;
17140 rtx body
= PATTERN (insn
);
17142 enum arm_cond_code code
;
17146 /* Remove the previous insn from the count of insns to be output. */
17147 if (arm_condexec_count
)
17148 arm_condexec_count
--;
17150 /* Nothing to do if we are already inside a conditional block. */
17151 if (arm_condexec_count
)
17154 if (GET_CODE (body
) != COND_EXEC
)
17157 /* Conditional jumps are implemented directly. */
17158 if (GET_CODE (insn
) == JUMP_INSN
)
17161 predicate
= COND_EXEC_TEST (body
);
17162 arm_current_cc
= get_arm_condition_code (predicate
);
17164 n
= get_attr_ce_count (insn
);
17165 arm_condexec_count
= 1;
17166 arm_condexec_mask
= (1 << n
) - 1;
17167 arm_condexec_masklen
= n
;
17168 /* See if subsequent instructions can be combined into the same block. */
17171 insn
= next_nonnote_insn (insn
);
17173 /* Jumping into the middle of an IT block is illegal, so a label or
17174 barrier terminates the block. */
17175 if (GET_CODE (insn
) != INSN
&& GET_CODE(insn
) != JUMP_INSN
)
17178 body
= PATTERN (insn
);
17179 /* USE and CLOBBER aren't really insns, so just skip them. */
17180 if (GET_CODE (body
) == USE
17181 || GET_CODE (body
) == CLOBBER
)
17184 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17185 if (GET_CODE (body
) != COND_EXEC
)
17187 /* Allow up to 4 conditionally executed instructions in a block. */
17188 n
= get_attr_ce_count (insn
);
17189 if (arm_condexec_masklen
+ n
> 4)
17192 predicate
= COND_EXEC_TEST (body
);
17193 code
= get_arm_condition_code (predicate
);
17194 mask
= (1 << n
) - 1;
17195 if (arm_current_cc
== code
)
17196 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
17197 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
17200 arm_condexec_count
++;
17201 arm_condexec_masklen
+= n
;
17203 /* A jump must be the last instruction in a conditional block. */
17204 if (GET_CODE(insn
) == JUMP_INSN
)
17207 /* Restore recog_data (getting the attributes of other insns can
17208 destroy this array, but final.c assumes that it remains intact
17209 across this call). */
17210 extract_constrain_insn_cached (first_insn
);
17214 arm_final_prescan_insn (rtx insn
)
17216 /* BODY will hold the body of INSN. */
17217 rtx body
= PATTERN (insn
);
17219 /* This will be 1 if trying to repeat the trick, and things need to be
17220 reversed if it appears to fail. */
17223 /* If we start with a return insn, we only succeed if we find another one. */
17224 int seeking_return
= 0;
17226 /* START_INSN will hold the insn from where we start looking. This is the
17227 first insn after the following code_label if REVERSE is true. */
17228 rtx start_insn
= insn
;
17230 /* If in state 4, check if the target branch is reached, in order to
17231 change back to state 0. */
17232 if (arm_ccfsm_state
== 4)
17234 if (insn
== arm_target_insn
)
17236 arm_target_insn
= NULL
;
17237 arm_ccfsm_state
= 0;
17242 /* If in state 3, it is possible to repeat the trick, if this insn is an
17243 unconditional branch to a label, and immediately following this branch
17244 is the previous target label which is only used once, and the label this
17245 branch jumps to is not too far off. */
17246 if (arm_ccfsm_state
== 3)
17248 if (simplejump_p (insn
))
17250 start_insn
= next_nonnote_insn (start_insn
);
17251 if (GET_CODE (start_insn
) == BARRIER
)
17253 /* XXX Isn't this always a barrier? */
17254 start_insn
= next_nonnote_insn (start_insn
);
17256 if (GET_CODE (start_insn
) == CODE_LABEL
17257 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
17258 && LABEL_NUSES (start_insn
) == 1)
17263 else if (GET_CODE (body
) == RETURN
)
17265 start_insn
= next_nonnote_insn (start_insn
);
17266 if (GET_CODE (start_insn
) == BARRIER
)
17267 start_insn
= next_nonnote_insn (start_insn
);
17268 if (GET_CODE (start_insn
) == CODE_LABEL
17269 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
17270 && LABEL_NUSES (start_insn
) == 1)
17273 seeking_return
= 1;
17282 gcc_assert (!arm_ccfsm_state
|| reverse
);
17283 if (GET_CODE (insn
) != JUMP_INSN
)
17286 /* This jump might be paralleled with a clobber of the condition codes
17287 the jump should always come first */
17288 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
17289 body
= XVECEXP (body
, 0, 0);
17292 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
17293 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
17296 int fail
= FALSE
, succeed
= FALSE
;
17297 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17298 int then_not_else
= TRUE
;
17299 rtx this_insn
= start_insn
, label
= 0;
17301 /* Register the insn jumped to. */
17304 if (!seeking_return
)
17305 label
= XEXP (SET_SRC (body
), 0);
17307 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
17308 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
17309 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
17311 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
17312 then_not_else
= FALSE
;
17314 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == RETURN
)
17315 seeking_return
= 1;
17316 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == RETURN
)
17318 seeking_return
= 1;
17319 then_not_else
= FALSE
;
17322 gcc_unreachable ();
17324 /* See how many insns this branch skips, and what kind of insns. If all
17325 insns are okay, and the label or unconditional branch to the same
17326 label is not too far away, succeed. */
17327 for (insns_skipped
= 0;
17328 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
17332 this_insn
= next_nonnote_insn (this_insn
);
17336 switch (GET_CODE (this_insn
))
17339 /* Succeed if it is the target label, otherwise fail since
17340 control falls in from somewhere else. */
17341 if (this_insn
== label
)
17343 arm_ccfsm_state
= 1;
17351 /* Succeed if the following insn is the target label.
17353 If return insns are used then the last insn in a function
17354 will be a barrier. */
17355 this_insn
= next_nonnote_insn (this_insn
);
17356 if (this_insn
&& this_insn
== label
)
17358 arm_ccfsm_state
= 1;
17366 /* The AAPCS says that conditional calls should not be
17367 used since they make interworking inefficient (the
17368 linker can't transform BL<cond> into BLX). That's
17369 only a problem if the machine has BLX. */
17376 /* Succeed if the following insn is the target label, or
17377 if the following two insns are a barrier and the
17379 this_insn
= next_nonnote_insn (this_insn
);
17380 if (this_insn
&& GET_CODE (this_insn
) == BARRIER
)
17381 this_insn
= next_nonnote_insn (this_insn
);
17383 if (this_insn
&& this_insn
== label
17384 && insns_skipped
< max_insns_skipped
)
17386 arm_ccfsm_state
= 1;
17394 /* If this is an unconditional branch to the same label, succeed.
17395 If it is to another label, do nothing. If it is conditional,
17397 /* XXX Probably, the tests for SET and the PC are
17400 scanbody
= PATTERN (this_insn
);
17401 if (GET_CODE (scanbody
) == SET
17402 && GET_CODE (SET_DEST (scanbody
)) == PC
)
17404 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
17405 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
17407 arm_ccfsm_state
= 2;
17410 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
17413 /* Fail if a conditional return is undesirable (e.g. on a
17414 StrongARM), but still allow this if optimizing for size. */
17415 else if (GET_CODE (scanbody
) == RETURN
17416 && !use_return_insn (TRUE
, NULL
)
17419 else if (GET_CODE (scanbody
) == RETURN
17422 arm_ccfsm_state
= 2;
17425 else if (GET_CODE (scanbody
) == PARALLEL
)
17427 switch (get_attr_conds (this_insn
))
17437 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
17442 /* Instructions using or affecting the condition codes make it
17444 scanbody
= PATTERN (this_insn
);
17445 if (!(GET_CODE (scanbody
) == SET
17446 || GET_CODE (scanbody
) == PARALLEL
)
17447 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
17450 /* A conditional cirrus instruction must be followed by
17451 a non Cirrus instruction. However, since we
17452 conditionalize instructions in this function and by
17453 the time we get here we can't add instructions
17454 (nops), because shorten_branches() has already been
17455 called, we will disable conditionalizing Cirrus
17456 instructions to be safe. */
17457 if (GET_CODE (scanbody
) != USE
17458 && GET_CODE (scanbody
) != CLOBBER
17459 && get_attr_cirrus (this_insn
) != CIRRUS_NOT
)
17469 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
17470 arm_target_label
= CODE_LABEL_NUMBER (label
);
17473 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
17475 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
17477 this_insn
= next_nonnote_insn (this_insn
);
17478 gcc_assert (!this_insn
17479 || (GET_CODE (this_insn
) != BARRIER
17480 && GET_CODE (this_insn
) != CODE_LABEL
));
17484 /* Oh, dear! we ran off the end.. give up. */
17485 extract_constrain_insn_cached (insn
);
17486 arm_ccfsm_state
= 0;
17487 arm_target_insn
= NULL
;
17490 arm_target_insn
= this_insn
;
17493 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17496 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
17498 if (reverse
|| then_not_else
)
17499 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
17502 /* Restore recog_data (getting the attributes of other insns can
17503 destroy this array, but final.c assumes that it remains intact
17504 across this call. */
17505 extract_constrain_insn_cached (insn
);
17509 /* Output IT instructions. */
17511 thumb2_asm_output_opcode (FILE * stream
)
17516 if (arm_condexec_mask
)
17518 for (n
= 0; n
< arm_condexec_masklen
; n
++)
17519 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
17521 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
17522 arm_condition_codes
[arm_current_cc
]);
17523 arm_condexec_mask
= 0;
17527 /* Returns true if REGNO is a valid register
17528 for holding a quantity of type MODE. */
17530 arm_hard_regno_mode_ok (unsigned int regno
, enum machine_mode mode
)
17532 if (GET_MODE_CLASS (mode
) == MODE_CC
)
17533 return (regno
== CC_REGNUM
17534 || (TARGET_HARD_FLOAT
&& TARGET_VFP
17535 && regno
== VFPCC_REGNUM
));
17538 /* For the Thumb we only allow values bigger than SImode in
17539 registers 0 - 6, so that there is always a second low
17540 register available to hold the upper part of the value.
17541 We probably we ought to ensure that the register is the
17542 start of an even numbered register pair. */
17543 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
17545 if (TARGET_HARD_FLOAT
&& TARGET_MAVERICK
17546 && IS_CIRRUS_REGNUM (regno
))
17547 /* We have outlawed SI values in Cirrus registers because they
17548 reside in the lower 32 bits, but SF values reside in the
17549 upper 32 bits. This causes gcc all sorts of grief. We can't
17550 even split the registers into pairs because Cirrus SI values
17551 get sign extended to 64bits-- aldyh. */
17552 return (GET_MODE_CLASS (mode
) == MODE_FLOAT
) || (mode
== DImode
);
17554 if (TARGET_HARD_FLOAT
&& TARGET_VFP
17555 && IS_VFP_REGNUM (regno
))
17557 if (mode
== SFmode
|| mode
== SImode
)
17558 return VFP_REGNO_OK_FOR_SINGLE (regno
);
17560 if (mode
== DFmode
)
17561 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
17563 /* VFP registers can hold HFmode values, but there is no point in
17564 putting them there unless we have hardware conversion insns. */
17565 if (mode
== HFmode
)
17566 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
17569 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
17570 || (VALID_NEON_QREG_MODE (mode
)
17571 && NEON_REGNO_OK_FOR_QUAD (regno
))
17572 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
17573 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
17574 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
17575 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
17576 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
17581 if (TARGET_REALLY_IWMMXT
)
17583 if (IS_IWMMXT_GR_REGNUM (regno
))
17584 return mode
== SImode
;
17586 if (IS_IWMMXT_REGNUM (regno
))
17587 return VALID_IWMMXT_REG_MODE (mode
);
17590 /* We allow almost any value to be stored in the general registers.
17591 Restrict doubleword quantities to even register pairs so that we can
17592 use ldrd. Do not allow very large Neon structure opaque modes in
17593 general registers; they would use too many. */
17594 if (regno
<= LAST_ARM_REGNUM
)
17595 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0)
17596 && ARM_NUM_REGS (mode
) <= 4;
17598 if (regno
== FRAME_POINTER_REGNUM
17599 || regno
== ARG_POINTER_REGNUM
)
17600 /* We only allow integers in the fake hard registers. */
17601 return GET_MODE_CLASS (mode
) == MODE_INT
;
17603 /* The only registers left are the FPA registers
17604 which we only allow to hold FP values. */
17605 return (TARGET_HARD_FLOAT
&& TARGET_FPA
17606 && GET_MODE_CLASS (mode
) == MODE_FLOAT
17607 && regno
>= FIRST_FPA_REGNUM
17608 && regno
<= LAST_FPA_REGNUM
);
17611 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17612 not used in arm mode. */
17615 arm_regno_class (int regno
)
17619 if (regno
== STACK_POINTER_REGNUM
)
17621 if (regno
== CC_REGNUM
)
17628 if (TARGET_THUMB2
&& regno
< 8)
17631 if ( regno
<= LAST_ARM_REGNUM
17632 || regno
== FRAME_POINTER_REGNUM
17633 || regno
== ARG_POINTER_REGNUM
)
17634 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
17636 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
17637 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
17639 if (IS_CIRRUS_REGNUM (regno
))
17640 return CIRRUS_REGS
;
17642 if (IS_VFP_REGNUM (regno
))
17644 if (regno
<= D7_VFP_REGNUM
)
17645 return VFP_D0_D7_REGS
;
17646 else if (regno
<= LAST_LO_VFP_REGNUM
)
17647 return VFP_LO_REGS
;
17649 return VFP_HI_REGS
;
17652 if (IS_IWMMXT_REGNUM (regno
))
17653 return IWMMXT_REGS
;
17655 if (IS_IWMMXT_GR_REGNUM (regno
))
17656 return IWMMXT_GR_REGS
;
17661 /* Handle a special case when computing the offset
17662 of an argument from the frame pointer. */
17664 arm_debugger_arg_offset (int value
, rtx addr
)
17668 /* We are only interested if dbxout_parms() failed to compute the offset. */
17672 /* We can only cope with the case where the address is held in a register. */
17673 if (GET_CODE (addr
) != REG
)
17676 /* If we are using the frame pointer to point at the argument, then
17677 an offset of 0 is correct. */
17678 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
17681 /* If we are using the stack pointer to point at the
17682 argument, then an offset of 0 is correct. */
17683 /* ??? Check this is consistent with thumb2 frame layout. */
17684 if ((TARGET_THUMB
|| !frame_pointer_needed
)
17685 && REGNO (addr
) == SP_REGNUM
)
17688 /* Oh dear. The argument is pointed to by a register rather
17689 than being held in a register, or being stored at a known
17690 offset from the frame pointer. Since GDB only understands
17691 those two kinds of argument we must translate the address
17692 held in the register into an offset from the frame pointer.
17693 We do this by searching through the insns for the function
17694 looking to see where this register gets its value. If the
17695 register is initialized from the frame pointer plus an offset
17696 then we are in luck and we can continue, otherwise we give up.
17698 This code is exercised by producing debugging information
17699 for a function with arguments like this:
17701 double func (double a, double b, int c, double d) {return d;}
17703 Without this code the stab for parameter 'd' will be set to
17704 an offset of 0 from the frame pointer, rather than 8. */
17706 /* The if() statement says:
17708 If the insn is a normal instruction
17709 and if the insn is setting the value in a register
17710 and if the register being set is the register holding the address of the argument
17711 and if the address is computing by an addition
17712 that involves adding to a register
17713 which is the frame pointer
17718 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
17720 if ( GET_CODE (insn
) == INSN
17721 && GET_CODE (PATTERN (insn
)) == SET
17722 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
17723 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
17724 && GET_CODE (XEXP (XEXP (PATTERN (insn
), 1), 0)) == REG
17725 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17726 && GET_CODE (XEXP (XEXP (PATTERN (insn
), 1), 1)) == CONST_INT
17729 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
17738 warning (0, "unable to compute real location of stacked parameter");
17739 value
= 8; /* XXX magic hack */
17759 T_MAX
/* Size of enum. Keep last. */
17760 } neon_builtin_type_mode
;
17762 #define TYPE_MODE_BIT(X) (1 << (X))
17764 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
17765 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
17766 | TYPE_MODE_BIT (T_DI))
17767 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
17768 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
17769 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
17771 #define v8qi_UP T_V8QI
17772 #define v4hi_UP T_V4HI
17773 #define v2si_UP T_V2SI
17774 #define v2sf_UP T_V2SF
17776 #define v16qi_UP T_V16QI
17777 #define v8hi_UP T_V8HI
17778 #define v4si_UP T_V4SI
17779 #define v4sf_UP T_V4SF
17780 #define v2di_UP T_V2DI
17785 #define UP(X) X##_UP
17818 NEON_LOADSTRUCTLANE
,
17820 NEON_STORESTRUCTLANE
,
17829 const neon_itype itype
;
17830 const neon_builtin_type_mode mode
;
17831 const enum insn_code code
;
17832 unsigned int fcode
;
17833 } neon_builtin_datum
;
17835 #define CF(N,X) CODE_FOR_neon_##N##X
17837 #define VAR1(T, N, A) \
17838 {#N, NEON_##T, UP (A), CF (N, A), 0}
17839 #define VAR2(T, N, A, B) \
17841 {#N, NEON_##T, UP (B), CF (N, B), 0}
17842 #define VAR3(T, N, A, B, C) \
17843 VAR2 (T, N, A, B), \
17844 {#N, NEON_##T, UP (C), CF (N, C), 0}
17845 #define VAR4(T, N, A, B, C, D) \
17846 VAR3 (T, N, A, B, C), \
17847 {#N, NEON_##T, UP (D), CF (N, D), 0}
17848 #define VAR5(T, N, A, B, C, D, E) \
17849 VAR4 (T, N, A, B, C, D), \
17850 {#N, NEON_##T, UP (E), CF (N, E), 0}
17851 #define VAR6(T, N, A, B, C, D, E, F) \
17852 VAR5 (T, N, A, B, C, D, E), \
17853 {#N, NEON_##T, UP (F), CF (N, F), 0}
17854 #define VAR7(T, N, A, B, C, D, E, F, G) \
17855 VAR6 (T, N, A, B, C, D, E, F), \
17856 {#N, NEON_##T, UP (G), CF (N, G), 0}
17857 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17858 VAR7 (T, N, A, B, C, D, E, F, G), \
17859 {#N, NEON_##T, UP (H), CF (N, H), 0}
17860 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17861 VAR8 (T, N, A, B, C, D, E, F, G, H), \
17862 {#N, NEON_##T, UP (I), CF (N, I), 0}
17863 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17864 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
17865 {#N, NEON_##T, UP (J), CF (N, J), 0}
17867 /* The mode entries in the following table correspond to the "key" type of the
17868 instruction variant, i.e. equivalent to that which would be specified after
17869 the assembler mnemonic, which usually refers to the last vector operand.
17870 (Signed/unsigned/polynomial types are not differentiated between though, and
17871 are all mapped onto the same mode for a given element size.) The modes
17872 listed per instruction should be the same as those defined for that
17873 instruction's pattern in neon.md. */
17875 static neon_builtin_datum neon_builtin_data
[] =
17877 VAR10 (BINOP
, vadd
,
17878 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17879 VAR3 (BINOP
, vaddl
, v8qi
, v4hi
, v2si
),
17880 VAR3 (BINOP
, vaddw
, v8qi
, v4hi
, v2si
),
17881 VAR6 (BINOP
, vhadd
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17882 VAR8 (BINOP
, vqadd
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17883 VAR3 (BINOP
, vaddhn
, v8hi
, v4si
, v2di
),
17884 VAR8 (BINOP
, vmul
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17885 VAR8 (TERNOP
, vmla
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17886 VAR3 (TERNOP
, vmlal
, v8qi
, v4hi
, v2si
),
17887 VAR8 (TERNOP
, vmls
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17888 VAR3 (TERNOP
, vmlsl
, v8qi
, v4hi
, v2si
),
17889 VAR4 (BINOP
, vqdmulh
, v4hi
, v2si
, v8hi
, v4si
),
17890 VAR2 (TERNOP
, vqdmlal
, v4hi
, v2si
),
17891 VAR2 (TERNOP
, vqdmlsl
, v4hi
, v2si
),
17892 VAR3 (BINOP
, vmull
, v8qi
, v4hi
, v2si
),
17893 VAR2 (SCALARMULL
, vmull_n
, v4hi
, v2si
),
17894 VAR2 (LANEMULL
, vmull_lane
, v4hi
, v2si
),
17895 VAR2 (SCALARMULL
, vqdmull_n
, v4hi
, v2si
),
17896 VAR2 (LANEMULL
, vqdmull_lane
, v4hi
, v2si
),
17897 VAR4 (SCALARMULH
, vqdmulh_n
, v4hi
, v2si
, v8hi
, v4si
),
17898 VAR4 (LANEMULH
, vqdmulh_lane
, v4hi
, v2si
, v8hi
, v4si
),
17899 VAR2 (BINOP
, vqdmull
, v4hi
, v2si
),
17900 VAR8 (BINOP
, vshl
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17901 VAR8 (BINOP
, vqshl
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17902 VAR8 (SHIFTIMM
, vshr_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17903 VAR3 (SHIFTIMM
, vshrn_n
, v8hi
, v4si
, v2di
),
17904 VAR3 (SHIFTIMM
, vqshrn_n
, v8hi
, v4si
, v2di
),
17905 VAR3 (SHIFTIMM
, vqshrun_n
, v8hi
, v4si
, v2di
),
17906 VAR8 (SHIFTIMM
, vshl_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17907 VAR8 (SHIFTIMM
, vqshl_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17908 VAR8 (SHIFTIMM
, vqshlu_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17909 VAR3 (SHIFTIMM
, vshll_n
, v8qi
, v4hi
, v2si
),
17910 VAR8 (SHIFTACC
, vsra_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17911 VAR10 (BINOP
, vsub
,
17912 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17913 VAR3 (BINOP
, vsubl
, v8qi
, v4hi
, v2si
),
17914 VAR3 (BINOP
, vsubw
, v8qi
, v4hi
, v2si
),
17915 VAR8 (BINOP
, vqsub
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17916 VAR6 (BINOP
, vhsub
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17917 VAR3 (BINOP
, vsubhn
, v8hi
, v4si
, v2di
),
17918 VAR8 (BINOP
, vceq
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17919 VAR8 (BINOP
, vcge
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17920 VAR8 (BINOP
, vcgt
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17921 VAR2 (BINOP
, vcage
, v2sf
, v4sf
),
17922 VAR2 (BINOP
, vcagt
, v2sf
, v4sf
),
17923 VAR6 (BINOP
, vtst
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17924 VAR8 (BINOP
, vabd
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17925 VAR3 (BINOP
, vabdl
, v8qi
, v4hi
, v2si
),
17926 VAR6 (TERNOP
, vaba
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17927 VAR3 (TERNOP
, vabal
, v8qi
, v4hi
, v2si
),
17928 VAR8 (BINOP
, vmax
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17929 VAR8 (BINOP
, vmin
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17930 VAR4 (BINOP
, vpadd
, v8qi
, v4hi
, v2si
, v2sf
),
17931 VAR6 (UNOP
, vpaddl
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17932 VAR6 (BINOP
, vpadal
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17933 VAR4 (BINOP
, vpmax
, v8qi
, v4hi
, v2si
, v2sf
),
17934 VAR4 (BINOP
, vpmin
, v8qi
, v4hi
, v2si
, v2sf
),
17935 VAR2 (BINOP
, vrecps
, v2sf
, v4sf
),
17936 VAR2 (BINOP
, vrsqrts
, v2sf
, v4sf
),
17937 VAR8 (SHIFTINSERT
, vsri_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17938 VAR8 (SHIFTINSERT
, vsli_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17939 VAR8 (UNOP
, vabs
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17940 VAR6 (UNOP
, vqabs
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17941 VAR8 (UNOP
, vneg
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17942 VAR6 (UNOP
, vqneg
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17943 VAR6 (UNOP
, vcls
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17944 VAR6 (UNOP
, vclz
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17945 VAR2 (UNOP
, vcnt
, v8qi
, v16qi
),
17946 VAR4 (UNOP
, vrecpe
, v2si
, v2sf
, v4si
, v4sf
),
17947 VAR4 (UNOP
, vrsqrte
, v2si
, v2sf
, v4si
, v4sf
),
17948 VAR6 (UNOP
, vmvn
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17949 /* FIXME: vget_lane supports more variants than this! */
17950 VAR10 (GETLANE
, vget_lane
,
17951 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17952 VAR10 (SETLANE
, vset_lane
,
17953 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17954 VAR5 (CREATE
, vcreate
, v8qi
, v4hi
, v2si
, v2sf
, di
),
17955 VAR10 (DUP
, vdup_n
,
17956 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17957 VAR10 (DUPLANE
, vdup_lane
,
17958 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17959 VAR5 (COMBINE
, vcombine
, v8qi
, v4hi
, v2si
, v2sf
, di
),
17960 VAR5 (SPLIT
, vget_high
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17961 VAR5 (SPLIT
, vget_low
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17962 VAR3 (UNOP
, vmovn
, v8hi
, v4si
, v2di
),
17963 VAR3 (UNOP
, vqmovn
, v8hi
, v4si
, v2di
),
17964 VAR3 (UNOP
, vqmovun
, v8hi
, v4si
, v2di
),
17965 VAR3 (UNOP
, vmovl
, v8qi
, v4hi
, v2si
),
17966 VAR6 (LANEMUL
, vmul_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17967 VAR6 (LANEMAC
, vmla_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17968 VAR2 (LANEMAC
, vmlal_lane
, v4hi
, v2si
),
17969 VAR2 (LANEMAC
, vqdmlal_lane
, v4hi
, v2si
),
17970 VAR6 (LANEMAC
, vmls_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17971 VAR2 (LANEMAC
, vmlsl_lane
, v4hi
, v2si
),
17972 VAR2 (LANEMAC
, vqdmlsl_lane
, v4hi
, v2si
),
17973 VAR6 (SCALARMUL
, vmul_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17974 VAR6 (SCALARMAC
, vmla_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17975 VAR2 (SCALARMAC
, vmlal_n
, v4hi
, v2si
),
17976 VAR2 (SCALARMAC
, vqdmlal_n
, v4hi
, v2si
),
17977 VAR6 (SCALARMAC
, vmls_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17978 VAR2 (SCALARMAC
, vmlsl_n
, v4hi
, v2si
),
17979 VAR2 (SCALARMAC
, vqdmlsl_n
, v4hi
, v2si
),
17980 VAR10 (BINOP
, vext
,
17981 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17982 VAR8 (UNOP
, vrev64
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17983 VAR4 (UNOP
, vrev32
, v8qi
, v4hi
, v16qi
, v8hi
),
17984 VAR2 (UNOP
, vrev16
, v8qi
, v16qi
),
17985 VAR4 (CONVERT
, vcvt
, v2si
, v2sf
, v4si
, v4sf
),
17986 VAR4 (FIXCONV
, vcvt_n
, v2si
, v2sf
, v4si
, v4sf
),
17987 VAR10 (SELECT
, vbsl
,
17988 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17989 VAR1 (VTBL
, vtbl1
, v8qi
),
17990 VAR1 (VTBL
, vtbl2
, v8qi
),
17991 VAR1 (VTBL
, vtbl3
, v8qi
),
17992 VAR1 (VTBL
, vtbl4
, v8qi
),
17993 VAR1 (VTBX
, vtbx1
, v8qi
),
17994 VAR1 (VTBX
, vtbx2
, v8qi
),
17995 VAR1 (VTBX
, vtbx3
, v8qi
),
17996 VAR1 (VTBX
, vtbx4
, v8qi
),
17997 VAR8 (RESULTPAIR
, vtrn
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17998 VAR8 (RESULTPAIR
, vzip
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17999 VAR8 (RESULTPAIR
, vuzp
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18000 VAR5 (REINTERP
, vreinterpretv8qi
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18001 VAR5 (REINTERP
, vreinterpretv4hi
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18002 VAR5 (REINTERP
, vreinterpretv2si
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18003 VAR5 (REINTERP
, vreinterpretv2sf
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18004 VAR5 (REINTERP
, vreinterpretdi
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18005 VAR5 (REINTERP
, vreinterpretv16qi
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18006 VAR5 (REINTERP
, vreinterpretv8hi
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18007 VAR5 (REINTERP
, vreinterpretv4si
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18008 VAR5 (REINTERP
, vreinterpretv4sf
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18009 VAR5 (REINTERP
, vreinterpretv2di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18010 VAR10 (LOAD1
, vld1
,
18011 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18012 VAR10 (LOAD1LANE
, vld1_lane
,
18013 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18014 VAR10 (LOAD1
, vld1_dup
,
18015 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18016 VAR10 (STORE1
, vst1
,
18017 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18018 VAR10 (STORE1LANE
, vst1_lane
,
18019 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18021 vld2
, v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18022 VAR7 (LOADSTRUCTLANE
, vld2_lane
,
18023 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18024 VAR5 (LOADSTRUCT
, vld2_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18025 VAR9 (STORESTRUCT
, vst2
,
18026 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18027 VAR7 (STORESTRUCTLANE
, vst2_lane
,
18028 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18030 vld3
, v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18031 VAR7 (LOADSTRUCTLANE
, vld3_lane
,
18032 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18033 VAR5 (LOADSTRUCT
, vld3_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18034 VAR9 (STORESTRUCT
, vst3
,
18035 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18036 VAR7 (STORESTRUCTLANE
, vst3_lane
,
18037 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18038 VAR9 (LOADSTRUCT
, vld4
,
18039 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18040 VAR7 (LOADSTRUCTLANE
, vld4_lane
,
18041 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18042 VAR5 (LOADSTRUCT
, vld4_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18043 VAR9 (STORESTRUCT
, vst4
,
18044 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18045 VAR7 (STORESTRUCTLANE
, vst4_lane
,
18046 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18047 VAR10 (LOGICBINOP
, vand
,
18048 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18049 VAR10 (LOGICBINOP
, vorr
,
18050 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18051 VAR10 (BINOP
, veor
,
18052 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18053 VAR10 (LOGICBINOP
, vbic
,
18054 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18055 VAR10 (LOGICBINOP
, vorn
,
18056 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
)
18071 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
18072 symbolic names defined here (which would require too much duplication).
18076 ARM_BUILTIN_GETWCX
,
18077 ARM_BUILTIN_SETWCX
,
18081 ARM_BUILTIN_WAVG2BR
,
18082 ARM_BUILTIN_WAVG2HR
,
18083 ARM_BUILTIN_WAVG2B
,
18084 ARM_BUILTIN_WAVG2H
,
18091 ARM_BUILTIN_WMACSZ
,
18093 ARM_BUILTIN_WMACUZ
,
18096 ARM_BUILTIN_WSADBZ
,
18098 ARM_BUILTIN_WSADHZ
,
18100 ARM_BUILTIN_WALIGN
,
18103 ARM_BUILTIN_TMIAPH
,
18104 ARM_BUILTIN_TMIABB
,
18105 ARM_BUILTIN_TMIABT
,
18106 ARM_BUILTIN_TMIATB
,
18107 ARM_BUILTIN_TMIATT
,
18109 ARM_BUILTIN_TMOVMSKB
,
18110 ARM_BUILTIN_TMOVMSKH
,
18111 ARM_BUILTIN_TMOVMSKW
,
18113 ARM_BUILTIN_TBCSTB
,
18114 ARM_BUILTIN_TBCSTH
,
18115 ARM_BUILTIN_TBCSTW
,
18117 ARM_BUILTIN_WMADDS
,
18118 ARM_BUILTIN_WMADDU
,
18120 ARM_BUILTIN_WPACKHSS
,
18121 ARM_BUILTIN_WPACKWSS
,
18122 ARM_BUILTIN_WPACKDSS
,
18123 ARM_BUILTIN_WPACKHUS
,
18124 ARM_BUILTIN_WPACKWUS
,
18125 ARM_BUILTIN_WPACKDUS
,
18130 ARM_BUILTIN_WADDSSB
,
18131 ARM_BUILTIN_WADDSSH
,
18132 ARM_BUILTIN_WADDSSW
,
18133 ARM_BUILTIN_WADDUSB
,
18134 ARM_BUILTIN_WADDUSH
,
18135 ARM_BUILTIN_WADDUSW
,
18139 ARM_BUILTIN_WSUBSSB
,
18140 ARM_BUILTIN_WSUBSSH
,
18141 ARM_BUILTIN_WSUBSSW
,
18142 ARM_BUILTIN_WSUBUSB
,
18143 ARM_BUILTIN_WSUBUSH
,
18144 ARM_BUILTIN_WSUBUSW
,
18151 ARM_BUILTIN_WCMPEQB
,
18152 ARM_BUILTIN_WCMPEQH
,
18153 ARM_BUILTIN_WCMPEQW
,
18154 ARM_BUILTIN_WCMPGTUB
,
18155 ARM_BUILTIN_WCMPGTUH
,
18156 ARM_BUILTIN_WCMPGTUW
,
18157 ARM_BUILTIN_WCMPGTSB
,
18158 ARM_BUILTIN_WCMPGTSH
,
18159 ARM_BUILTIN_WCMPGTSW
,
18161 ARM_BUILTIN_TEXTRMSB
,
18162 ARM_BUILTIN_TEXTRMSH
,
18163 ARM_BUILTIN_TEXTRMSW
,
18164 ARM_BUILTIN_TEXTRMUB
,
18165 ARM_BUILTIN_TEXTRMUH
,
18166 ARM_BUILTIN_TEXTRMUW
,
18167 ARM_BUILTIN_TINSRB
,
18168 ARM_BUILTIN_TINSRH
,
18169 ARM_BUILTIN_TINSRW
,
18171 ARM_BUILTIN_WMAXSW
,
18172 ARM_BUILTIN_WMAXSH
,
18173 ARM_BUILTIN_WMAXSB
,
18174 ARM_BUILTIN_WMAXUW
,
18175 ARM_BUILTIN_WMAXUH
,
18176 ARM_BUILTIN_WMAXUB
,
18177 ARM_BUILTIN_WMINSW
,
18178 ARM_BUILTIN_WMINSH
,
18179 ARM_BUILTIN_WMINSB
,
18180 ARM_BUILTIN_WMINUW
,
18181 ARM_BUILTIN_WMINUH
,
18182 ARM_BUILTIN_WMINUB
,
18184 ARM_BUILTIN_WMULUM
,
18185 ARM_BUILTIN_WMULSM
,
18186 ARM_BUILTIN_WMULUL
,
18188 ARM_BUILTIN_PSADBH
,
18189 ARM_BUILTIN_WSHUFH
,
18203 ARM_BUILTIN_WSLLHI
,
18204 ARM_BUILTIN_WSLLWI
,
18205 ARM_BUILTIN_WSLLDI
,
18206 ARM_BUILTIN_WSRAHI
,
18207 ARM_BUILTIN_WSRAWI
,
18208 ARM_BUILTIN_WSRADI
,
18209 ARM_BUILTIN_WSRLHI
,
18210 ARM_BUILTIN_WSRLWI
,
18211 ARM_BUILTIN_WSRLDI
,
18212 ARM_BUILTIN_WRORHI
,
18213 ARM_BUILTIN_WRORWI
,
18214 ARM_BUILTIN_WRORDI
,
18216 ARM_BUILTIN_WUNPCKIHB
,
18217 ARM_BUILTIN_WUNPCKIHH
,
18218 ARM_BUILTIN_WUNPCKIHW
,
18219 ARM_BUILTIN_WUNPCKILB
,
18220 ARM_BUILTIN_WUNPCKILH
,
18221 ARM_BUILTIN_WUNPCKILW
,
18223 ARM_BUILTIN_WUNPCKEHSB
,
18224 ARM_BUILTIN_WUNPCKEHSH
,
18225 ARM_BUILTIN_WUNPCKEHSW
,
18226 ARM_BUILTIN_WUNPCKEHUB
,
18227 ARM_BUILTIN_WUNPCKEHUH
,
18228 ARM_BUILTIN_WUNPCKEHUW
,
18229 ARM_BUILTIN_WUNPCKELSB
,
18230 ARM_BUILTIN_WUNPCKELSH
,
18231 ARM_BUILTIN_WUNPCKELSW
,
18232 ARM_BUILTIN_WUNPCKELUB
,
18233 ARM_BUILTIN_WUNPCKELUH
,
18234 ARM_BUILTIN_WUNPCKELUW
,
18236 ARM_BUILTIN_THREAD_POINTER
,
18238 ARM_BUILTIN_NEON_BASE
,
18240 ARM_BUILTIN_MAX
= ARM_BUILTIN_NEON_BASE
+ ARRAY_SIZE (neon_builtin_data
)
18243 static GTY(()) tree arm_builtin_decls
[ARM_BUILTIN_MAX
];
18246 arm_init_neon_builtins (void)
18248 unsigned int i
, fcode
;
18251 tree neon_intQI_type_node
;
18252 tree neon_intHI_type_node
;
18253 tree neon_polyQI_type_node
;
18254 tree neon_polyHI_type_node
;
18255 tree neon_intSI_type_node
;
18256 tree neon_intDI_type_node
;
18257 tree neon_float_type_node
;
18259 tree intQI_pointer_node
;
18260 tree intHI_pointer_node
;
18261 tree intSI_pointer_node
;
18262 tree intDI_pointer_node
;
18263 tree float_pointer_node
;
18265 tree const_intQI_node
;
18266 tree const_intHI_node
;
18267 tree const_intSI_node
;
18268 tree const_intDI_node
;
18269 tree const_float_node
;
18271 tree const_intQI_pointer_node
;
18272 tree const_intHI_pointer_node
;
18273 tree const_intSI_pointer_node
;
18274 tree const_intDI_pointer_node
;
18275 tree const_float_pointer_node
;
18277 tree V8QI_type_node
;
18278 tree V4HI_type_node
;
18279 tree V2SI_type_node
;
18280 tree V2SF_type_node
;
18281 tree V16QI_type_node
;
18282 tree V8HI_type_node
;
18283 tree V4SI_type_node
;
18284 tree V4SF_type_node
;
18285 tree V2DI_type_node
;
18287 tree intUQI_type_node
;
18288 tree intUHI_type_node
;
18289 tree intUSI_type_node
;
18290 tree intUDI_type_node
;
18292 tree intEI_type_node
;
18293 tree intOI_type_node
;
18294 tree intCI_type_node
;
18295 tree intXI_type_node
;
18297 tree V8QI_pointer_node
;
18298 tree V4HI_pointer_node
;
18299 tree V2SI_pointer_node
;
18300 tree V2SF_pointer_node
;
18301 tree V16QI_pointer_node
;
18302 tree V8HI_pointer_node
;
18303 tree V4SI_pointer_node
;
18304 tree V4SF_pointer_node
;
18305 tree V2DI_pointer_node
;
18307 tree void_ftype_pv8qi_v8qi_v8qi
;
18308 tree void_ftype_pv4hi_v4hi_v4hi
;
18309 tree void_ftype_pv2si_v2si_v2si
;
18310 tree void_ftype_pv2sf_v2sf_v2sf
;
18311 tree void_ftype_pdi_di_di
;
18312 tree void_ftype_pv16qi_v16qi_v16qi
;
18313 tree void_ftype_pv8hi_v8hi_v8hi
;
18314 tree void_ftype_pv4si_v4si_v4si
;
18315 tree void_ftype_pv4sf_v4sf_v4sf
;
18316 tree void_ftype_pv2di_v2di_v2di
;
18318 tree reinterp_ftype_dreg
[5][5];
18319 tree reinterp_ftype_qreg
[5][5];
18320 tree dreg_types
[5], qreg_types
[5];
18322 /* Create distinguished type nodes for NEON vector element types,
18323 and pointers to values of such types, so we can detect them later. */
18324 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
18325 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
18326 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
18327 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
18328 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
18329 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
18330 neon_float_type_node
= make_node (REAL_TYPE
);
18331 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
18332 layout_type (neon_float_type_node
);
18334 /* Define typedefs which exactly correspond to the modes we are basing vector
18335 types on. If you change these names you'll need to change
18336 the table used by arm_mangle_type too. */
18337 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
18338 "__builtin_neon_qi");
18339 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
18340 "__builtin_neon_hi");
18341 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
18342 "__builtin_neon_si");
18343 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
18344 "__builtin_neon_sf");
18345 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
18346 "__builtin_neon_di");
18347 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
18348 "__builtin_neon_poly8");
18349 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
18350 "__builtin_neon_poly16");
18352 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
18353 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
18354 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
18355 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
18356 float_pointer_node
= build_pointer_type (neon_float_type_node
);
18358 /* Next create constant-qualified versions of the above types. */
18359 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
18361 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
18363 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
18365 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
18367 const_float_node
= build_qualified_type (neon_float_type_node
,
18370 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
18371 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
18372 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
18373 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
18374 const_float_pointer_node
= build_pointer_type (const_float_node
);
18376 /* Now create vector types based on our NEON element types. */
18377 /* 64-bit vectors. */
18379 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
18381 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
18383 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
18385 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
18386 /* 128-bit vectors. */
18388 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
18390 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
18392 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
18394 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
18396 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
18398 /* Unsigned integer types for various mode sizes. */
18399 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
18400 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
18401 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
18402 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
18404 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
18405 "__builtin_neon_uqi");
18406 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
18407 "__builtin_neon_uhi");
18408 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
18409 "__builtin_neon_usi");
18410 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
18411 "__builtin_neon_udi");
18413 /* Opaque integer types for structures of vectors. */
18414 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
18415 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
18416 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
18417 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
18419 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
18420 "__builtin_neon_ti");
18421 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
18422 "__builtin_neon_ei");
18423 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
18424 "__builtin_neon_oi");
18425 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
18426 "__builtin_neon_ci");
18427 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
18428 "__builtin_neon_xi");
18430 /* Pointers to vector types. */
18431 V8QI_pointer_node
= build_pointer_type (V8QI_type_node
);
18432 V4HI_pointer_node
= build_pointer_type (V4HI_type_node
);
18433 V2SI_pointer_node
= build_pointer_type (V2SI_type_node
);
18434 V2SF_pointer_node
= build_pointer_type (V2SF_type_node
);
18435 V16QI_pointer_node
= build_pointer_type (V16QI_type_node
);
18436 V8HI_pointer_node
= build_pointer_type (V8HI_type_node
);
18437 V4SI_pointer_node
= build_pointer_type (V4SI_type_node
);
18438 V4SF_pointer_node
= build_pointer_type (V4SF_type_node
);
18439 V2DI_pointer_node
= build_pointer_type (V2DI_type_node
);
18441 /* Operations which return results as pairs. */
18442 void_ftype_pv8qi_v8qi_v8qi
=
18443 build_function_type_list (void_type_node
, V8QI_pointer_node
, V8QI_type_node
,
18444 V8QI_type_node
, NULL
);
18445 void_ftype_pv4hi_v4hi_v4hi
=
18446 build_function_type_list (void_type_node
, V4HI_pointer_node
, V4HI_type_node
,
18447 V4HI_type_node
, NULL
);
18448 void_ftype_pv2si_v2si_v2si
=
18449 build_function_type_list (void_type_node
, V2SI_pointer_node
, V2SI_type_node
,
18450 V2SI_type_node
, NULL
);
18451 void_ftype_pv2sf_v2sf_v2sf
=
18452 build_function_type_list (void_type_node
, V2SF_pointer_node
, V2SF_type_node
,
18453 V2SF_type_node
, NULL
);
18454 void_ftype_pdi_di_di
=
18455 build_function_type_list (void_type_node
, intDI_pointer_node
,
18456 neon_intDI_type_node
, neon_intDI_type_node
, NULL
);
18457 void_ftype_pv16qi_v16qi_v16qi
=
18458 build_function_type_list (void_type_node
, V16QI_pointer_node
,
18459 V16QI_type_node
, V16QI_type_node
, NULL
);
18460 void_ftype_pv8hi_v8hi_v8hi
=
18461 build_function_type_list (void_type_node
, V8HI_pointer_node
, V8HI_type_node
,
18462 V8HI_type_node
, NULL
);
18463 void_ftype_pv4si_v4si_v4si
=
18464 build_function_type_list (void_type_node
, V4SI_pointer_node
, V4SI_type_node
,
18465 V4SI_type_node
, NULL
);
18466 void_ftype_pv4sf_v4sf_v4sf
=
18467 build_function_type_list (void_type_node
, V4SF_pointer_node
, V4SF_type_node
,
18468 V4SF_type_node
, NULL
);
18469 void_ftype_pv2di_v2di_v2di
=
18470 build_function_type_list (void_type_node
, V2DI_pointer_node
, V2DI_type_node
,
18471 V2DI_type_node
, NULL
);
18473 dreg_types
[0] = V8QI_type_node
;
18474 dreg_types
[1] = V4HI_type_node
;
18475 dreg_types
[2] = V2SI_type_node
;
18476 dreg_types
[3] = V2SF_type_node
;
18477 dreg_types
[4] = neon_intDI_type_node
;
18479 qreg_types
[0] = V16QI_type_node
;
18480 qreg_types
[1] = V8HI_type_node
;
18481 qreg_types
[2] = V4SI_type_node
;
18482 qreg_types
[3] = V4SF_type_node
;
18483 qreg_types
[4] = V2DI_type_node
;
18485 for (i
= 0; i
< 5; i
++)
18488 for (j
= 0; j
< 5; j
++)
18490 reinterp_ftype_dreg
[i
][j
]
18491 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
18492 reinterp_ftype_qreg
[i
][j
]
18493 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
18497 for (i
= 0, fcode
= ARM_BUILTIN_NEON_BASE
;
18498 i
< ARRAY_SIZE (neon_builtin_data
);
18501 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
18503 const char* const modenames
[] = {
18504 "v8qi", "v4hi", "v2si", "v2sf", "di",
18505 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
18510 int is_load
= 0, is_store
= 0;
18512 gcc_assert (ARRAY_SIZE (modenames
) == T_MAX
);
18519 case NEON_LOAD1LANE
:
18520 case NEON_LOADSTRUCT
:
18521 case NEON_LOADSTRUCTLANE
:
18523 /* Fall through. */
18525 case NEON_STORE1LANE
:
18526 case NEON_STORESTRUCT
:
18527 case NEON_STORESTRUCTLANE
:
18530 /* Fall through. */
18533 case NEON_LOGICBINOP
:
18534 case NEON_SHIFTINSERT
:
18541 case NEON_SHIFTIMM
:
18542 case NEON_SHIFTACC
:
18548 case NEON_LANEMULL
:
18549 case NEON_LANEMULH
:
18551 case NEON_SCALARMUL
:
18552 case NEON_SCALARMULL
:
18553 case NEON_SCALARMULH
:
18554 case NEON_SCALARMAC
:
18560 tree return_type
= void_type_node
, args
= void_list_node
;
18562 /* Build a function type directly from the insn_data for
18563 this builtin. The build_function_type() function takes
18564 care of removing duplicates for us. */
18565 for (k
= insn_data
[d
->code
].n_generator_args
- 1; k
>= 0; k
--)
18569 if (is_load
&& k
== 1)
18571 /* Neon load patterns always have the memory
18572 operand in the operand 1 position. */
18573 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
18574 == neon_struct_operand
);
18580 eltype
= const_intQI_pointer_node
;
18585 eltype
= const_intHI_pointer_node
;
18590 eltype
= const_intSI_pointer_node
;
18595 eltype
= const_float_pointer_node
;
18600 eltype
= const_intDI_pointer_node
;
18603 default: gcc_unreachable ();
18606 else if (is_store
&& k
== 0)
18608 /* Similarly, Neon store patterns use operand 0 as
18609 the memory location to store to. */
18610 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
18611 == neon_struct_operand
);
18617 eltype
= intQI_pointer_node
;
18622 eltype
= intHI_pointer_node
;
18627 eltype
= intSI_pointer_node
;
18632 eltype
= float_pointer_node
;
18637 eltype
= intDI_pointer_node
;
18640 default: gcc_unreachable ();
18645 switch (insn_data
[d
->code
].operand
[k
].mode
)
18647 case VOIDmode
: eltype
= void_type_node
; break;
18649 case QImode
: eltype
= neon_intQI_type_node
; break;
18650 case HImode
: eltype
= neon_intHI_type_node
; break;
18651 case SImode
: eltype
= neon_intSI_type_node
; break;
18652 case SFmode
: eltype
= neon_float_type_node
; break;
18653 case DImode
: eltype
= neon_intDI_type_node
; break;
18654 case TImode
: eltype
= intTI_type_node
; break;
18655 case EImode
: eltype
= intEI_type_node
; break;
18656 case OImode
: eltype
= intOI_type_node
; break;
18657 case CImode
: eltype
= intCI_type_node
; break;
18658 case XImode
: eltype
= intXI_type_node
; break;
18659 /* 64-bit vectors. */
18660 case V8QImode
: eltype
= V8QI_type_node
; break;
18661 case V4HImode
: eltype
= V4HI_type_node
; break;
18662 case V2SImode
: eltype
= V2SI_type_node
; break;
18663 case V2SFmode
: eltype
= V2SF_type_node
; break;
18664 /* 128-bit vectors. */
18665 case V16QImode
: eltype
= V16QI_type_node
; break;
18666 case V8HImode
: eltype
= V8HI_type_node
; break;
18667 case V4SImode
: eltype
= V4SI_type_node
; break;
18668 case V4SFmode
: eltype
= V4SF_type_node
; break;
18669 case V2DImode
: eltype
= V2DI_type_node
; break;
18670 default: gcc_unreachable ();
18674 if (k
== 0 && !is_store
)
18675 return_type
= eltype
;
18677 args
= tree_cons (NULL_TREE
, eltype
, args
);
18680 ftype
= build_function_type (return_type
, args
);
18684 case NEON_RESULTPAIR
:
18686 switch (insn_data
[d
->code
].operand
[1].mode
)
18688 case V8QImode
: ftype
= void_ftype_pv8qi_v8qi_v8qi
; break;
18689 case V4HImode
: ftype
= void_ftype_pv4hi_v4hi_v4hi
; break;
18690 case V2SImode
: ftype
= void_ftype_pv2si_v2si_v2si
; break;
18691 case V2SFmode
: ftype
= void_ftype_pv2sf_v2sf_v2sf
; break;
18692 case DImode
: ftype
= void_ftype_pdi_di_di
; break;
18693 case V16QImode
: ftype
= void_ftype_pv16qi_v16qi_v16qi
; break;
18694 case V8HImode
: ftype
= void_ftype_pv8hi_v8hi_v8hi
; break;
18695 case V4SImode
: ftype
= void_ftype_pv4si_v4si_v4si
; break;
18696 case V4SFmode
: ftype
= void_ftype_pv4sf_v4sf_v4sf
; break;
18697 case V2DImode
: ftype
= void_ftype_pv2di_v2di_v2di
; break;
18698 default: gcc_unreachable ();
18703 case NEON_REINTERP
:
18705 /* We iterate over 5 doubleword types, then 5 quadword
18707 int rhs
= d
->mode
% 5;
18708 switch (insn_data
[d
->code
].operand
[0].mode
)
18710 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
18711 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
18712 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
18713 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
18714 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
18715 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
18716 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
18717 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
18718 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
18719 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
18720 default: gcc_unreachable ();
18726 gcc_unreachable ();
18729 gcc_assert (ftype
!= NULL
);
18731 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[d
->mode
]);
18733 decl
= add_builtin_function (namebuf
, ftype
, fcode
, BUILT_IN_MD
, NULL
,
18735 arm_builtin_decls
[fcode
] = decl
;
/* Define builtin NAME with function type TYPE and builtin code CODE, but
   only when every feature bit in MASK is enabled on the target (tested
   against the global insn_flags).  The resulting decl is recorded in
   arm_builtin_decls so TARGET_BUILTIN_DECL (arm_builtin_decl) can find it.
   Wrapped in do/while (0) so it behaves as a single statement.  */
#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
  do									\
    {									\
      if ((MASK) & insn_flags)						\
	{								\
	  tree bdecl;							\
	  bdecl = add_builtin_function ((NAME), (TYPE), (CODE),		\
					BUILT_IN_MD, NULL, NULL_TREE);	\
	  arm_builtin_decls[CODE] = bdecl;				\
	}								\
    }									\
  while (0)
18752 struct builtin_description
18754 const unsigned int mask
;
18755 const enum insn_code icode
;
18756 const char * const name
;
18757 const enum arm_builtins code
;
18758 const enum rtx_code comparison
;
18759 const unsigned int flag
;
18762 static const struct builtin_description bdesc_2arg
[] =
18764 #define IWMMXT_BUILTIN(code, string, builtin) \
18765 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
18766 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
18768 IWMMXT_BUILTIN (addv8qi3
, "waddb", WADDB
)
18769 IWMMXT_BUILTIN (addv4hi3
, "waddh", WADDH
)
18770 IWMMXT_BUILTIN (addv2si3
, "waddw", WADDW
)
18771 IWMMXT_BUILTIN (subv8qi3
, "wsubb", WSUBB
)
18772 IWMMXT_BUILTIN (subv4hi3
, "wsubh", WSUBH
)
18773 IWMMXT_BUILTIN (subv2si3
, "wsubw", WSUBW
)
18774 IWMMXT_BUILTIN (ssaddv8qi3
, "waddbss", WADDSSB
)
18775 IWMMXT_BUILTIN (ssaddv4hi3
, "waddhss", WADDSSH
)
18776 IWMMXT_BUILTIN (ssaddv2si3
, "waddwss", WADDSSW
)
18777 IWMMXT_BUILTIN (sssubv8qi3
, "wsubbss", WSUBSSB
)
18778 IWMMXT_BUILTIN (sssubv4hi3
, "wsubhss", WSUBSSH
)
18779 IWMMXT_BUILTIN (sssubv2si3
, "wsubwss", WSUBSSW
)
18780 IWMMXT_BUILTIN (usaddv8qi3
, "waddbus", WADDUSB
)
18781 IWMMXT_BUILTIN (usaddv4hi3
, "waddhus", WADDUSH
)
18782 IWMMXT_BUILTIN (usaddv2si3
, "waddwus", WADDUSW
)
18783 IWMMXT_BUILTIN (ussubv8qi3
, "wsubbus", WSUBUSB
)
18784 IWMMXT_BUILTIN (ussubv4hi3
, "wsubhus", WSUBUSH
)
18785 IWMMXT_BUILTIN (ussubv2si3
, "wsubwus", WSUBUSW
)
18786 IWMMXT_BUILTIN (mulv4hi3
, "wmulul", WMULUL
)
18787 IWMMXT_BUILTIN (smulv4hi3_highpart
, "wmulsm", WMULSM
)
18788 IWMMXT_BUILTIN (umulv4hi3_highpart
, "wmulum", WMULUM
)
18789 IWMMXT_BUILTIN (eqv8qi3
, "wcmpeqb", WCMPEQB
)
18790 IWMMXT_BUILTIN (eqv4hi3
, "wcmpeqh", WCMPEQH
)
18791 IWMMXT_BUILTIN (eqv2si3
, "wcmpeqw", WCMPEQW
)
18792 IWMMXT_BUILTIN (gtuv8qi3
, "wcmpgtub", WCMPGTUB
)
18793 IWMMXT_BUILTIN (gtuv4hi3
, "wcmpgtuh", WCMPGTUH
)
18794 IWMMXT_BUILTIN (gtuv2si3
, "wcmpgtuw", WCMPGTUW
)
18795 IWMMXT_BUILTIN (gtv8qi3
, "wcmpgtsb", WCMPGTSB
)
18796 IWMMXT_BUILTIN (gtv4hi3
, "wcmpgtsh", WCMPGTSH
)
18797 IWMMXT_BUILTIN (gtv2si3
, "wcmpgtsw", WCMPGTSW
)
18798 IWMMXT_BUILTIN (umaxv8qi3
, "wmaxub", WMAXUB
)
18799 IWMMXT_BUILTIN (smaxv8qi3
, "wmaxsb", WMAXSB
)
18800 IWMMXT_BUILTIN (umaxv4hi3
, "wmaxuh", WMAXUH
)
18801 IWMMXT_BUILTIN (smaxv4hi3
, "wmaxsh", WMAXSH
)
18802 IWMMXT_BUILTIN (umaxv2si3
, "wmaxuw", WMAXUW
)
18803 IWMMXT_BUILTIN (smaxv2si3
, "wmaxsw", WMAXSW
)
18804 IWMMXT_BUILTIN (uminv8qi3
, "wminub", WMINUB
)
18805 IWMMXT_BUILTIN (sminv8qi3
, "wminsb", WMINSB
)
18806 IWMMXT_BUILTIN (uminv4hi3
, "wminuh", WMINUH
)
18807 IWMMXT_BUILTIN (sminv4hi3
, "wminsh", WMINSH
)
18808 IWMMXT_BUILTIN (uminv2si3
, "wminuw", WMINUW
)
18809 IWMMXT_BUILTIN (sminv2si3
, "wminsw", WMINSW
)
18810 IWMMXT_BUILTIN (iwmmxt_anddi3
, "wand", WAND
)
18811 IWMMXT_BUILTIN (iwmmxt_nanddi3
, "wandn", WANDN
)
18812 IWMMXT_BUILTIN (iwmmxt_iordi3
, "wor", WOR
)
18813 IWMMXT_BUILTIN (iwmmxt_xordi3
, "wxor", WXOR
)
18814 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3
, "wavg2b", WAVG2B
)
18815 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3
, "wavg2h", WAVG2H
)
18816 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3
, "wavg2br", WAVG2BR
)
18817 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3
, "wavg2hr", WAVG2HR
)
18818 IWMMXT_BUILTIN (iwmmxt_wunpckilb
, "wunpckilb", WUNPCKILB
)
18819 IWMMXT_BUILTIN (iwmmxt_wunpckilh
, "wunpckilh", WUNPCKILH
)
18820 IWMMXT_BUILTIN (iwmmxt_wunpckilw
, "wunpckilw", WUNPCKILW
)
18821 IWMMXT_BUILTIN (iwmmxt_wunpckihb
, "wunpckihb", WUNPCKIHB
)
18822 IWMMXT_BUILTIN (iwmmxt_wunpckihh
, "wunpckihh", WUNPCKIHH
)
18823 IWMMXT_BUILTIN (iwmmxt_wunpckihw
, "wunpckihw", WUNPCKIHW
)
18824 IWMMXT_BUILTIN (iwmmxt_wmadds
, "wmadds", WMADDS
)
18825 IWMMXT_BUILTIN (iwmmxt_wmaddu
, "wmaddu", WMADDU
)
18827 #define IWMMXT_BUILTIN2(code, builtin) \
18828 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
18830 IWMMXT_BUILTIN2 (iwmmxt_wpackhss
, WPACKHSS
)
18831 IWMMXT_BUILTIN2 (iwmmxt_wpackwss
, WPACKWSS
)
18832 IWMMXT_BUILTIN2 (iwmmxt_wpackdss
, WPACKDSS
)
18833 IWMMXT_BUILTIN2 (iwmmxt_wpackhus
, WPACKHUS
)
18834 IWMMXT_BUILTIN2 (iwmmxt_wpackwus
, WPACKWUS
)
18835 IWMMXT_BUILTIN2 (iwmmxt_wpackdus
, WPACKDUS
)
18836 IWMMXT_BUILTIN2 (ashlv4hi3_di
, WSLLH
)
18837 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt
, WSLLHI
)
18838 IWMMXT_BUILTIN2 (ashlv2si3_di
, WSLLW
)
18839 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt
, WSLLWI
)
18840 IWMMXT_BUILTIN2 (ashldi3_di
, WSLLD
)
18841 IWMMXT_BUILTIN2 (ashldi3_iwmmxt
, WSLLDI
)
18842 IWMMXT_BUILTIN2 (lshrv4hi3_di
, WSRLH
)
18843 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt
, WSRLHI
)
18844 IWMMXT_BUILTIN2 (lshrv2si3_di
, WSRLW
)
18845 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt
, WSRLWI
)
18846 IWMMXT_BUILTIN2 (lshrdi3_di
, WSRLD
)
18847 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt
, WSRLDI
)
18848 IWMMXT_BUILTIN2 (ashrv4hi3_di
, WSRAH
)
18849 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt
, WSRAHI
)
18850 IWMMXT_BUILTIN2 (ashrv2si3_di
, WSRAW
)
18851 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt
, WSRAWI
)
18852 IWMMXT_BUILTIN2 (ashrdi3_di
, WSRAD
)
18853 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt
, WSRADI
)
18854 IWMMXT_BUILTIN2 (rorv4hi3_di
, WRORH
)
18855 IWMMXT_BUILTIN2 (rorv4hi3
, WRORHI
)
18856 IWMMXT_BUILTIN2 (rorv2si3_di
, WRORW
)
18857 IWMMXT_BUILTIN2 (rorv2si3
, WRORWI
)
18858 IWMMXT_BUILTIN2 (rordi3_di
, WRORD
)
18859 IWMMXT_BUILTIN2 (rordi3
, WRORDI
)
18860 IWMMXT_BUILTIN2 (iwmmxt_wmacuz
, WMACUZ
)
18861 IWMMXT_BUILTIN2 (iwmmxt_wmacsz
, WMACSZ
)
18864 static const struct builtin_description bdesc_1arg
[] =
18866 IWMMXT_BUILTIN (iwmmxt_tmovmskb
, "tmovmskb", TMOVMSKB
)
18867 IWMMXT_BUILTIN (iwmmxt_tmovmskh
, "tmovmskh", TMOVMSKH
)
18868 IWMMXT_BUILTIN (iwmmxt_tmovmskw
, "tmovmskw", TMOVMSKW
)
18869 IWMMXT_BUILTIN (iwmmxt_waccb
, "waccb", WACCB
)
18870 IWMMXT_BUILTIN (iwmmxt_wacch
, "wacch", WACCH
)
18871 IWMMXT_BUILTIN (iwmmxt_waccw
, "waccw", WACCW
)
18872 IWMMXT_BUILTIN (iwmmxt_wunpckehub
, "wunpckehub", WUNPCKEHUB
)
18873 IWMMXT_BUILTIN (iwmmxt_wunpckehuh
, "wunpckehuh", WUNPCKEHUH
)
18874 IWMMXT_BUILTIN (iwmmxt_wunpckehuw
, "wunpckehuw", WUNPCKEHUW
)
18875 IWMMXT_BUILTIN (iwmmxt_wunpckehsb
, "wunpckehsb", WUNPCKEHSB
)
18876 IWMMXT_BUILTIN (iwmmxt_wunpckehsh
, "wunpckehsh", WUNPCKEHSH
)
18877 IWMMXT_BUILTIN (iwmmxt_wunpckehsw
, "wunpckehsw", WUNPCKEHSW
)
18878 IWMMXT_BUILTIN (iwmmxt_wunpckelub
, "wunpckelub", WUNPCKELUB
)
18879 IWMMXT_BUILTIN (iwmmxt_wunpckeluh
, "wunpckeluh", WUNPCKELUH
)
18880 IWMMXT_BUILTIN (iwmmxt_wunpckeluw
, "wunpckeluw", WUNPCKELUW
)
18881 IWMMXT_BUILTIN (iwmmxt_wunpckelsb
, "wunpckelsb", WUNPCKELSB
)
18882 IWMMXT_BUILTIN (iwmmxt_wunpckelsh
, "wunpckelsh", WUNPCKELSH
)
18883 IWMMXT_BUILTIN (iwmmxt_wunpckelsw
, "wunpckelsw", WUNPCKELSW
)
18886 /* Set up all the iWMMXt builtins. This is not called if
18887 TARGET_IWMMXT is zero. */
18890 arm_init_iwmmxt_builtins (void)
18892 const struct builtin_description
* d
;
18895 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
18896 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
18897 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
18900 = build_function_type_list (integer_type_node
,
18901 integer_type_node
, NULL_TREE
);
18902 tree v8qi_ftype_v8qi_v8qi_int
18903 = build_function_type_list (V8QI_type_node
,
18904 V8QI_type_node
, V8QI_type_node
,
18905 integer_type_node
, NULL_TREE
);
18906 tree v4hi_ftype_v4hi_int
18907 = build_function_type_list (V4HI_type_node
,
18908 V4HI_type_node
, integer_type_node
, NULL_TREE
);
18909 tree v2si_ftype_v2si_int
18910 = build_function_type_list (V2SI_type_node
,
18911 V2SI_type_node
, integer_type_node
, NULL_TREE
);
18912 tree v2si_ftype_di_di
18913 = build_function_type_list (V2SI_type_node
,
18914 long_long_integer_type_node
,
18915 long_long_integer_type_node
,
18917 tree di_ftype_di_int
18918 = build_function_type_list (long_long_integer_type_node
,
18919 long_long_integer_type_node
,
18920 integer_type_node
, NULL_TREE
);
18921 tree di_ftype_di_int_int
18922 = build_function_type_list (long_long_integer_type_node
,
18923 long_long_integer_type_node
,
18925 integer_type_node
, NULL_TREE
);
18926 tree int_ftype_v8qi
18927 = build_function_type_list (integer_type_node
,
18928 V8QI_type_node
, NULL_TREE
);
18929 tree int_ftype_v4hi
18930 = build_function_type_list (integer_type_node
,
18931 V4HI_type_node
, NULL_TREE
);
18932 tree int_ftype_v2si
18933 = build_function_type_list (integer_type_node
,
18934 V2SI_type_node
, NULL_TREE
);
18935 tree int_ftype_v8qi_int
18936 = build_function_type_list (integer_type_node
,
18937 V8QI_type_node
, integer_type_node
, NULL_TREE
);
18938 tree int_ftype_v4hi_int
18939 = build_function_type_list (integer_type_node
,
18940 V4HI_type_node
, integer_type_node
, NULL_TREE
);
18941 tree int_ftype_v2si_int
18942 = build_function_type_list (integer_type_node
,
18943 V2SI_type_node
, integer_type_node
, NULL_TREE
);
18944 tree v8qi_ftype_v8qi_int_int
18945 = build_function_type_list (V8QI_type_node
,
18946 V8QI_type_node
, integer_type_node
,
18947 integer_type_node
, NULL_TREE
);
18948 tree v4hi_ftype_v4hi_int_int
18949 = build_function_type_list (V4HI_type_node
,
18950 V4HI_type_node
, integer_type_node
,
18951 integer_type_node
, NULL_TREE
);
18952 tree v2si_ftype_v2si_int_int
18953 = build_function_type_list (V2SI_type_node
,
18954 V2SI_type_node
, integer_type_node
,
18955 integer_type_node
, NULL_TREE
);
18956 /* Miscellaneous. */
18957 tree v8qi_ftype_v4hi_v4hi
18958 = build_function_type_list (V8QI_type_node
,
18959 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
18960 tree v4hi_ftype_v2si_v2si
18961 = build_function_type_list (V4HI_type_node
,
18962 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
18963 tree v2si_ftype_v4hi_v4hi
18964 = build_function_type_list (V2SI_type_node
,
18965 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
18966 tree v2si_ftype_v8qi_v8qi
18967 = build_function_type_list (V2SI_type_node
,
18968 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
18969 tree v4hi_ftype_v4hi_di
18970 = build_function_type_list (V4HI_type_node
,
18971 V4HI_type_node
, long_long_integer_type_node
,
18973 tree v2si_ftype_v2si_di
18974 = build_function_type_list (V2SI_type_node
,
18975 V2SI_type_node
, long_long_integer_type_node
,
18977 tree void_ftype_int_int
18978 = build_function_type_list (void_type_node
,
18979 integer_type_node
, integer_type_node
,
18982 = build_function_type_list (long_long_unsigned_type_node
, NULL_TREE
);
18984 = build_function_type_list (long_long_integer_type_node
,
18985 V8QI_type_node
, NULL_TREE
);
18987 = build_function_type_list (long_long_integer_type_node
,
18988 V4HI_type_node
, NULL_TREE
);
18990 = build_function_type_list (long_long_integer_type_node
,
18991 V2SI_type_node
, NULL_TREE
);
18992 tree v2si_ftype_v4hi
18993 = build_function_type_list (V2SI_type_node
,
18994 V4HI_type_node
, NULL_TREE
);
18995 tree v4hi_ftype_v8qi
18996 = build_function_type_list (V4HI_type_node
,
18997 V8QI_type_node
, NULL_TREE
);
18999 tree di_ftype_di_v4hi_v4hi
19000 = build_function_type_list (long_long_unsigned_type_node
,
19001 long_long_unsigned_type_node
,
19002 V4HI_type_node
, V4HI_type_node
,
19005 tree di_ftype_v4hi_v4hi
19006 = build_function_type_list (long_long_unsigned_type_node
,
19007 V4HI_type_node
,V4HI_type_node
,
19010 /* Normal vector binops. */
19011 tree v8qi_ftype_v8qi_v8qi
19012 = build_function_type_list (V8QI_type_node
,
19013 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
19014 tree v4hi_ftype_v4hi_v4hi
19015 = build_function_type_list (V4HI_type_node
,
19016 V4HI_type_node
,V4HI_type_node
, NULL_TREE
);
19017 tree v2si_ftype_v2si_v2si
19018 = build_function_type_list (V2SI_type_node
,
19019 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
19020 tree di_ftype_di_di
19021 = build_function_type_list (long_long_unsigned_type_node
,
19022 long_long_unsigned_type_node
,
19023 long_long_unsigned_type_node
,
19026 /* Add all builtins that are more or less simple operations on two
19028 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
19030 /* Use one of the operands; the target can have a different mode for
19031 mask-generating compares. */
19032 enum machine_mode mode
;
19038 mode
= insn_data
[d
->icode
].operand
[1].mode
;
19043 type
= v8qi_ftype_v8qi_v8qi
;
19046 type
= v4hi_ftype_v4hi_v4hi
;
19049 type
= v2si_ftype_v2si_v2si
;
19052 type
= di_ftype_di_di
;
19056 gcc_unreachable ();
19059 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
19062 /* Add the remaining MMX insns with somewhat more complicated types. */
19063 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
19064 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
19065 ARM_BUILTIN_ ## CODE)
19067 iwmmx_mbuiltin ("wzero", di_ftype_void
, WZERO
);
19068 iwmmx_mbuiltin ("setwcx", void_ftype_int_int
, SETWCX
);
19069 iwmmx_mbuiltin ("getwcx", int_ftype_int
, GETWCX
);
19071 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di
, WSLLH
);
19072 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di
, WSLLW
);
19073 iwmmx_mbuiltin ("wslld", di_ftype_di_di
, WSLLD
);
19074 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int
, WSLLHI
);
19075 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int
, WSLLWI
);
19076 iwmmx_mbuiltin ("wslldi", di_ftype_di_int
, WSLLDI
);
19078 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di
, WSRLH
);
19079 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di
, WSRLW
);
19080 iwmmx_mbuiltin ("wsrld", di_ftype_di_di
, WSRLD
);
19081 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int
, WSRLHI
);
19082 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int
, WSRLWI
);
19083 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int
, WSRLDI
);
19085 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di
, WSRAH
);
19086 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di
, WSRAW
);
19087 iwmmx_mbuiltin ("wsrad", di_ftype_di_di
, WSRAD
);
19088 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int
, WSRAHI
);
19089 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int
, WSRAWI
);
19090 iwmmx_mbuiltin ("wsradi", di_ftype_di_int
, WSRADI
);
19092 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di
, WRORH
);
19093 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di
, WRORW
);
19094 iwmmx_mbuiltin ("wrord", di_ftype_di_di
, WRORD
);
19095 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int
, WRORHI
);
19096 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int
, WRORWI
);
19097 iwmmx_mbuiltin ("wrordi", di_ftype_di_int
, WRORDI
);
19099 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int
, WSHUFH
);
19101 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi
, WSADB
);
19102 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi
, WSADH
);
19103 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi
, WSADBZ
);
19104 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi
, WSADHZ
);
19106 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int
, TEXTRMSB
);
19107 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int
, TEXTRMSH
);
19108 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int
, TEXTRMSW
);
19109 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int
, TEXTRMUB
);
19110 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int
, TEXTRMUH
);
19111 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int
, TEXTRMUW
);
19112 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int
, TINSRB
);
19113 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int
, TINSRH
);
19114 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int
, TINSRW
);
19116 iwmmx_mbuiltin ("waccb", di_ftype_v8qi
, WACCB
);
19117 iwmmx_mbuiltin ("wacch", di_ftype_v4hi
, WACCH
);
19118 iwmmx_mbuiltin ("waccw", di_ftype_v2si
, WACCW
);
19120 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi
, TMOVMSKB
);
19121 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi
, TMOVMSKH
);
19122 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si
, TMOVMSKW
);
19124 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi
, WPACKHSS
);
19125 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi
, WPACKHUS
);
19126 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si
, WPACKWUS
);
19127 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si
, WPACKWSS
);
19128 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di
, WPACKDUS
);
19129 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di
, WPACKDSS
);
19131 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi
, WUNPCKEHUB
);
19132 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi
, WUNPCKEHUH
);
19133 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si
, WUNPCKEHUW
);
19134 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi
, WUNPCKEHSB
);
19135 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi
, WUNPCKEHSH
);
19136 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si
, WUNPCKEHSW
);
19137 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi
, WUNPCKELUB
);
19138 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi
, WUNPCKELUH
);
19139 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si
, WUNPCKELUW
);
19140 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi
, WUNPCKELSB
);
19141 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi
, WUNPCKELSH
);
19142 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si
, WUNPCKELSW
);
19144 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi
, WMACS
);
19145 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi
, WMACSZ
);
19146 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi
, WMACU
);
19147 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi
, WMACUZ
);
19149 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int
, WALIGN
);
19150 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int
, TMIA
);
19151 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int
, TMIAPH
);
19152 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int
, TMIABB
);
19153 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int
, TMIABT
);
19154 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int
, TMIATB
);
19155 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int
, TMIATT
);
19157 #undef iwmmx_mbuiltin
19161 arm_init_tls_builtins (void)
19165 ftype
= build_function_type (ptr_type_node
, void_list_node
);
19166 decl
= add_builtin_function ("__builtin_thread_pointer", ftype
,
19167 ARM_BUILTIN_THREAD_POINTER
, BUILT_IN_MD
,
19169 TREE_NOTHROW (decl
) = 1;
19170 TREE_READONLY (decl
) = 1;
19171 arm_builtin_decls
[ARM_BUILTIN_THREAD_POINTER
] = decl
;
19175 arm_init_fp16_builtins (void)
19177 tree fp16_type
= make_node (REAL_TYPE
);
19178 TYPE_PRECISION (fp16_type
) = 16;
19179 layout_type (fp16_type
);
19180 (*lang_hooks
.types
.register_builtin_type
) (fp16_type
, "__fp16");
19184 arm_init_builtins (void)
19186 arm_init_tls_builtins ();
19188 if (TARGET_REALLY_IWMMXT
)
19189 arm_init_iwmmxt_builtins ();
19192 arm_init_neon_builtins ();
19194 if (arm_fp16_format
)
19195 arm_init_fp16_builtins ();
19198 /* Return the ARM builtin for CODE. */
19201 arm_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
19203 if (code
>= ARM_BUILTIN_MAX
)
19204 return error_mark_node
;
19206 return arm_builtin_decls
[code
];
19209 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19211 static const char *
19212 arm_invalid_parameter_type (const_tree t
)
19214 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
19215 return N_("function parameters cannot have __fp16 type");
19219 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19221 static const char *
19222 arm_invalid_return_type (const_tree t
)
19224 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
19225 return N_("functions cannot return __fp16 type");
19229 /* Implement TARGET_PROMOTED_TYPE. */
19232 arm_promoted_type (const_tree t
)
19234 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
19235 return float_type_node
;
19239 /* Implement TARGET_CONVERT_TO_TYPE.
19240 Specifically, this hook implements the peculiarity of the ARM
19241 half-precision floating-point C semantics that requires conversions between
19242 __fp16 to or from double to do an intermediate conversion to float. */
19245 arm_convert_to_type (tree type
, tree expr
)
19247 tree fromtype
= TREE_TYPE (expr
);
19248 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
19250 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
19251 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
19252 return convert (type
, convert (float_type_node
, expr
));
19256 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19257 This simply adds HFmode as a supported mode; even though we don't
19258 implement arithmetic on this type directly, it's supported by
19259 optabs conversions, much the way the double-word arithmetic is
19260 special-cased in the default hook. */
19263 arm_scalar_mode_supported_p (enum machine_mode mode
)
19265 if (mode
== HFmode
)
19266 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
19268 return default_scalar_mode_supported_p (mode
);
19271 /* Errors in the source file can cause expand_expr to return const0_rtx
19272 where we expect a vector. To avoid crashing, use one of the vector
19273 clear instructions. */
19276 safe_vector_operand (rtx x
, enum machine_mode mode
)
19278 if (x
!= const0_rtx
)
19280 x
= gen_reg_rtx (mode
);
19282 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
19283 : gen_rtx_SUBREG (DImode
, x
, 0)));
19287 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19290 arm_expand_binop_builtin (enum insn_code icode
,
19291 tree exp
, rtx target
)
19294 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19295 tree arg1
= CALL_EXPR_ARG (exp
, 1);
19296 rtx op0
= expand_normal (arg0
);
19297 rtx op1
= expand_normal (arg1
);
19298 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19299 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
19300 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
19302 if (VECTOR_MODE_P (mode0
))
19303 op0
= safe_vector_operand (op0
, mode0
);
19304 if (VECTOR_MODE_P (mode1
))
19305 op1
= safe_vector_operand (op1
, mode1
);
19308 || GET_MODE (target
) != tmode
19309 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19310 target
= gen_reg_rtx (tmode
);
19312 gcc_assert (GET_MODE (op0
) == mode0
&& GET_MODE (op1
) == mode1
);
19314 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19315 op0
= copy_to_mode_reg (mode0
, op0
);
19316 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19317 op1
= copy_to_mode_reg (mode1
, op1
);
19319 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19326 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19329 arm_expand_unop_builtin (enum insn_code icode
,
19330 tree exp
, rtx target
, int do_load
)
19333 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19334 rtx op0
= expand_normal (arg0
);
19335 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19336 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
19339 || GET_MODE (target
) != tmode
19340 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19341 target
= gen_reg_rtx (tmode
);
19343 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
19346 if (VECTOR_MODE_P (mode0
))
19347 op0
= safe_vector_operand (op0
, mode0
);
19349 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19350 op0
= copy_to_mode_reg (mode0
, op0
);
19353 pat
= GEN_FCN (icode
) (target
, op0
);
19361 NEON_ARG_COPY_TO_REG
,
19367 #define NEON_MAX_BUILTIN_ARGS 5
19369 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
19370 and return an expression for the accessed memory.
19372 The intrinsic function operates on a block of registers that has
19373 mode REG_MODE. This block contains vectors of type TYPE_MODE.
19374 The function references the memory at EXP in mode MEM_MODE;
19375 this mode may be BLKmode if no more suitable mode is available. */
19378 neon_dereference_pointer (tree exp
, enum machine_mode mem_mode
,
19379 enum machine_mode reg_mode
,
19380 neon_builtin_type_mode type_mode
)
19382 HOST_WIDE_INT reg_size
, vector_size
, nvectors
, nelems
;
19383 tree elem_type
, upper_bound
, array_type
;
19385 /* Work out the size of the register block in bytes. */
19386 reg_size
= GET_MODE_SIZE (reg_mode
);
19388 /* Work out the size of each vector in bytes. */
19389 gcc_assert (TYPE_MODE_BIT (type_mode
) & (TB_DREG
| TB_QREG
));
19390 vector_size
= (TYPE_MODE_BIT (type_mode
) & TB_QREG
? 16 : 8);
19392 /* Work out how many vectors there are. */
19393 gcc_assert (reg_size
% vector_size
== 0);
19394 nvectors
= reg_size
/ vector_size
;
19396 /* Work out how many elements are being loaded or stored.
19397 MEM_MODE == REG_MODE implies a one-to-one mapping between register
19398 and memory elements; anything else implies a lane load or store. */
19399 if (mem_mode
== reg_mode
)
19400 nelems
= vector_size
* nvectors
;
19404 /* Work out the type of each element. */
19405 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp
)));
19406 elem_type
= TREE_TYPE (TREE_TYPE (exp
));
19408 /* Create a type that describes the full access. */
19409 upper_bound
= build_int_cst (size_type_node
, nelems
- 1);
19410 array_type
= build_array_type (elem_type
, build_index_type (upper_bound
));
19412 /* Dereference EXP using that type. */
19413 exp
= convert (build_pointer_type (array_type
), exp
);
19414 return fold_build2 (MEM_REF
, array_type
, exp
,
19415 build_int_cst (TREE_TYPE (exp
), 0));
19418 /* Expand a Neon builtin. */
19420 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
19421 neon_builtin_type_mode type_mode
,
19426 tree arg
[NEON_MAX_BUILTIN_ARGS
];
19427 rtx op
[NEON_MAX_BUILTIN_ARGS
];
19428 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19429 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
19430 enum machine_mode other_mode
;
19436 || GET_MODE (target
) != tmode
19437 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
19438 target
= gen_reg_rtx (tmode
);
19440 va_start (ap
, exp
);
19444 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
19446 if (thisarg
== NEON_ARG_STOP
)
19450 opno
= argc
+ have_retval
;
19451 mode
[argc
] = insn_data
[icode
].operand
[opno
].mode
;
19452 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
19453 if (thisarg
== NEON_ARG_MEMORY
)
19455 other_mode
= insn_data
[icode
].operand
[1 - opno
].mode
;
19456 arg
[argc
] = neon_dereference_pointer (arg
[argc
], mode
[argc
],
19457 other_mode
, type_mode
);
19459 op
[argc
] = expand_normal (arg
[argc
]);
19463 case NEON_ARG_COPY_TO_REG
:
19464 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19465 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
19466 (op
[argc
], mode
[argc
]))
19467 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
19470 case NEON_ARG_CONSTANT
:
19471 /* FIXME: This error message is somewhat unhelpful. */
19472 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
19473 (op
[argc
], mode
[argc
]))
19474 error ("argument must be a constant");
19477 case NEON_ARG_MEMORY
:
19478 gcc_assert (MEM_P (op
[argc
]));
19479 PUT_MODE (op
[argc
], mode
[argc
]);
19480 /* ??? arm_neon.h uses the same built-in functions for signed
19481 and unsigned accesses, casting where necessary. This isn't
19483 set_mem_alias_set (op
[argc
], 0);
19484 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
19485 (op
[argc
], mode
[argc
]))
19486 op
[argc
] = (replace_equiv_address
19487 (op
[argc
], force_reg (Pmode
, XEXP (op
[argc
], 0))));
19490 case NEON_ARG_STOP
:
19491 gcc_unreachable ();
19504 pat
= GEN_FCN (icode
) (target
, op
[0]);
19508 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
19512 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
19516 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
19520 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
19524 gcc_unreachable ();
19530 pat
= GEN_FCN (icode
) (op
[0]);
19534 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
19538 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
19542 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
19546 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
19550 gcc_unreachable ();
19561 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19562 constants defined per-instruction or per instruction-variant. Instead, the
19563 required info is looked up in the table neon_builtin_data. */
19565 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
19567 neon_builtin_datum
*d
= &neon_builtin_data
[fcode
- ARM_BUILTIN_NEON_BASE
];
19568 neon_itype itype
= d
->itype
;
19569 enum insn_code icode
= d
->code
;
19570 neon_builtin_type_mode type_mode
= d
->mode
;
19577 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19578 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19582 case NEON_SCALARMUL
:
19583 case NEON_SCALARMULL
:
19584 case NEON_SCALARMULH
:
19585 case NEON_SHIFTINSERT
:
19586 case NEON_LOGICBINOP
:
19587 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19588 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19592 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19593 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19594 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19598 case NEON_SHIFTIMM
:
19599 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19600 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
19604 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19605 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19609 case NEON_REINTERP
:
19610 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19611 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19615 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19616 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19618 case NEON_RESULTPAIR
:
19619 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
,
19620 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19624 case NEON_LANEMULL
:
19625 case NEON_LANEMULH
:
19626 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19627 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19628 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19631 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19632 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19633 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19635 case NEON_SHIFTACC
:
19636 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19637 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19638 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19640 case NEON_SCALARMAC
:
19641 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19642 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19643 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19647 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19648 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19652 case NEON_LOADSTRUCT
:
19653 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19654 NEON_ARG_MEMORY
, NEON_ARG_STOP
);
19656 case NEON_LOAD1LANE
:
19657 case NEON_LOADSTRUCTLANE
:
19658 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19659 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19663 case NEON_STORESTRUCT
:
19664 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
,
19665 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19667 case NEON_STORE1LANE
:
19668 case NEON_STORESTRUCTLANE
:
19669 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
,
19670 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19674 gcc_unreachable ();
19677 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19679 neon_reinterpret (rtx dest
, rtx src
)
19681 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
19684 /* Emit code to place a Neon pair result in memory locations (with equal
19687 neon_emit_pair_result_insn (enum machine_mode mode
,
19688 rtx (*intfn
) (rtx
, rtx
, rtx
, rtx
), rtx destaddr
,
19691 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
19692 rtx tmp1
= gen_reg_rtx (mode
);
19693 rtx tmp2
= gen_reg_rtx (mode
);
19695 emit_insn (intfn (tmp1
, op1
, op2
, tmp2
));
19697 emit_move_insn (mem
, tmp1
);
19698 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
19699 emit_move_insn (mem
, tmp2
);
19702 /* Set up operands for a register copy from src to dest, taking care not to
19703 clobber registers in the process.
19704 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19705 be called with a large N, so that should be OK. */
19708 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
19710 unsigned int copied
= 0, opctr
= 0;
19711 unsigned int done
= (1 << count
) - 1;
19714 while (copied
!= done
)
19716 for (i
= 0; i
< count
; i
++)
19720 for (j
= 0; good
&& j
< count
; j
++)
19721 if (i
!= j
&& (copied
& (1 << j
)) == 0
19722 && reg_overlap_mentioned_p (src
[j
], dest
[i
]))
19727 operands
[opctr
++] = dest
[i
];
19728 operands
[opctr
++] = src
[i
];
19734 gcc_assert (opctr
== count
* 2);
19737 /* Expand an expression EXP that calls a built-in function,
19738 with result going to TARGET if that's convenient
19739 (and in mode MODE if that's convenient).
19740 SUBTARGET may be used as the target for computing one of EXP's operands.
19741 IGNORE is nonzero if the value is to be ignored. */
19744 arm_expand_builtin (tree exp
,
19746 rtx subtarget ATTRIBUTE_UNUSED
,
19747 enum machine_mode mode ATTRIBUTE_UNUSED
,
19748 int ignore ATTRIBUTE_UNUSED
)
19750 const struct builtin_description
* d
;
19751 enum insn_code icode
;
19752 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
19760 int fcode
= DECL_FUNCTION_CODE (fndecl
);
19762 enum machine_mode tmode
;
19763 enum machine_mode mode0
;
19764 enum machine_mode mode1
;
19765 enum machine_mode mode2
;
19767 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
19768 return arm_expand_neon_builtin (fcode
, exp
, target
);
19772 case ARM_BUILTIN_TEXTRMSB
:
19773 case ARM_BUILTIN_TEXTRMUB
:
19774 case ARM_BUILTIN_TEXTRMSH
:
19775 case ARM_BUILTIN_TEXTRMUH
:
19776 case ARM_BUILTIN_TEXTRMSW
:
19777 case ARM_BUILTIN_TEXTRMUW
:
19778 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
19779 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
19780 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
19781 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
19782 : CODE_FOR_iwmmxt_textrmw
);
19784 arg0
= CALL_EXPR_ARG (exp
, 0);
19785 arg1
= CALL_EXPR_ARG (exp
, 1);
19786 op0
= expand_normal (arg0
);
19787 op1
= expand_normal (arg1
);
19788 tmode
= insn_data
[icode
].operand
[0].mode
;
19789 mode0
= insn_data
[icode
].operand
[1].mode
;
19790 mode1
= insn_data
[icode
].operand
[2].mode
;
19792 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19793 op0
= copy_to_mode_reg (mode0
, op0
);
19794 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19796 /* @@@ better error message */
19797 error ("selector must be an immediate");
19798 return gen_reg_rtx (tmode
);
19801 || GET_MODE (target
) != tmode
19802 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19803 target
= gen_reg_rtx (tmode
);
19804 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19810 case ARM_BUILTIN_TINSRB
:
19811 case ARM_BUILTIN_TINSRH
:
19812 case ARM_BUILTIN_TINSRW
:
19813 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
19814 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
19815 : CODE_FOR_iwmmxt_tinsrw
);
19816 arg0
= CALL_EXPR_ARG (exp
, 0);
19817 arg1
= CALL_EXPR_ARG (exp
, 1);
19818 arg2
= CALL_EXPR_ARG (exp
, 2);
19819 op0
= expand_normal (arg0
);
19820 op1
= expand_normal (arg1
);
19821 op2
= expand_normal (arg2
);
19822 tmode
= insn_data
[icode
].operand
[0].mode
;
19823 mode0
= insn_data
[icode
].operand
[1].mode
;
19824 mode1
= insn_data
[icode
].operand
[2].mode
;
19825 mode2
= insn_data
[icode
].operand
[3].mode
;
19827 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19828 op0
= copy_to_mode_reg (mode0
, op0
);
19829 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19830 op1
= copy_to_mode_reg (mode1
, op1
);
19831 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
19833 /* @@@ better error message */
19834 error ("selector must be an immediate");
19838 || GET_MODE (target
) != tmode
19839 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19840 target
= gen_reg_rtx (tmode
);
19841 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19847 case ARM_BUILTIN_SETWCX
:
19848 arg0
= CALL_EXPR_ARG (exp
, 0);
19849 arg1
= CALL_EXPR_ARG (exp
, 1);
19850 op0
= force_reg (SImode
, expand_normal (arg0
));
19851 op1
= expand_normal (arg1
);
19852 emit_insn (gen_iwmmxt_tmcr (op1
, op0
));
19855 case ARM_BUILTIN_GETWCX
:
19856 arg0
= CALL_EXPR_ARG (exp
, 0);
19857 op0
= expand_normal (arg0
);
19858 target
= gen_reg_rtx (SImode
);
19859 emit_insn (gen_iwmmxt_tmrc (target
, op0
));
19862 case ARM_BUILTIN_WSHUFH
:
19863 icode
= CODE_FOR_iwmmxt_wshufh
;
19864 arg0
= CALL_EXPR_ARG (exp
, 0);
19865 arg1
= CALL_EXPR_ARG (exp
, 1);
19866 op0
= expand_normal (arg0
);
19867 op1
= expand_normal (arg1
);
19868 tmode
= insn_data
[icode
].operand
[0].mode
;
19869 mode1
= insn_data
[icode
].operand
[1].mode
;
19870 mode2
= insn_data
[icode
].operand
[2].mode
;
19872 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19873 op0
= copy_to_mode_reg (mode1
, op0
);
19874 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19876 /* @@@ better error message */
19877 error ("mask must be an immediate");
19881 || GET_MODE (target
) != tmode
19882 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19883 target
= gen_reg_rtx (tmode
);
19884 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19890 case ARM_BUILTIN_WSADB
:
19891 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb
, exp
, target
);
19892 case ARM_BUILTIN_WSADH
:
19893 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh
, exp
, target
);
19894 case ARM_BUILTIN_WSADBZ
:
19895 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
19896 case ARM_BUILTIN_WSADHZ
:
19897 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
19899 /* Several three-argument builtins. */
19900 case ARM_BUILTIN_WMACS
:
19901 case ARM_BUILTIN_WMACU
:
19902 case ARM_BUILTIN_WALIGN
:
19903 case ARM_BUILTIN_TMIA
:
19904 case ARM_BUILTIN_TMIAPH
:
19905 case ARM_BUILTIN_TMIATT
:
19906 case ARM_BUILTIN_TMIATB
:
19907 case ARM_BUILTIN_TMIABT
:
19908 case ARM_BUILTIN_TMIABB
:
19909 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
19910 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
19911 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
19912 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
19913 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
19914 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
19915 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
19916 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
19917 : CODE_FOR_iwmmxt_walign
);
19918 arg0
= CALL_EXPR_ARG (exp
, 0);
19919 arg1
= CALL_EXPR_ARG (exp
, 1);
19920 arg2
= CALL_EXPR_ARG (exp
, 2);
19921 op0
= expand_normal (arg0
);
19922 op1
= expand_normal (arg1
);
19923 op2
= expand_normal (arg2
);
19924 tmode
= insn_data
[icode
].operand
[0].mode
;
19925 mode0
= insn_data
[icode
].operand
[1].mode
;
19926 mode1
= insn_data
[icode
].operand
[2].mode
;
19927 mode2
= insn_data
[icode
].operand
[3].mode
;
19929 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19930 op0
= copy_to_mode_reg (mode0
, op0
);
19931 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19932 op1
= copy_to_mode_reg (mode1
, op1
);
19933 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
19934 op2
= copy_to_mode_reg (mode2
, op2
);
19936 || GET_MODE (target
) != tmode
19937 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19938 target
= gen_reg_rtx (tmode
);
19939 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19945 case ARM_BUILTIN_WZERO
:
19946 target
= gen_reg_rtx (DImode
);
19947 emit_insn (gen_iwmmxt_clrdi (target
));
19950 case ARM_BUILTIN_THREAD_POINTER
:
19951 return arm_load_tp (target
);
19957 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
19958 if (d
->code
== (const enum arm_builtins
) fcode
)
19959 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
19961 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
19962 if (d
->code
== (const enum arm_builtins
) fcode
)
19963 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
19965 /* @@@ Should really do something sensible here. */
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  MASK must be nonzero;
   behaviour is undefined otherwise (the scan never terminates).  */
static int
number_of_first_bit_set (unsigned mask)
{
  int bit;

  for (bit = 0;
       (mask & (1 << bit)) == 0;
       ++bit)
    continue;

  return bit;
}
19985 /* Emit code to push or pop registers to or from the stack. F is the
19986 assembly file. MASK is the registers to push or pop. PUSH is
19987 nonzero if we should push, and zero if we should pop. For debugging
19988 output, if pushing, adjust CFA_OFFSET by the amount of space added
19989 to the stack. REAL_REGS should have the same number of bits set as
19990 MASK, and will be used instead (in the same order) to describe which
19991 registers were saved - this is used to mark the save slots when we
19992 push high registers after moving them to low registers. */
19994 thumb_pushpop (FILE *f
, unsigned long mask
, int push
, int *cfa_offset
,
19995 unsigned long real_regs
)
19998 int lo_mask
= mask
& 0xFF;
19999 int pushed_words
= 0;
20003 if (lo_mask
== 0 && !push
&& (mask
& (1 << PC_REGNUM
)))
20005 /* Special case. Do not generate a POP PC statement here, do it in
20007 thumb_exit (f
, -1);
20011 if (push
&& arm_except_unwind_info (&global_options
) == UI_TARGET
)
20013 fprintf (f
, "\t.save\t{");
20014 for (regno
= 0; regno
< 15; regno
++)
20016 if (real_regs
& (1 << regno
))
20018 if (real_regs
& ((1 << regno
) -1))
20020 asm_fprintf (f
, "%r", regno
);
20023 fprintf (f
, "}\n");
20026 fprintf (f
, "\t%s\t{", push
? "push" : "pop");
20028 /* Look at the low registers first. */
20029 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
20033 asm_fprintf (f
, "%r", regno
);
20035 if ((lo_mask
& ~1) != 0)
20042 if (push
&& (mask
& (1 << LR_REGNUM
)))
20044 /* Catch pushing the LR. */
20048 asm_fprintf (f
, "%r", LR_REGNUM
);
20052 else if (!push
&& (mask
& (1 << PC_REGNUM
)))
20054 /* Catch popping the PC. */
20055 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
20056 || crtl
->calls_eh_return
)
20058 /* The PC is never poped directly, instead
20059 it is popped into r3 and then BX is used. */
20060 fprintf (f
, "}\n");
20062 thumb_exit (f
, -1);
20071 asm_fprintf (f
, "%r", PC_REGNUM
);
20075 fprintf (f
, "}\n");
20077 if (push
&& pushed_words
&& dwarf2out_do_frame ())
20079 char *l
= dwarf2out_cfi_label (false);
20080 int pushed_mask
= real_regs
;
20082 *cfa_offset
+= pushed_words
* 4;
20083 dwarf2out_def_cfa (l
, SP_REGNUM
, *cfa_offset
);
20086 pushed_mask
= real_regs
;
20087 for (regno
= 0; regno
<= 14; regno
++, pushed_mask
>>= 1)
20089 if (pushed_mask
& 1)
20090 dwarf2out_reg_save (l
, regno
, 4 * pushed_words
++ - *cfa_offset
);
20095 /* Generate code to return from a thumb function.
20096 If 'reg_containing_return_addr' is -1, then the return address is
20097 actually on the stack, at the stack pointer. */
20099 thumb_exit (FILE *f
, int reg_containing_return_addr
)
20101 unsigned regs_available_for_popping
;
20102 unsigned regs_to_pop
;
20104 unsigned available
;
20108 int restore_a4
= FALSE
;
20110 /* Compute the registers we need to pop. */
20114 if (reg_containing_return_addr
== -1)
20116 regs_to_pop
|= 1 << LR_REGNUM
;
20120 if (TARGET_BACKTRACE
)
20122 /* Restore the (ARM) frame pointer and stack pointer. */
20123 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
20127 /* If there is nothing to pop then just emit the BX instruction and
20129 if (pops_needed
== 0)
20131 if (crtl
->calls_eh_return
)
20132 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
20134 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
20137 /* Otherwise if we are not supporting interworking and we have not created
20138 a backtrace structure and the function was not entered in ARM mode then
20139 just pop the return address straight into the PC. */
20140 else if (!TARGET_INTERWORK
20141 && !TARGET_BACKTRACE
20142 && !is_called_in_ARM_mode (current_function_decl
)
20143 && !crtl
->calls_eh_return
)
20145 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
20149 /* Find out how many of the (return) argument registers we can corrupt. */
20150 regs_available_for_popping
= 0;
20152 /* If returning via __builtin_eh_return, the bottom three registers
20153 all contain information needed for the return. */
20154 if (crtl
->calls_eh_return
)
20158 /* If we can deduce the registers used from the function's
20159 return value. This is more reliable that examining
20160 df_regs_ever_live_p () because that will be set if the register is
20161 ever used in the function, not just if the register is used
20162 to hold a return value. */
20164 if (crtl
->return_rtx
!= 0)
20165 mode
= GET_MODE (crtl
->return_rtx
);
20167 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20169 size
= GET_MODE_SIZE (mode
);
20173 /* In a void function we can use any argument register.
20174 In a function that returns a structure on the stack
20175 we can use the second and third argument registers. */
20176 if (mode
== VOIDmode
)
20177 regs_available_for_popping
=
20178 (1 << ARG_REGISTER (1))
20179 | (1 << ARG_REGISTER (2))
20180 | (1 << ARG_REGISTER (3));
20182 regs_available_for_popping
=
20183 (1 << ARG_REGISTER (2))
20184 | (1 << ARG_REGISTER (3));
20186 else if (size
<= 4)
20187 regs_available_for_popping
=
20188 (1 << ARG_REGISTER (2))
20189 | (1 << ARG_REGISTER (3));
20190 else if (size
<= 8)
20191 regs_available_for_popping
=
20192 (1 << ARG_REGISTER (3));
20195 /* Match registers to be popped with registers into which we pop them. */
20196 for (available
= regs_available_for_popping
,
20197 required
= regs_to_pop
;
20198 required
!= 0 && available
!= 0;
20199 available
&= ~(available
& - available
),
20200 required
&= ~(required
& - required
))
20203 /* If we have any popping registers left over, remove them. */
20205 regs_available_for_popping
&= ~available
;
20207 /* Otherwise if we need another popping register we can use
20208 the fourth argument register. */
20209 else if (pops_needed
)
20211 /* If we have not found any free argument registers and
20212 reg a4 contains the return address, we must move it. */
20213 if (regs_available_for_popping
== 0
20214 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
20216 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
20217 reg_containing_return_addr
= LR_REGNUM
;
20219 else if (size
> 12)
20221 /* Register a4 is being used to hold part of the return value,
20222 but we have dire need of a free, low register. */
20225 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
20228 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
20230 /* The fourth argument register is available. */
20231 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
20237 /* Pop as many registers as we can. */
20238 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
20239 regs_available_for_popping
);
20241 /* Process the registers we popped. */
20242 if (reg_containing_return_addr
== -1)
20244 /* The return address was popped into the lowest numbered register. */
20245 regs_to_pop
&= ~(1 << LR_REGNUM
);
20247 reg_containing_return_addr
=
20248 number_of_first_bit_set (regs_available_for_popping
);
20250 /* Remove this register for the mask of available registers, so that
20251 the return address will not be corrupted by further pops. */
20252 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
20255 /* If we popped other registers then handle them here. */
20256 if (regs_available_for_popping
)
20260 /* Work out which register currently contains the frame pointer. */
20261 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
20263 /* Move it into the correct place. */
20264 asm_fprintf (f
, "\tmov\t%r, %r\n",
20265 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
20267 /* (Temporarily) remove it from the mask of popped registers. */
20268 regs_available_for_popping
&= ~(1 << frame_pointer
);
20269 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
20271 if (regs_available_for_popping
)
20275 /* We popped the stack pointer as well,
20276 find the register that contains it. */
20277 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
20279 /* Move it into the stack register. */
20280 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
20282 /* At this point we have popped all necessary registers, so
20283 do not worry about restoring regs_available_for_popping
20284 to its correct value:
20286 assert (pops_needed == 0)
20287 assert (regs_available_for_popping == (1 << frame_pointer))
20288 assert (regs_to_pop == (1 << STACK_POINTER)) */
20292 /* Since we have just move the popped value into the frame
20293 pointer, the popping register is available for reuse, and
20294 we know that we still have the stack pointer left to pop. */
20295 regs_available_for_popping
|= (1 << frame_pointer
);
20299 /* If we still have registers left on the stack, but we no longer have
20300 any registers into which we can pop them, then we must move the return
20301 address into the link register and make available the register that
20303 if (regs_available_for_popping
== 0 && pops_needed
> 0)
20305 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
20307 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
20308 reg_containing_return_addr
);
20310 reg_containing_return_addr
= LR_REGNUM
;
20313 /* If we have registers left on the stack then pop some more.
20314 We know that at most we will want to pop FP and SP. */
20315 if (pops_needed
> 0)
20320 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
20321 regs_available_for_popping
);
20323 /* We have popped either FP or SP.
20324 Move whichever one it is into the correct register. */
20325 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
20326 move_to
= number_of_first_bit_set (regs_to_pop
);
20328 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
20330 regs_to_pop
&= ~(1 << move_to
);
20335 /* If we still have not popped everything then we must have only
20336 had one register available to us and we are now popping the SP. */
20337 if (pops_needed
> 0)
20341 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
20342 regs_available_for_popping
);
20344 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
20346 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
20348 assert (regs_to_pop == (1 << STACK_POINTER))
20349 assert (pops_needed == 1)
20353 /* If necessary restore the a4 register. */
20356 if (reg_containing_return_addr
!= LR_REGNUM
)
20358 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
20359 reg_containing_return_addr
= LR_REGNUM
;
20362 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
20365 if (crtl
->calls_eh_return
)
20366 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
20368 /* Return to caller. */
20369 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
20372 /* Scan INSN just before assembler is output for it.
20373 For Thumb-1, we track the status of the condition codes; this
20374 information is used in the cbranchsi4_insn pattern. */
20376 thumb1_final_prescan_insn (rtx insn
)
20378 if (flag_print_asm_name
)
20379 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
20380 INSN_ADDRESSES (INSN_UID (insn
)));
20381 /* Don't overwrite the previous setter when we get to a cbranch. */
20382 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
20384 enum attr_conds conds
;
20386 if (cfun
->machine
->thumb1_cc_insn
)
20388 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
20389 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
20392 conds
= get_attr_conds (insn
);
20393 if (conds
== CONDS_SET
)
20395 rtx set
= single_set (insn
);
20396 cfun
->machine
->thumb1_cc_insn
= insn
;
20397 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
20398 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
20399 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
20400 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
20402 rtx src1
= XEXP (SET_SRC (set
), 1);
20403 if (src1
== const0_rtx
)
20404 cfun
->machine
->thumb1_cc_mode
= CCmode
;
20407 else if (conds
!= CONDS_NOCOND
)
20408 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
20413 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
20415 unsigned HOST_WIDE_INT mask
= 0xff;
20418 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
20419 if (val
== 0) /* XXX */
20422 for (i
= 0; i
< 25; i
++)
20423 if ((val
& (mask
<< i
)) == val
)
20429 /* Returns nonzero if the current function contains,
20430 or might contain a far jump. */
20432 thumb_far_jump_used_p (void)
20436 /* This test is only important for leaf functions. */
20437 /* assert (!leaf_function_p ()); */
20439 /* If we have already decided that far jumps may be used,
20440 do not bother checking again, and always return true even if
20441 it turns out that they are not being used. Once we have made
20442 the decision that far jumps are present (and that hence the link
20443 register will be pushed onto the stack) we cannot go back on it. */
20444 if (cfun
->machine
->far_jump_used
)
20447 /* If this function is not being called from the prologue/epilogue
20448 generation code then it must be being called from the
20449 INITIAL_ELIMINATION_OFFSET macro. */
20450 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
20452 /* In this case we know that we are being asked about the elimination
20453 of the arg pointer register. If that register is not being used,
20454 then there are no arguments on the stack, and we do not have to
20455 worry that a far jump might force the prologue to push the link
20456 register, changing the stack offsets. In this case we can just
20457 return false, since the presence of far jumps in the function will
20458 not affect stack offsets.
20460 If the arg pointer is live (or if it was live, but has now been
20461 eliminated and so set to dead) then we do have to test to see if
20462 the function might contain a far jump. This test can lead to some
20463 false negatives, since before reload is completed, then length of
20464 branch instructions is not known, so gcc defaults to returning their
20465 longest length, which in turn sets the far jump attribute to true.
20467 A false negative will not result in bad code being generated, but it
20468 will result in a needless push and pop of the link register. We
20469 hope that this does not occur too often.
20471 If we need doubleword stack alignment this could affect the other
20472 elimination offsets so we can't risk getting it wrong. */
20473 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
20474 cfun
->machine
->arg_pointer_live
= 1;
20475 else if (!cfun
->machine
->arg_pointer_live
)
20479 /* Check to see if the function contains a branch
20480 insn with the far jump attribute set. */
20481 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
20483 if (GET_CODE (insn
) == JUMP_INSN
20484 /* Ignore tablejump patterns. */
20485 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
20486 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
20487 && get_attr_far_jump (insn
) == FAR_JUMP_YES
20490 /* Record the fact that we have decided that
20491 the function does use far jumps. */
20492 cfun
->machine
->far_jump_used
= 1;
20500 /* Return nonzero if FUNC must be entered in ARM mode. */
20502 is_called_in_ARM_mode (tree func
)
20504 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
20506 /* Ignore the problem about functions whose address is taken. */
20507 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
20511 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
20517 /* Given the stack offsets and register mask in OFFSETS, decide how
20518 many additional registers to push instead of subtracting a constant
20519 from SP. For epilogues the principle is the same except we use pop.
20520 FOR_PROLOGUE indicates which we're generating. */
20522 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
20524 HOST_WIDE_INT amount
;
20525 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
20526 /* Extract a mask of the ones we can give to the Thumb's push/pop
20528 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
20529 /* Then count how many other high registers will need to be pushed. */
20530 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
20531 int n_free
, reg_base
;
20533 if (!for_prologue
&& frame_pointer_needed
)
20534 amount
= offsets
->locals_base
- offsets
->saved_regs
;
20536 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
20538 /* If the stack frame size is 512 exactly, we can save one load
20539 instruction, which should make this a win even when optimizing
20541 if (!optimize_size
&& amount
!= 512)
20544 /* Can't do this if there are high registers to push. */
20545 if (high_regs_pushed
!= 0)
20548 /* Shouldn't do it in the prologue if no registers would normally
20549 be pushed at all. In the epilogue, also allow it if we'll have
20550 a pop insn for the PC. */
20553 || TARGET_BACKTRACE
20554 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
20555 || TARGET_INTERWORK
20556 || crtl
->args
.pretend_args_size
!= 0))
20559 /* Don't do this if thumb_expand_prologue wants to emit instructions
20560 between the push and the stack frame allocation. */
20562 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
20563 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
20570 reg_base
= arm_size_return_regs () / UNITS_PER_WORD
;
20571 live_regs_mask
>>= reg_base
;
20574 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
20575 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
20577 live_regs_mask
>>= 1;
20583 gcc_assert (amount
/ 4 * 4 == amount
);
20585 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
20586 return (amount
- 508) / 4;
20587 if (amount
<= n_free
* 4)
20592 /* The bits which aren't usefully expanded as rtl. */
20594 thumb_unexpanded_epilogue (void)
20596 arm_stack_offsets
*offsets
;
20598 unsigned long live_regs_mask
= 0;
20599 int high_regs_pushed
= 0;
20601 int had_to_push_lr
;
20604 if (cfun
->machine
->return_used_this_function
!= 0)
20607 if (IS_NAKED (arm_current_func_type ()))
20610 offsets
= arm_get_frame_offsets ();
20611 live_regs_mask
= offsets
->saved_regs_mask
;
20612 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
20614 /* If we can deduce the registers used from the function's return value.
20615 This is more reliable that examining df_regs_ever_live_p () because that
20616 will be set if the register is ever used in the function, not just if
20617 the register is used to hold a return value. */
20618 size
= arm_size_return_regs ();
20620 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
20623 unsigned long extra_mask
= (1 << extra_pop
) - 1;
20624 live_regs_mask
|= extra_mask
<< (size
/ UNITS_PER_WORD
);
20627 /* The prolog may have pushed some high registers to use as
20628 work registers. e.g. the testsuite file:
20629 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20630 compiles to produce:
20631 push {r4, r5, r6, r7, lr}
20635 as part of the prolog. We have to undo that pushing here. */
20637 if (high_regs_pushed
)
20639 unsigned long mask
= live_regs_mask
& 0xff;
20642 /* The available low registers depend on the size of the value we are
20650 /* Oh dear! We have no low registers into which we can pop
20653 ("no low registers available for popping high registers");
20655 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
20656 if (live_regs_mask
& (1 << next_hi_reg
))
20659 while (high_regs_pushed
)
20661 /* Find lo register(s) into which the high register(s) can
20663 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
20665 if (mask
& (1 << regno
))
20666 high_regs_pushed
--;
20667 if (high_regs_pushed
== 0)
20671 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
20673 /* Pop the values into the low register(s). */
20674 thumb_pushpop (asm_out_file
, mask
, 0, NULL
, mask
);
20676 /* Move the value(s) into the high registers. */
20677 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
20679 if (mask
& (1 << regno
))
20681 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
20684 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
20685 if (live_regs_mask
& (1 << next_hi_reg
))
20690 live_regs_mask
&= ~0x0f00;
20693 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
20694 live_regs_mask
&= 0xff;
20696 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
20698 /* Pop the return address into the PC. */
20699 if (had_to_push_lr
)
20700 live_regs_mask
|= 1 << PC_REGNUM
;
20702 /* Either no argument registers were pushed or a backtrace
20703 structure was created which includes an adjusted stack
20704 pointer, so just pop everything. */
20705 if (live_regs_mask
)
20706 thumb_pushpop (asm_out_file
, live_regs_mask
, FALSE
, NULL
,
20709 /* We have either just popped the return address into the
20710 PC or it is was kept in LR for the entire function.
20711 Note that thumb_pushpop has already called thumb_exit if the
20712 PC was in the list. */
20713 if (!had_to_push_lr
)
20714 thumb_exit (asm_out_file
, LR_REGNUM
);
20718 /* Pop everything but the return address. */
20719 if (live_regs_mask
)
20720 thumb_pushpop (asm_out_file
, live_regs_mask
, FALSE
, NULL
,
20723 if (had_to_push_lr
)
20727 /* We have no free low regs, so save one. */
20728 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
20732 /* Get the return address into a temporary register. */
20733 thumb_pushpop (asm_out_file
, 1 << LAST_ARG_REGNUM
, 0, NULL
,
20734 1 << LAST_ARG_REGNUM
);
20738 /* Move the return address to lr. */
20739 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
20741 /* Restore the low register. */
20742 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
20747 regno
= LAST_ARG_REGNUM
;
20752 /* Remove the argument registers that were pushed onto the stack. */
20753 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
20754 SP_REGNUM
, SP_REGNUM
,
20755 crtl
->args
.pretend_args_size
);
20757 thumb_exit (asm_out_file
, regno
);
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();

#if ARM_FT_UNKNOWN != 0
  /* The allocation above zero-fills; only set func_type explicitly
     when the "unknown" sentinel is nonzero.  */
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
20776 /* Return an RTX indicating where the return address to the
20777 calling function can be found. */
20779 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
20784 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
20787 /* Do anything needed before RTL is emitted for each function. */
20789 arm_init_expanders (void)
20791 /* Arrange to initialize and mark the machine per-function status. */
20792 init_machine_status
= arm_init_machine_status
;
20794 /* This is to stop the combine pass optimizing away the alignment
20795 adjustment of va_arg. */
20796 /* ??? It is claimed that this should not be necessary. */
20798 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
20802 /* Like arm_compute_initial_elimination offset. Simpler because there
20803 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20804 to point at the base of the local variables after static stack
20805 space for a function has been allocated. */
20808 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20810 arm_stack_offsets
*offsets
;
20812 offsets
= arm_get_frame_offsets ();
20816 case ARG_POINTER_REGNUM
:
20819 case STACK_POINTER_REGNUM
:
20820 return offsets
->outgoing_args
- offsets
->saved_args
;
20822 case FRAME_POINTER_REGNUM
:
20823 return offsets
->soft_frame
- offsets
->saved_args
;
20825 case ARM_HARD_FRAME_POINTER_REGNUM
:
20826 return offsets
->saved_regs
- offsets
->saved_args
;
20828 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20829 return offsets
->locals_base
- offsets
->saved_args
;
20832 gcc_unreachable ();
20836 case FRAME_POINTER_REGNUM
:
20839 case STACK_POINTER_REGNUM
:
20840 return offsets
->outgoing_args
- offsets
->soft_frame
;
20842 case ARM_HARD_FRAME_POINTER_REGNUM
:
20843 return offsets
->saved_regs
- offsets
->soft_frame
;
20845 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20846 return offsets
->locals_base
- offsets
->soft_frame
;
20849 gcc_unreachable ();
20854 gcc_unreachable ();
20858 /* Generate the rest of a function's prologue. */
20860 thumb1_expand_prologue (void)
20864 HOST_WIDE_INT amount
;
20865 arm_stack_offsets
*offsets
;
20866 unsigned long func_type
;
20868 unsigned long live_regs_mask
;
20870 func_type
= arm_current_func_type ();
20872 /* Naked functions don't have prologues. */
20873 if (IS_NAKED (func_type
))
20876 if (IS_INTERRUPT (func_type
))
20878 error ("interrupt Service Routines cannot be coded in Thumb mode");
20882 offsets
= arm_get_frame_offsets ();
20883 live_regs_mask
= offsets
->saved_regs_mask
;
20884 /* Load the pic register before setting the frame pointer,
20885 so we can use r7 as a temporary work register. */
20886 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
20887 arm_load_pic_register (live_regs_mask
);
20889 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
20890 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
20891 stack_pointer_rtx
);
20893 if (flag_stack_usage_info
)
20894 current_function_static_stack_size
20895 = offsets
->outgoing_args
- offsets
->saved_args
;
20897 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
20898 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
20903 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20904 GEN_INT (- amount
)));
20905 RTX_FRAME_RELATED_P (insn
) = 1;
20911 /* The stack decrement is too big for an immediate value in a single
20912 insn. In theory we could issue multiple subtracts, but after
20913 three of them it becomes more space efficient to place the full
20914 value in the constant pool and load into a register. (Also the
20915 ARM debugger really likes to see only one stack decrement per
20916 function). So instead we look for a scratch register into which
20917 we can load the decrement, and then we subtract this from the
20918 stack pointer. Unfortunately on the thumb the only available
20919 scratch registers are the argument registers, and we cannot use
20920 these as they may hold arguments to the function. Instead we
20921 attempt to locate a call preserved register which is used by this
20922 function. If we can find one, then we know that it will have
20923 been pushed at the start of the prologue and so we can corrupt
20925 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
20926 if (live_regs_mask
& (1 << regno
))
20929 gcc_assert(regno
<= LAST_LO_REGNUM
);
20931 reg
= gen_rtx_REG (SImode
, regno
);
20933 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
20935 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
20936 stack_pointer_rtx
, reg
));
20937 RTX_FRAME_RELATED_P (insn
) = 1;
20938 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
20939 plus_constant (stack_pointer_rtx
,
20941 RTX_FRAME_RELATED_P (dwarf
) = 1;
20942 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20946 if (frame_pointer_needed
)
20947 thumb_set_frame_pointer (offsets
);
20949 /* If we are profiling, make sure no instructions are scheduled before
20950 the call to mcount. Similarly if the user has requested no
20951 scheduling in the prolog. Similarly if we want non-call exceptions
20952 using the EABI unwinder, to prevent faulting instructions from being
20953 swapped with a stack adjustment. */
20954 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
20955 || (arm_except_unwind_info (&global_options
) == UI_TARGET
20956 && cfun
->can_throw_non_call_exceptions
))
20957 emit_insn (gen_blockage ());
20959 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
20960 if (live_regs_mask
& 0xff)
20961 cfun
->machine
->lr_save_eliminated
= 0;
20966 thumb1_expand_epilogue (void)
20968 HOST_WIDE_INT amount
;
20969 arm_stack_offsets
*offsets
;
20972 /* Naked functions don't have prologues. */
20973 if (IS_NAKED (arm_current_func_type ()))
20976 offsets
= arm_get_frame_offsets ();
20977 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
20979 if (frame_pointer_needed
)
20981 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
20982 amount
= offsets
->locals_base
- offsets
->saved_regs
;
20984 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
20986 gcc_assert (amount
>= 0);
20990 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20991 GEN_INT (amount
)));
20994 /* r3 is always free in the epilogue. */
20995 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
20997 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
20998 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
21002 /* Emit a USE (stack_pointer_rtx), so that
21003 the stack adjustment will not be deleted. */
21004 emit_insn (gen_prologue_use (stack_pointer_rtx
));
21006 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
21007 emit_insn (gen_blockage ());
21009 /* Emit a clobber for each insn that will be restored in the epilogue,
21010 so that flow2 will get register lifetimes correct. */
21011 for (regno
= 0; regno
< 13; regno
++)
21012 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
21013 emit_clobber (gen_rtx_REG (SImode
, regno
));
21015 if (! df_regs_ever_live_p (LR_REGNUM
))
21016 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
21020 thumb1_output_function_prologue (FILE *f
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
21022 arm_stack_offsets
*offsets
;
21023 unsigned long live_regs_mask
= 0;
21024 unsigned long l_mask
;
21025 unsigned high_regs_pushed
= 0;
21026 int cfa_offset
= 0;
21029 if (IS_NAKED (arm_current_func_type ()))
21032 if (is_called_in_ARM_mode (current_function_decl
))
21036 gcc_assert (GET_CODE (DECL_RTL (current_function_decl
)) == MEM
);
21037 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
21039 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
21041 /* Generate code sequence to switch us into Thumb mode. */
21042 /* The .code 32 directive has already been emitted by
21043 ASM_DECLARE_FUNCTION_NAME. */
21044 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
21045 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
21047 /* Generate a label, so that the debugger will notice the
21048 change in instruction sets. This label is also used by
21049 the assembler to bypass the ARM code when this function
21050 is called from a Thumb encoded function elsewhere in the
21051 same file. Hence the definition of STUB_NAME here must
21052 agree with the definition in gas/config/tc-arm.c. */
21054 #define STUB_NAME ".real_start_of"
21056 fprintf (f
, "\t.code\t16\n");
21058 if (arm_dllexport_name_p (name
))
21059 name
= arm_strip_name_encoding (name
);
21061 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
21062 fprintf (f
, "\t.thumb_func\n");
21063 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
21066 if (crtl
->args
.pretend_args_size
)
21068 /* Output unwind directive for the stack adjustment. */
21069 if (arm_except_unwind_info (&global_options
) == UI_TARGET
)
21070 fprintf (f
, "\t.pad #%d\n",
21071 crtl
->args
.pretend_args_size
);
21073 if (cfun
->machine
->uses_anonymous_args
)
21077 fprintf (f
, "\tpush\t{");
21079 num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
21081 for (regno
= LAST_ARG_REGNUM
+ 1 - num_pushes
;
21082 regno
<= LAST_ARG_REGNUM
;
21084 asm_fprintf (f
, "%r%s", regno
,
21085 regno
== LAST_ARG_REGNUM
? "" : ", ");
21087 fprintf (f
, "}\n");
21090 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n",
21091 SP_REGNUM
, SP_REGNUM
,
21092 crtl
->args
.pretend_args_size
);
21094 /* We don't need to record the stores for unwinding (would it
21095 help the debugger any if we did?), but record the change in
21096 the stack pointer. */
21097 if (dwarf2out_do_frame ())
21099 char *l
= dwarf2out_cfi_label (false);
21101 cfa_offset
= cfa_offset
+ crtl
->args
.pretend_args_size
;
21102 dwarf2out_def_cfa (l
, SP_REGNUM
, cfa_offset
);
21106 /* Get the registers we are going to push. */
21107 offsets
= arm_get_frame_offsets ();
21108 live_regs_mask
= offsets
->saved_regs_mask
;
21109 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
21110 l_mask
= live_regs_mask
& 0x40ff;
21111 /* Then count how many other high registers will need to be pushed. */
21112 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
21114 if (TARGET_BACKTRACE
)
21117 unsigned work_register
;
21119 /* We have been asked to create a stack backtrace structure.
21120 The code looks like this:
21124 0 sub SP, #16 Reserve space for 4 registers.
21125 2 push {R7} Push low registers.
21126 4 add R7, SP, #20 Get the stack pointer before the push.
21127 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
21128 8 mov R7, PC Get hold of the start of this code plus 12.
21129 10 str R7, [SP, #16] Store it.
21130 12 mov R7, FP Get hold of the current frame pointer.
21131 14 str R7, [SP, #4] Store it.
21132 16 mov R7, LR Get hold of the current return address.
21133 18 str R7, [SP, #12] Store it.
21134 20 add R7, SP, #16 Point at the start of the backtrace structure.
21135 22 mov FP, R7 Put this value into the frame pointer. */
21137 work_register
= thumb_find_work_register (live_regs_mask
);
21139 if (arm_except_unwind_info (&global_options
) == UI_TARGET
)
21140 asm_fprintf (f
, "\t.pad #16\n");
21143 (f
, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
21144 SP_REGNUM
, SP_REGNUM
);
21146 if (dwarf2out_do_frame ())
21148 char *l
= dwarf2out_cfi_label (false);
21150 cfa_offset
= cfa_offset
+ 16;
21151 dwarf2out_def_cfa (l
, SP_REGNUM
, cfa_offset
);
21156 thumb_pushpop (f
, l_mask
, 1, &cfa_offset
, l_mask
);
21157 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
21162 asm_fprintf (f
, "\tadd\t%r, %r, #%d\n", work_register
, SP_REGNUM
,
21163 offset
+ 16 + crtl
->args
.pretend_args_size
);
21165 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
21168 /* Make sure that the instruction fetching the PC is in the right place
21169 to calculate "start of backtrace creation code + 12". */
21172 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, PC_REGNUM
);
21173 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
21175 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
,
21176 ARM_HARD_FRAME_POINTER_REGNUM
);
21177 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
21182 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
,
21183 ARM_HARD_FRAME_POINTER_REGNUM
);
21184 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
21186 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, PC_REGNUM
);
21187 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
21191 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, LR_REGNUM
);
21192 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
21194 asm_fprintf (f
, "\tadd\t%r, %r, #%d\n", work_register
, SP_REGNUM
,
21196 asm_fprintf (f
, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
21197 ARM_HARD_FRAME_POINTER_REGNUM
, work_register
);
21199 /* Optimization: If we are not pushing any low registers but we are going
21200 to push some high registers then delay our first push. This will just
21201 be a push of LR and we can combine it with the push of the first high
21203 else if ((l_mask
& 0xff) != 0
21204 || (high_regs_pushed
== 0 && l_mask
))
21206 unsigned long mask
= l_mask
;
21207 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
21208 thumb_pushpop (f
, mask
, 1, &cfa_offset
, mask
);
21211 if (high_regs_pushed
)
21213 unsigned pushable_regs
;
21214 unsigned next_hi_reg
;
21216 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
21217 if (live_regs_mask
& (1 << next_hi_reg
))
21220 pushable_regs
= l_mask
& 0xff;
21222 if (pushable_regs
== 0)
21223 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
21225 while (high_regs_pushed
> 0)
21227 unsigned long real_regs_mask
= 0;
21229 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
21231 if (pushable_regs
& (1 << regno
))
21233 asm_fprintf (f
, "\tmov\t%r, %r\n", regno
, next_hi_reg
);
21235 high_regs_pushed
--;
21236 real_regs_mask
|= (1 << next_hi_reg
);
21238 if (high_regs_pushed
)
21240 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
21242 if (live_regs_mask
& (1 << next_hi_reg
))
21247 pushable_regs
&= ~((1 << regno
) - 1);
21253 /* If we had to find a work register and we have not yet
21254 saved the LR then add it to the list of regs to push. */
21255 if (l_mask
== (1 << LR_REGNUM
))
21257 thumb_pushpop (f
, pushable_regs
| (1 << LR_REGNUM
),
21259 real_regs_mask
| (1 << LR_REGNUM
));
21263 thumb_pushpop (f
, pushable_regs
, 1, &cfa_offset
, real_regs_mask
);
21268 /* Handle the case of a double word load into a low register from
21269 a computed memory address. The computed address may involve a
21270 register which is overwritten by the load. */
21272 thumb_load_double_from_address (rtx
*operands
)
21280 gcc_assert (GET_CODE (operands
[0]) == REG
);
21281 gcc_assert (GET_CODE (operands
[1]) == MEM
);
21283 /* Get the memory address. */
21284 addr
= XEXP (operands
[1], 0);
21286 /* Work out how the memory address is computed. */
21287 switch (GET_CODE (addr
))
21290 operands
[2] = adjust_address (operands
[1], SImode
, 4);
21292 if (REGNO (operands
[0]) == REGNO (addr
))
21294 output_asm_insn ("ldr\t%H0, %2", operands
);
21295 output_asm_insn ("ldr\t%0, %1", operands
);
21299 output_asm_insn ("ldr\t%0, %1", operands
);
21300 output_asm_insn ("ldr\t%H0, %2", operands
);
21305 /* Compute <address> + 4 for the high order load. */
21306 operands
[2] = adjust_address (operands
[1], SImode
, 4);
21308 output_asm_insn ("ldr\t%0, %1", operands
);
21309 output_asm_insn ("ldr\t%H0, %2", operands
);
21313 arg1
= XEXP (addr
, 0);
21314 arg2
= XEXP (addr
, 1);
21316 if (CONSTANT_P (arg1
))
21317 base
= arg2
, offset
= arg1
;
21319 base
= arg1
, offset
= arg2
;
21321 gcc_assert (GET_CODE (base
) == REG
);
21323 /* Catch the case of <address> = <reg> + <reg> */
21324 if (GET_CODE (offset
) == REG
)
21326 int reg_offset
= REGNO (offset
);
21327 int reg_base
= REGNO (base
);
21328 int reg_dest
= REGNO (operands
[0]);
21330 /* Add the base and offset registers together into the
21331 higher destination register. */
21332 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
21333 reg_dest
+ 1, reg_base
, reg_offset
);
21335 /* Load the lower destination register from the address in
21336 the higher destination register. */
21337 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
21338 reg_dest
, reg_dest
+ 1);
21340 /* Load the higher destination register from its own address
21342 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
21343 reg_dest
+ 1, reg_dest
+ 1);
21347 /* Compute <address> + 4 for the high order load. */
21348 operands
[2] = adjust_address (operands
[1], SImode
, 4);
21350 /* If the computed address is held in the low order register
21351 then load the high order register first, otherwise always
21352 load the low order register first. */
21353 if (REGNO (operands
[0]) == REGNO (base
))
21355 output_asm_insn ("ldr\t%H0, %2", operands
);
21356 output_asm_insn ("ldr\t%0, %1", operands
);
21360 output_asm_insn ("ldr\t%0, %1", operands
);
21361 output_asm_insn ("ldr\t%H0, %2", operands
);
21367 /* With no registers to worry about we can just load the value
21369 operands
[2] = adjust_address (operands
[1], SImode
, 4);
21371 output_asm_insn ("ldr\t%H0, %2", operands
);
21372 output_asm_insn ("ldr\t%0, %1", operands
);
21376 gcc_unreachable ();
21383 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
21390 if (REGNO (operands
[4]) > REGNO (operands
[5]))
21393 operands
[4] = operands
[5];
21396 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
21397 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
21401 if (REGNO (operands
[4]) > REGNO (operands
[5]))
21404 operands
[4] = operands
[5];
21407 if (REGNO (operands
[5]) > REGNO (operands
[6]))
21410 operands
[5] = operands
[6];
21413 if (REGNO (operands
[4]) > REGNO (operands
[5]))
21416 operands
[4] = operands
[5];
21420 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
21421 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
21425 gcc_unreachable ();
21431 /* Output a call-via instruction for thumb state. */
21433 thumb_call_via_reg (rtx reg
)
21435 int regno
= REGNO (reg
);
21438 gcc_assert (regno
< LR_REGNUM
);
21440 /* If we are in the normal text section we can use a single instance
21441 per compilation unit. If we are doing function sections, then we need
21442 an entry per section, since we can't rely on reachability. */
21443 if (in_section
== text_section
)
21445 thumb_call_reg_needed
= 1;
21447 if (thumb_call_via_label
[regno
] == NULL
)
21448 thumb_call_via_label
[regno
] = gen_label_rtx ();
21449 labelp
= thumb_call_via_label
+ regno
;
21453 if (cfun
->machine
->call_via
[regno
] == NULL
)
21454 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
21455 labelp
= cfun
->machine
->call_via
+ regno
;
21458 output_asm_insn ("bl\t%a0", labelp
);
21462 /* Routines for generating rtl. */
21464 thumb_expand_movmemqi (rtx
*operands
)
21466 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
21467 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
21468 HOST_WIDE_INT len
= INTVAL (operands
[2]);
21469 HOST_WIDE_INT offset
= 0;
21473 emit_insn (gen_movmem12b (out
, in
, out
, in
));
21479 emit_insn (gen_movmem8b (out
, in
, out
, in
));
21485 rtx reg
= gen_reg_rtx (SImode
);
21486 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
21487 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
21494 rtx reg
= gen_reg_rtx (HImode
);
21495 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
21496 plus_constant (in
, offset
))));
21497 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (out
, offset
)),
21505 rtx reg
= gen_reg_rtx (QImode
);
21506 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
21507 plus_constant (in
, offset
))));
21508 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (out
, offset
)),
21514 thumb_reload_out_hi (rtx
*operands
)
21516 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
21519 /* Handle reading a half-word from memory during reload. */
21521 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
21523 gcc_unreachable ();
21526 /* Return the length of a function name prefix
21527 that starts with the character 'c'. */
21529 arm_get_strip_length (int c
)
21533 ARM_NAME_ENCODING_LENGTHS
/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}
21551 /* If there is a '*' anywhere in the name's prefix, then
21552 emit the stripped name verbatim, otherwise prepend an
21553 underscore if leading underscores are being used. */
21555 arm_asm_output_labelref (FILE *stream
, const char *name
)
21560 while ((skip
= arm_get_strip_length (* name
)))
21562 verbatim
|= (*name
== '*');
21567 fputs (name
, stream
);
21569 asm_fprintf (stream
, "%U%s", name
);
21573 arm_file_start (void)
21577 if (TARGET_UNIFIED_ASM
)
21578 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
21582 const char *fpu_name
;
21583 if (arm_selected_arch
)
21584 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
21586 asm_fprintf (asm_out_file
, "\t.cpu %s\n", arm_selected_cpu
->name
);
21588 if (TARGET_SOFT_FLOAT
)
21591 fpu_name
= "softvfp";
21593 fpu_name
= "softfpa";
21597 fpu_name
= arm_fpu_desc
->name
;
21598 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
21600 if (TARGET_HARD_FLOAT
)
21601 asm_fprintf (asm_out_file
, "\t.eabi_attribute 27, 3\n");
21602 if (TARGET_HARD_FLOAT_ABI
)
21603 asm_fprintf (asm_out_file
, "\t.eabi_attribute 28, 1\n");
21606 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
21608 /* Some of these attributes only apply when the corresponding features
21609 are used. However we don't have any easy way of figuring this out.
21610 Conservatively record the setting that would have been used. */
21612 /* Tag_ABI_FP_rounding. */
21613 if (flag_rounding_math
)
21614 asm_fprintf (asm_out_file
, "\t.eabi_attribute 19, 1\n");
21615 if (!flag_unsafe_math_optimizations
)
21617 /* Tag_ABI_FP_denomal. */
21618 asm_fprintf (asm_out_file
, "\t.eabi_attribute 20, 1\n");
21619 /* Tag_ABI_FP_exceptions. */
21620 asm_fprintf (asm_out_file
, "\t.eabi_attribute 21, 1\n");
21622 /* Tag_ABI_FP_user_exceptions. */
21623 if (flag_signaling_nans
)
21624 asm_fprintf (asm_out_file
, "\t.eabi_attribute 22, 1\n");
21625 /* Tag_ABI_FP_number_model. */
21626 asm_fprintf (asm_out_file
, "\t.eabi_attribute 23, %d\n",
21627 flag_finite_math_only
? 1 : 3);
21629 /* Tag_ABI_align8_needed. */
21630 asm_fprintf (asm_out_file
, "\t.eabi_attribute 24, 1\n");
21631 /* Tag_ABI_align8_preserved. */
21632 asm_fprintf (asm_out_file
, "\t.eabi_attribute 25, 1\n");
21633 /* Tag_ABI_enum_size. */
21634 asm_fprintf (asm_out_file
, "\t.eabi_attribute 26, %d\n",
21635 flag_short_enums
? 1 : 2);
21637 /* Tag_ABI_optimization_goals. */
21640 else if (optimize
>= 2)
21646 asm_fprintf (asm_out_file
, "\t.eabi_attribute 30, %d\n", val
);
21648 /* Tag_ABI_FP_16bit_format. */
21649 if (arm_fp16_format
)
21650 asm_fprintf (asm_out_file
, "\t.eabi_attribute 38, %d\n",
21651 (int)arm_fp16_format
);
21653 if (arm_lang_output_object_attributes_hook
)
21654 arm_lang_output_object_attributes_hook();
21656 default_file_start();
21660 arm_file_end (void)
21664 if (NEED_INDICATE_EXEC_STACK
)
21665 /* Add .note.GNU-stack. */
21666 file_end_indicate_exec_stack ();
21668 if (! thumb_call_reg_needed
)
21671 switch_to_section (text_section
);
21672 asm_fprintf (asm_out_file
, "\t.code 16\n");
21673 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
21675 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
21677 rtx label
= thumb_call_via_label
[regno
];
21681 targetm
.asm_out
.internal_label (asm_out_file
, "L",
21682 CODE_LABEL_NUMBER (label
));
21683 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
21689 /* Symbols in the text segment can be accessed without indirecting via the
21690 constant pool; it may take an extra binary operation, but this is still
21691 faster than indirecting via memory. Don't do this when not optimizing,
21692 since we won't be calculating al of the offsets necessary to do this
21696 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
21698 if (optimize
> 0 && TREE_CONSTANT (decl
))
21699 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
21701 default_encode_section_info (decl
, rtl
, first
);
21703 #endif /* !ARM_PE */
21706 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
21708 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
21709 && !strcmp (prefix
, "L"))
21711 arm_ccfsm_state
= 0;
21712 arm_target_insn
= NULL
;
21714 default_internal_label (stream
, prefix
, labelno
);
21717 /* Output code to add DELTA to the first argument, and then jump
21718 to FUNCTION. Used for C++ multiple inheritance. */
21720 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
21721 HOST_WIDE_INT delta
,
21722 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
21725 static int thunk_label
= 0;
21728 int mi_delta
= delta
;
21729 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
21731 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
21734 mi_delta
= - mi_delta
;
21738 int labelno
= thunk_label
++;
21739 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
21740 /* Thunks are entered in arm mode when avaiable. */
21741 if (TARGET_THUMB1_ONLY
)
21743 /* push r3 so we can use it as a temporary. */
21744 /* TODO: Omit this save if r3 is not used. */
21745 fputs ("\tpush {r3}\n", file
);
21746 fputs ("\tldr\tr3, ", file
);
21750 fputs ("\tldr\tr12, ", file
);
21752 assemble_name (file
, label
);
21753 fputc ('\n', file
);
21756 /* If we are generating PIC, the ldr instruction below loads
21757 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21758 the address of the add + 8, so we have:
21760 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21763 Note that we have "+ 1" because some versions of GNU ld
21764 don't set the low bit of the result for R_ARM_REL32
21765 relocations against thumb function symbols.
21766 On ARMv6M this is +4, not +8. */
21767 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
21768 assemble_name (file
, labelpc
);
21769 fputs (":\n", file
);
21770 if (TARGET_THUMB1_ONLY
)
21772 /* This is 2 insns after the start of the thunk, so we know it
21773 is 4-byte aligned. */
21774 fputs ("\tadd\tr3, pc, r3\n", file
);
21775 fputs ("\tmov r12, r3\n", file
);
21778 fputs ("\tadd\tr12, pc, r12\n", file
);
21780 else if (TARGET_THUMB1_ONLY
)
21781 fputs ("\tmov r12, r3\n", file
);
21783 if (TARGET_THUMB1_ONLY
)
21785 if (mi_delta
> 255)
21787 fputs ("\tldr\tr3, ", file
);
21788 assemble_name (file
, label
);
21789 fputs ("+4\n", file
);
21790 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
21791 mi_op
, this_regno
, this_regno
);
21793 else if (mi_delta
!= 0)
21795 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
21796 mi_op
, this_regno
, this_regno
,
21802 /* TODO: Use movw/movt for large constants when available. */
21803 while (mi_delta
!= 0)
21805 if ((mi_delta
& (3 << shift
)) == 0)
21809 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
21810 mi_op
, this_regno
, this_regno
,
21811 mi_delta
& (0xff << shift
));
21812 mi_delta
&= ~(0xff << shift
);
21819 if (TARGET_THUMB1_ONLY
)
21820 fputs ("\tpop\t{r3}\n", file
);
21822 fprintf (file
, "\tbx\tr12\n");
21823 ASM_OUTPUT_ALIGN (file
, 2);
21824 assemble_name (file
, label
);
21825 fputs (":\n", file
);
21828 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21829 rtx tem
= XEXP (DECL_RTL (function
), 0);
21830 tem
= gen_rtx_PLUS (GET_MODE (tem
), tem
, GEN_INT (-7));
21831 tem
= gen_rtx_MINUS (GET_MODE (tem
),
21833 gen_rtx_SYMBOL_REF (Pmode
,
21834 ggc_strdup (labelpc
)));
21835 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
21838 /* Output ".word .LTHUNKn". */
21839 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
21841 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
21842 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
21846 fputs ("\tb\t", file
);
21847 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
21848 if (NEED_PLT_RELOC
)
21849 fputs ("(PLT)", file
);
21850 fputc ('\n', file
);
21855 arm_emit_vector_const (FILE *file
, rtx x
)
21858 const char * pattern
;
21860 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
21862 switch (GET_MODE (x
))
21864 case V2SImode
: pattern
= "%08x"; break;
21865 case V4HImode
: pattern
= "%04x"; break;
21866 case V8QImode
: pattern
= "%02x"; break;
21867 default: gcc_unreachable ();
21870 fprintf (file
, "0x");
21871 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
21875 element
= CONST_VECTOR_ELT (x
, i
);
21876 fprintf (file
, pattern
, INTVAL (element
));
21882 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
21883 HFmode constant pool entries are actually loaded with ldr. */
21885 arm_emit_fp16_const (rtx c
)
21890 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
21891 bits
= real_to_target (NULL
, &r
, HFmode
);
21892 if (WORDS_BIG_ENDIAN
)
21893 assemble_zeros (2);
21894 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
21895 if (!WORDS_BIG_ENDIAN
)
21896 assemble_zeros (2);
21900 arm_output_load_gr (rtx
*operands
)
21907 if (GET_CODE (operands
[1]) != MEM
21908 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
21909 || GET_CODE (reg
= XEXP (sum
, 0)) != REG
21910 || GET_CODE (offset
= XEXP (sum
, 1)) != CONST_INT
21911 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
21912 return "wldrw%?\t%0, %1";
21914 /* Fix up an out-of-range load of a GR register. */
21915 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
21916 wcgr
= operands
[0];
21918 output_asm_insn ("ldr%?\t%0, %1", operands
);
21920 operands
[0] = wcgr
;
21922 output_asm_insn ("tmcr%?\t%0, %1", operands
);
21923 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
21928 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21930 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21931 named arg and all anonymous args onto the stack.
21932 XXX I know the prologue shouldn't be pushing registers, but it is faster
21936 arm_setup_incoming_varargs (CUMULATIVE_ARGS
*pcum
,
21937 enum machine_mode mode
,
21940 int second_time ATTRIBUTE_UNUSED
)
21944 cfun
->machine
->uses_anonymous_args
= 1;
21945 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
21947 nregs
= pcum
->aapcs_ncrn
;
21948 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
21952 nregs
= pcum
->nregs
;
21954 if (nregs
< NUM_ARG_REGS
)
21955 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
21958 /* Return nonzero if the CONSUMER instruction (a store) does not need
21959 PRODUCER's value to calculate the address. */
21962 arm_no_early_store_addr_dep (rtx producer
, rtx consumer
)
21964 rtx value
= PATTERN (producer
);
21965 rtx addr
= PATTERN (consumer
);
21967 if (GET_CODE (value
) == COND_EXEC
)
21968 value
= COND_EXEC_CODE (value
);
21969 if (GET_CODE (value
) == PARALLEL
)
21970 value
= XVECEXP (value
, 0, 0);
21971 value
= XEXP (value
, 0);
21972 if (GET_CODE (addr
) == COND_EXEC
)
21973 addr
= COND_EXEC_CODE (addr
);
21974 if (GET_CODE (addr
) == PARALLEL
)
21975 addr
= XVECEXP (addr
, 0, 0);
21976 addr
= XEXP (addr
, 0);
21978 return !reg_overlap_mentioned_p (value
, addr
);
21981 /* Return nonzero if the CONSUMER instruction (a store) does need
21982 PRODUCER's value to calculate the address. */
21985 arm_early_store_addr_dep (rtx producer
, rtx consumer
)
21987 return !arm_no_early_store_addr_dep (producer
, consumer
);
21990 /* Return nonzero if the CONSUMER instruction (a load) does need
21991 PRODUCER's value to calculate the address. */
21994 arm_early_load_addr_dep (rtx producer
, rtx consumer
)
21996 rtx value
= PATTERN (producer
);
21997 rtx addr
= PATTERN (consumer
);
21999 if (GET_CODE (value
) == COND_EXEC
)
22000 value
= COND_EXEC_CODE (value
);
22001 if (GET_CODE (value
) == PARALLEL
)
22002 value
= XVECEXP (value
, 0, 0);
22003 value
= XEXP (value
, 0);
22004 if (GET_CODE (addr
) == COND_EXEC
)
22005 addr
= COND_EXEC_CODE (addr
);
22006 if (GET_CODE (addr
) == PARALLEL
)
22007 addr
= XVECEXP (addr
, 0, 0);
22008 addr
= XEXP (addr
, 1);
22010 return reg_overlap_mentioned_p (value
, addr
);
22013 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
22014 have an early register shift value or amount dependency on the
22015 result of PRODUCER. */
22018 arm_no_early_alu_shift_dep (rtx producer
, rtx consumer
)
22020 rtx value
= PATTERN (producer
);
22021 rtx op
= PATTERN (consumer
);
22024 if (GET_CODE (value
) == COND_EXEC
)
22025 value
= COND_EXEC_CODE (value
);
22026 if (GET_CODE (value
) == PARALLEL
)
22027 value
= XVECEXP (value
, 0, 0);
22028 value
= XEXP (value
, 0);
22029 if (GET_CODE (op
) == COND_EXEC
)
22030 op
= COND_EXEC_CODE (op
);
22031 if (GET_CODE (op
) == PARALLEL
)
22032 op
= XVECEXP (op
, 0, 0);
22035 early_op
= XEXP (op
, 0);
22036 /* This is either an actual independent shift, or a shift applied to
22037 the first operand of another operation. We want the whole shift
22039 if (GET_CODE (early_op
) == REG
)
22042 return !reg_overlap_mentioned_p (value
, early_op
);
22045 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
22046 have an early register shift value dependency on the result of
22050 arm_no_early_alu_shift_value_dep (rtx producer
, rtx consumer
)
22052 rtx value
= PATTERN (producer
);
22053 rtx op
= PATTERN (consumer
);
22056 if (GET_CODE (value
) == COND_EXEC
)
22057 value
= COND_EXEC_CODE (value
);
22058 if (GET_CODE (value
) == PARALLEL
)
22059 value
= XVECEXP (value
, 0, 0);
22060 value
= XEXP (value
, 0);
22061 if (GET_CODE (op
) == COND_EXEC
)
22062 op
= COND_EXEC_CODE (op
);
22063 if (GET_CODE (op
) == PARALLEL
)
22064 op
= XVECEXP (op
, 0, 0);
22067 early_op
= XEXP (op
, 0);
22069 /* This is either an actual independent shift, or a shift applied to
22070 the first operand of another operation. We want the value being
22071 shifted, in either case. */
22072 if (GET_CODE (early_op
) != REG
)
22073 early_op
= XEXP (early_op
, 0);
22075 return !reg_overlap_mentioned_p (value
, early_op
);
22078 /* Return nonzero if the CONSUMER (a mul or mac op) does not
22079 have an early register mult dependency on the result of
22083 arm_no_early_mul_dep (rtx producer
, rtx consumer
)
22085 rtx value
= PATTERN (producer
);
22086 rtx op
= PATTERN (consumer
);
22088 if (GET_CODE (value
) == COND_EXEC
)
22089 value
= COND_EXEC_CODE (value
);
22090 if (GET_CODE (value
) == PARALLEL
)
22091 value
= XVECEXP (value
, 0, 0);
22092 value
= XEXP (value
, 0);
22093 if (GET_CODE (op
) == COND_EXEC
)
22094 op
= COND_EXEC_CODE (op
);
22095 if (GET_CODE (op
) == PARALLEL
)
22096 op
= XVECEXP (op
, 0, 0);
22099 if (GET_CODE (op
) == PLUS
|| GET_CODE (op
) == MINUS
)
22101 if (GET_CODE (XEXP (op
, 0)) == MULT
)
22102 return !reg_overlap_mentioned_p (value
, XEXP (op
, 0));
22104 return !reg_overlap_mentioned_p (value
, XEXP (op
, 1));
22110 /* We can't rely on the caller doing the proper promotion when
22111 using APCS or ATPCS. */
22114 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
22116 return !TARGET_AAPCS_BASED
;
22119 static enum machine_mode
22120 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
22121 enum machine_mode mode
,
22122 int *punsignedp ATTRIBUTE_UNUSED
,
22123 const_tree fntype ATTRIBUTE_UNUSED
,
22124 int for_return ATTRIBUTE_UNUSED
)
22126 if (GET_MODE_CLASS (mode
) == MODE_INT
22127 && GET_MODE_SIZE (mode
) < 4)
22133 /* AAPCS based ABIs use short enums by default. */
22136 arm_default_short_enums (void)
22138 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
22142 /* AAPCS requires that anonymous bitfields affect structure alignment. */
22145 arm_align_anon_bitfield (void)
22147 return TARGET_AAPCS_BASED
;
22151 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
22154 arm_cxx_guard_type (void)
22156 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
22159 /* Return non-zero if the consumer (a multiply-accumulate instruction)
22160 has an accumulator dependency on the result of the producer (a
22161 multiplication instruction) and no other dependency on that result. */
22163 arm_mac_accumulator_is_mul_result (rtx producer
, rtx consumer
)
22165 rtx mul
= PATTERN (producer
);
22166 rtx mac
= PATTERN (consumer
);
22168 rtx mac_op0
, mac_op1
, mac_acc
;
22170 if (GET_CODE (mul
) == COND_EXEC
)
22171 mul
= COND_EXEC_CODE (mul
);
22172 if (GET_CODE (mac
) == COND_EXEC
)
22173 mac
= COND_EXEC_CODE (mac
);
22175 /* Check that mul is of the form (set (...) (mult ...))
22176 and mla is of the form (set (...) (plus (mult ...) (...))). */
22177 if ((GET_CODE (mul
) != SET
|| GET_CODE (XEXP (mul
, 1)) != MULT
)
22178 || (GET_CODE (mac
) != SET
|| GET_CODE (XEXP (mac
, 1)) != PLUS
22179 || GET_CODE (XEXP (XEXP (mac
, 1), 0)) != MULT
))
22182 mul_result
= XEXP (mul
, 0);
22183 mac_op0
= XEXP (XEXP (XEXP (mac
, 1), 0), 0);
22184 mac_op1
= XEXP (XEXP (XEXP (mac
, 1), 0), 1);
22185 mac_acc
= XEXP (XEXP (mac
, 1), 1);
22187 return (reg_overlap_mentioned_p (mul_result
, mac_acc
)
22188 && !reg_overlap_mentioned_p (mul_result
, mac_op0
)
22189 && !reg_overlap_mentioned_p (mul_result
, mac_op1
));
22193 /* The EABI says test the least significant bit of a guard variable. */
22196 arm_cxx_guard_mask_bit (void)
22198 return TARGET_AAPCS_BASED
;
22202 /* The EABI specifies that all array cookies are 8 bytes long. */
22205 arm_get_cookie_size (tree type
)
22209 if (!TARGET_AAPCS_BASED
)
22210 return default_cxx_get_cookie_size (type
);
22212 size
= build_int_cst (sizetype
, 8);
22217 /* The EABI says that array cookies should also contain the element size. */
22220 arm_cookie_has_size (void)
22222 return TARGET_AAPCS_BASED
;
22226 /* The EABI says constructors and destructors should return a pointer to
22227 the object constructed/destroyed. */
22230 arm_cxx_cdtor_returns_this (void)
22232 return TARGET_AAPCS_BASED
;
22235 /* The EABI says that an inline function may never be the key
22239 arm_cxx_key_method_may_be_inline (void)
22241 return !TARGET_AAPCS_BASED
;
22245 arm_cxx_determine_class_data_visibility (tree decl
)
22247 if (!TARGET_AAPCS_BASED
22248 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
22251 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
22252 is exported. However, on systems without dynamic vague linkage,
22253 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
22254 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
22255 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
22257 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
22258 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
22262 arm_cxx_class_data_always_comdat (void)
22264 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22265 vague linkage if the class has no key function. */
22266 return !TARGET_AAPCS_BASED
;
22270 /* The EABI says __aeabi_atexit should be used to register static
22274 arm_cxx_use_aeabi_atexit (void)
22276 return TARGET_AAPCS_BASED
;
22281 arm_set_return_address (rtx source
, rtx scratch
)
22283 arm_stack_offsets
*offsets
;
22284 HOST_WIDE_INT delta
;
22286 unsigned long saved_regs
;
22288 offsets
= arm_get_frame_offsets ();
22289 saved_regs
= offsets
->saved_regs_mask
;
22291 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
22292 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
22295 if (frame_pointer_needed
)
22296 addr
= plus_constant(hard_frame_pointer_rtx
, -4);
22299 /* LR will be the first saved register. */
22300 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
22305 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
22306 GEN_INT (delta
& ~4095)));
22311 addr
= stack_pointer_rtx
;
22313 addr
= plus_constant (addr
, delta
);
22315 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
22321 thumb_set_return_address (rtx source
, rtx scratch
)
22323 arm_stack_offsets
*offsets
;
22324 HOST_WIDE_INT delta
;
22325 HOST_WIDE_INT limit
;
22328 unsigned long mask
;
22332 offsets
= arm_get_frame_offsets ();
22333 mask
= offsets
->saved_regs_mask
;
22334 if (mask
& (1 << LR_REGNUM
))
22337 /* Find the saved regs. */
22338 if (frame_pointer_needed
)
22340 delta
= offsets
->soft_frame
- offsets
->saved_args
;
22341 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
22347 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
22350 /* Allow for the stack frame. */
22351 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
22353 /* The link register is always the first saved register. */
22356 /* Construct the address. */
22357 addr
= gen_rtx_REG (SImode
, reg
);
22360 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
22361 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
22365 addr
= plus_constant (addr
, delta
);
22367 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
22370 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
22373 /* Implements target hook vector_mode_supported_p. */
22375 arm_vector_mode_supported_p (enum machine_mode mode
)
22377 /* Neon also supports V2SImode, etc. listed in the clause below. */
22378 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
22379 || mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
22382 if ((TARGET_NEON
|| TARGET_IWMMXT
)
22383 && ((mode
== V2SImode
)
22384 || (mode
== V4HImode
)
22385 || (mode
== V8QImode
)))
22391 /* Implements target hook array_mode_supported_p. */
22394 arm_array_mode_supported_p (enum machine_mode mode
,
22395 unsigned HOST_WIDE_INT nelems
)
22398 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
22399 && (nelems
>= 2 && nelems
<= 4))
22405 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22406 registers when autovectorizing for Neon, at least until multiple vector
22407 widths are supported properly by the middle-end. */
22409 static enum machine_mode
22410 arm_preferred_simd_mode (enum machine_mode mode
)
22416 return TARGET_NEON_VECTORIZE_QUAD
? V4SFmode
: V2SFmode
;
22418 return TARGET_NEON_VECTORIZE_QUAD
? V4SImode
: V2SImode
;
22420 return TARGET_NEON_VECTORIZE_QUAD
? V8HImode
: V4HImode
;
22422 return TARGET_NEON_VECTORIZE_QUAD
? V16QImode
: V8QImode
;
22424 if (TARGET_NEON_VECTORIZE_QUAD
)
22431 if (TARGET_REALLY_IWMMXT
)
22447 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22449 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
22450 using r0-r4 for function arguments, r7 for the stack frame and don't have
22451 enough left over to do doubleword arithmetic. For Thumb-2 all the
22452 potentially problematic instructions accept high registers so this is not
22453 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
22454 that require many low registers. */
22456 arm_class_likely_spilled_p (reg_class_t rclass
)
22458 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
22459 || rclass
== CC_REG
)
22465 /* Implements target hook small_register_classes_for_mode_p. */
22467 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED
)
22469 return TARGET_THUMB1
;
22472 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22473 ARM insns and therefore guarantee that the shift count is modulo 256.
22474 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22475 guarantee no particular behavior for out-of-range counts. */
22477 static unsigned HOST_WIDE_INT
22478 arm_shift_truncation_mask (enum machine_mode mode
)
22480 return mode
== SImode
? 255 : 0;
22484 /* Map internal gcc register numbers to DWARF2 register numbers. */
22487 arm_dbx_register_number (unsigned int regno
)
22492 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22493 compatibility. The EABI defines them as registers 96-103. */
22494 if (IS_FPA_REGNUM (regno
))
22495 return (TARGET_AAPCS_BASED
? 96 : 16) + regno
- FIRST_FPA_REGNUM
;
22497 if (IS_VFP_REGNUM (regno
))
22499 /* See comment in arm_dwarf_register_span. */
22500 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
22501 return 64 + regno
- FIRST_VFP_REGNUM
;
22503 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
22506 if (IS_IWMMXT_GR_REGNUM (regno
))
22507 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
22509 if (IS_IWMMXT_REGNUM (regno
))
22510 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
22512 gcc_unreachable ();
22515 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22516 GCC models tham as 64 32-bit registers, so we need to describe this to
22517 the DWARF generation code. Other registers can use the default. */
22519 arm_dwarf_register_span (rtx rtl
)
22526 regno
= REGNO (rtl
);
22527 if (!IS_VFP_REGNUM (regno
))
22530 /* XXX FIXME: The EABI defines two VFP register ranges:
22531 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22533 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22534 corresponding D register. Until GDB supports this, we shall use the
22535 legacy encodings. We also use these encodings for D0-D15 for
22536 compatibility with older debuggers. */
22537 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
22540 nregs
= GET_MODE_SIZE (GET_MODE (rtl
)) / 8;
22541 p
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nregs
));
22542 regno
= (regno
- FIRST_VFP_REGNUM
) / 2;
22543 for (i
= 0; i
< nregs
; i
++)
22544 XVECEXP (p
, 0, i
) = gen_rtx_REG (DImode
, 256 + regno
+ i
);
22549 #if ARM_UNWIND_INFO
22550 /* Emit unwind directives for a store-multiple instruction or stack pointer
22551 push during alignment.
22552 These should only ever be generated by the function prologue code, so
22553 expect them to have a particular form. */
22556 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
22559 HOST_WIDE_INT offset
;
22560 HOST_WIDE_INT nregs
;
22566 e
= XVECEXP (p
, 0, 0);
22567 if (GET_CODE (e
) != SET
)
22570 /* First insn will adjust the stack pointer. */
22571 if (GET_CODE (e
) != SET
22572 || GET_CODE (XEXP (e
, 0)) != REG
22573 || REGNO (XEXP (e
, 0)) != SP_REGNUM
22574 || GET_CODE (XEXP (e
, 1)) != PLUS
)
22577 offset
= -INTVAL (XEXP (XEXP (e
, 1), 1));
22578 nregs
= XVECLEN (p
, 0) - 1;
22580 reg
= REGNO (XEXP (XVECEXP (p
, 0, 1), 1));
22583 /* The function prologue may also push pc, but not annotate it as it is
22584 never restored. We turn this into a stack pointer adjustment. */
22585 if (nregs
* 4 == offset
- 4)
22587 fprintf (asm_out_file
, "\t.pad #4\n");
22591 fprintf (asm_out_file
, "\t.save {");
22593 else if (IS_VFP_REGNUM (reg
))
22596 fprintf (asm_out_file
, "\t.vsave {");
22598 else if (reg
>= FIRST_FPA_REGNUM
&& reg
<= LAST_FPA_REGNUM
)
22600 /* FPA registers are done differently. */
22601 asm_fprintf (asm_out_file
, "\t.save %r, %wd\n", reg
, nregs
);
22605 /* Unknown register type. */
22608 /* If the stack increment doesn't match the size of the saved registers,
22609 something has gone horribly wrong. */
22610 if (offset
!= nregs
* reg_size
)
22615 /* The remaining insns will describe the stores. */
22616 for (i
= 1; i
<= nregs
; i
++)
22618 /* Expect (set (mem <addr>) (reg)).
22619 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22620 e
= XVECEXP (p
, 0, i
);
22621 if (GET_CODE (e
) != SET
22622 || GET_CODE (XEXP (e
, 0)) != MEM
22623 || GET_CODE (XEXP (e
, 1)) != REG
)
22626 reg
= REGNO (XEXP (e
, 1));
22631 fprintf (asm_out_file
, ", ");
22632 /* We can't use %r for vfp because we need to use the
22633 double precision register names. */
22634 if (IS_VFP_REGNUM (reg
))
22635 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
22637 asm_fprintf (asm_out_file
, "%r", reg
);
22639 #ifdef ENABLE_CHECKING
22640 /* Check that the addresses are consecutive. */
22641 e
= XEXP (XEXP (e
, 0), 0);
22642 if (GET_CODE (e
) == PLUS
)
22644 offset
+= reg_size
;
22645 if (GET_CODE (XEXP (e
, 0)) != REG
22646 || REGNO (XEXP (e
, 0)) != SP_REGNUM
22647 || GET_CODE (XEXP (e
, 1)) != CONST_INT
22648 || offset
!= INTVAL (XEXP (e
, 1)))
22652 || GET_CODE (e
) != REG
22653 || REGNO (e
) != SP_REGNUM
)
22657 fprintf (asm_out_file
, "}\n");
22660 /* Emit unwind directives for a SET. */
22663 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
22671 switch (GET_CODE (e0
))
22674 /* Pushing a single register. */
22675 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
22676 || GET_CODE (XEXP (XEXP (e0
, 0), 0)) != REG
22677 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
22680 asm_fprintf (asm_out_file
, "\t.save ");
22681 if (IS_VFP_REGNUM (REGNO (e1
)))
22682 asm_fprintf(asm_out_file
, "{d%d}\n",
22683 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
22685 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
22689 if (REGNO (e0
) == SP_REGNUM
)
22691 /* A stack increment. */
22692 if (GET_CODE (e1
) != PLUS
22693 || GET_CODE (XEXP (e1
, 0)) != REG
22694 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
22695 || GET_CODE (XEXP (e1
, 1)) != CONST_INT
)
22698 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
22699 -INTVAL (XEXP (e1
, 1)));
22701 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
22703 HOST_WIDE_INT offset
;
22705 if (GET_CODE (e1
) == PLUS
)
22707 if (GET_CODE (XEXP (e1
, 0)) != REG
22708 || GET_CODE (XEXP (e1
, 1)) != CONST_INT
)
22710 reg
= REGNO (XEXP (e1
, 0));
22711 offset
= INTVAL (XEXP (e1
, 1));
22712 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
22713 HARD_FRAME_POINTER_REGNUM
, reg
,
22716 else if (GET_CODE (e1
) == REG
)
22719 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
22720 HARD_FRAME_POINTER_REGNUM
, reg
);
22725 else if (GET_CODE (e1
) == REG
&& REGNO (e1
) == SP_REGNUM
)
22727 /* Move from sp to reg. */
22728 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
22730 else if (GET_CODE (e1
) == PLUS
22731 && GET_CODE (XEXP (e1
, 0)) == REG
22732 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
22733 && GET_CODE (XEXP (e1
, 1)) == CONST_INT
)
22735 /* Set reg to offset from sp. */
22736 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
22737 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
22739 else if (GET_CODE (e1
) == UNSPEC
&& XINT (e1
, 1) == UNSPEC_STACK_ALIGN
)
22741 /* Stack pointer save before alignment. */
22743 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22756 /* Emit unwind directives for the given insn. */
22759 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
22763 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
22766 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
22767 && (TREE_NOTHROW (current_function_decl
)
22768 || crtl
->all_throwers_are_sibcalls
))
22771 if (GET_CODE (insn
) == NOTE
|| !RTX_FRAME_RELATED_P (insn
))
22774 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
22776 pat
= XEXP (pat
, 0);
22778 pat
= PATTERN (insn
);
22780 switch (GET_CODE (pat
))
22783 arm_unwind_emit_set (asm_out_file
, pat
);
22787 /* Store multiple. */
22788 arm_unwind_emit_sequence (asm_out_file
, pat
);
22797 /* Output a reference from a function exception table to the type_info
22798 object X. The EABI specifies that the symbol should be relocated by
22799 an R_ARM_TARGET2 relocation. */
22802 arm_output_ttype (rtx x
)
22804 fputs ("\t.word\t", asm_out_file
);
22805 output_addr_const (asm_out_file
, x
);
22806 /* Use special relocations for symbol references. */
22807 if (GET_CODE (x
) != CONST_INT
)
22808 fputs ("(TARGET2)", asm_out_file
);
22809 fputc ('\n', asm_out_file
);
22814 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22817 arm_asm_emit_except_personality (rtx personality
)
22819 fputs ("\t.personality\t", asm_out_file
);
22820 output_addr_const (asm_out_file
, personality
);
22821 fputc ('\n', asm_out_file
);
22824 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22827 arm_asm_init_sections (void)
22829 exception_section
= get_unnamed_section (0, output_section_asm_op
,
22832 #endif /* ARM_UNWIND_INFO */
22834 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22835 stack alignment. */
22838 arm_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
22840 rtx unspec
= SET_SRC (pattern
);
22841 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
22845 case UNSPEC_STACK_ALIGN
:
22846 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22847 put anything on the stack, so hopefully it won't matter.
22848 CFA = SP will be correct after alignment. */
22849 dwarf2out_reg_save_reg (label
, stack_pointer_rtx
,
22850 SET_DEST (pattern
));
22853 gcc_unreachable ();
22858 /* Output unwind directives for the start/end of a function. */
22861 arm_output_fn_unwind (FILE * f
, bool prologue
)
22863 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
22867 fputs ("\t.fnstart\n", f
);
22870 /* If this function will never be unwound, then mark it as such.
22871 The came condition is used in arm_unwind_emit to suppress
22872 the frame annotations. */
22873 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
22874 && (TREE_NOTHROW (current_function_decl
)
22875 || crtl
->all_throwers_are_sibcalls
))
22876 fputs("\t.cantunwind\n", f
);
22878 fputs ("\t.fnend\n", f
);
22883 arm_emit_tls_decoration (FILE *fp
, rtx x
)
22885 enum tls_reloc reloc
;
22888 val
= XVECEXP (x
, 0, 0);
22889 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
22891 output_addr_const (fp
, val
);
22896 fputs ("(tlsgd)", fp
);
22899 fputs ("(tlsldm)", fp
);
22902 fputs ("(tlsldo)", fp
);
22905 fputs ("(gottpoff)", fp
);
22908 fputs ("(tpoff)", fp
);
22911 gcc_unreachable ();
22919 fputs (" + (. - ", fp
);
22920 output_addr_const (fp
, XVECEXP (x
, 0, 2));
22922 output_addr_const (fp
, XVECEXP (x
, 0, 3));
22932 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22935 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
22937 gcc_assert (size
== 4);
22938 fputs ("\t.word\t", file
);
22939 output_addr_const (file
, x
);
22940 fputs ("(tlsldo)", file
);
22943 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22946 arm_output_addr_const_extra (FILE *fp
, rtx x
)
22948 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
22949 return arm_emit_tls_decoration (fp
, x
);
22950 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
22953 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
22955 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
22956 assemble_name_raw (fp
, label
);
22960 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
22962 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
22966 output_addr_const (fp
, XVECEXP (x
, 0, 0));
22970 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
22972 output_addr_const (fp
, XVECEXP (x
, 0, 0));
22976 output_addr_const (fp
, XVECEXP (x
, 0, 1));
22980 else if (GET_CODE (x
) == CONST_VECTOR
)
22981 return arm_emit_vector_const (fp
, x
);
22986 /* Output assembly for a shift instruction.
22987 SET_FLAGS determines how the instruction modifies the condition codes.
22988 0 - Do not set condition codes.
22989 1 - Set condition codes.
22990 2 - Use smallest instruction. */
22992 arm_output_shift(rtx
* operands
, int set_flags
)
22995 static const char flag_chars
[3] = {'?', '.', '!'};
23000 c
= flag_chars
[set_flags
];
23001 if (TARGET_UNIFIED_ASM
)
23003 shift
= shift_op(operands
[3], &val
);
23007 operands
[2] = GEN_INT(val
);
23008 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
23011 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
23014 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
23015 output_asm_insn (pattern
, operands
);
23019 /* Output a Thumb-1 casesi dispatch sequence. */
23021 thumb1_output_casesi (rtx
*operands
)
23023 rtx diff_vec
= PATTERN (next_real_insn (operands
[0]));
23025 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
23027 switch (GET_MODE(diff_vec
))
23030 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
23031 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
23033 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
23034 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
23036 return "bl\t%___gnu_thumb1_case_si";
23038 gcc_unreachable ();
23042 /* Output a Thumb-2 casesi instruction. */
23044 thumb2_output_casesi (rtx
*operands
)
23046 rtx diff_vec
= PATTERN (next_real_insn (operands
[2]));
23048 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
23050 output_asm_insn ("cmp\t%0, %1", operands
);
23051 output_asm_insn ("bhi\t%l3", operands
);
23052 switch (GET_MODE(diff_vec
))
23055 return "tbb\t[%|pc, %0]";
23057 return "tbh\t[%|pc, %0, lsl #1]";
23061 output_asm_insn ("adr\t%4, %l2", operands
);
23062 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
23063 output_asm_insn ("add\t%4, %4, %5", operands
);
23068 output_asm_insn ("adr\t%4, %l2", operands
);
23069 return "ldr\t%|pc, [%4, %0, lsl #2]";
23072 gcc_unreachable ();
23076 /* Most ARM cores are single issue, but some newer ones can dual issue.
23077 The scheduler descriptions rely on this being correct. */
23079 arm_issue_rate (void)
23097 /* A table and a function to perform ARM-specific name mangling for
23098 NEON vector types in order to conform to the AAPCS (see "Procedure
23099 Call Standard for the ARM Architecture", Appendix A). To qualify
23100 for emission with the mangled names defined in that document, a
23101 vector type must not only be of the correct mode but also be
23102 composed of NEON vector element types (e.g. __builtin_neon_qi). */
23105 enum machine_mode mode
;
23106 const char *element_type_name
;
23107 const char *aapcs_name
;
23108 } arm_mangle_map_entry
;
23110 static arm_mangle_map_entry arm_mangle_map
[] = {
23111 /* 64-bit containerized types. */
23112 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
23113 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
23114 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
23115 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
23116 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
23117 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
23118 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
23119 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
23120 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
23121 /* 128-bit containerized types. */
23122 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
23123 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
23124 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
23125 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
23126 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
23127 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
23128 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
23129 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
23130 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
23131 { VOIDmode
, NULL
, NULL
}
23135 arm_mangle_type (const_tree type
)
23137 arm_mangle_map_entry
*pos
= arm_mangle_map
;
23139 /* The ARM ABI documents (10th October 2008) say that "__va_list"
23140 has to be managled as if it is in the "std" namespace. */
23141 if (TARGET_AAPCS_BASED
23142 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
23144 static bool warned
;
23145 if (!warned
&& warn_psabi
&& !in_system_header
)
23148 inform (input_location
,
23149 "the mangling of %<va_list%> has changed in GCC 4.4");
23151 return "St9__va_list";
23154 /* Half-precision float. */
23155 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
23158 if (TREE_CODE (type
) != VECTOR_TYPE
)
23161 /* Check the mode of the vector type, and the name of the vector
23162 element type, against the table. */
23163 while (pos
->mode
!= VOIDmode
)
23165 tree elt_type
= TREE_TYPE (type
);
23167 if (pos
->mode
== TYPE_MODE (type
)
23168 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
23169 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
23170 pos
->element_type_name
))
23171 return pos
->aapcs_name
;
23176 /* Use the default mangling for unrecognized (possibly user-defined)
23181 /* Order of allocation of core registers for Thumb: this allocation is
23182 written over the corresponding initial entries of the array
23183 initialized with REG_ALLOC_ORDER. We allocate all low registers
23184 first. Saving and restoring a low register is usually cheaper than
23185 using a call-clobbered high register. */
23187 static const int thumb_core_reg_alloc_order
[] =
23189 3, 2, 1, 0, 4, 5, 6, 7,
23190 14, 12, 8, 9, 10, 11, 13, 15
23193 /* Adjust register allocation order when compiling for Thumb. */
23196 arm_order_regs_for_local_alloc (void)
23198 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
23199 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
23201 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
23202 sizeof (thumb_core_reg_alloc_order
));
23205 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
23208 arm_frame_pointer_required (void)
23210 return (cfun
->has_nonlocal_label
23211 || SUBTARGET_FRAME_POINTER_REQUIRED
23212 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
23215 /* Only thumb1 can't support conditional execution, so return true if
23216 the target is not thumb1. */
23218 arm_have_conditional_execution (void)
23220 return !TARGET_THUMB1
;
23223 /* Legitimize a memory reference for sync primitive implemented using
23224 ldrex / strex. We currently force the form of the reference to be
23225 indirect without offset. We do not yet support the indirect offset
23226 addressing supported by some ARM targets for these
23229 arm_legitimize_sync_memory (rtx memory
)
23231 rtx addr
= force_reg (Pmode
, XEXP (memory
, 0));
23232 rtx legitimate_memory
= gen_rtx_MEM (GET_MODE (memory
), addr
);
23234 set_mem_alias_set (legitimate_memory
, ALIAS_SET_MEMORY_BARRIER
);
23235 MEM_VOLATILE_P (legitimate_memory
) = MEM_VOLATILE_P (memory
);
23236 return legitimate_memory
;
23239 /* An instruction emitter. */
23240 typedef void (* emit_f
) (int label
, const char *, rtx
*);
23242 /* An instruction emitter that emits via the conventional
23243 output_asm_insn. */
23245 arm_emit (int label ATTRIBUTE_UNUSED
, const char *pattern
, rtx
*operands
)
23247 output_asm_insn (pattern
, operands
);
23250 /* Count the number of emitted synchronization instructions. */
23251 static unsigned arm_insn_count
;
23253 /* An emitter that counts emitted instructions but does not actually
23254 emit instruction into the instruction stream. */
23256 arm_count (int label
,
23257 const char *pattern ATTRIBUTE_UNUSED
,
23258 rtx
*operands ATTRIBUTE_UNUSED
)
23264 /* Construct a pattern using conventional output formatting and feed
23265 it to output_asm_insn. Provides a mechanism to construct the
23266 output pattern on the fly. Note the hard limit on the pattern
23268 static void ATTRIBUTE_PRINTF_4
23269 arm_output_asm_insn (emit_f emit
, int label
, rtx
*operands
,
23270 const char *pattern
, ...)
23275 va_start (ap
, pattern
);
23276 vsprintf (buffer
, pattern
, ap
);
23278 emit (label
, buffer
, operands
);
23281 /* Emit the memory barrier instruction, if any, provided by this
23282 target to a specified emitter. */
23284 arm_process_output_memory_barrier (emit_f emit
, rtx
*operands
)
23286 if (TARGET_HAVE_DMB
)
23288 /* Note we issue a system level barrier. We should consider
23289 issuing a inner shareabilty zone barrier here instead, ie.
23291 emit (0, "dmb\tsy", operands
);
23295 if (TARGET_HAVE_DMB_MCR
)
23297 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands
);
23301 gcc_unreachable ();
23304 /* Emit the memory barrier instruction, if any, provided by this
23307 arm_output_memory_barrier (rtx
*operands
)
23309 arm_process_output_memory_barrier (arm_emit
, operands
);
23313 /* Helper to figure out the instruction suffix required on ldrex/strex
23314 for operations on an object of the specified mode. */
23315 static const char *
23316 arm_ldrex_suffix (enum machine_mode mode
)
23320 case QImode
: return "b";
23321 case HImode
: return "h";
23322 case SImode
: return "";
23323 case DImode
: return "d";
23325 gcc_unreachable ();
23330 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
23333 arm_output_ldrex (emit_f emit
,
23334 enum machine_mode mode
,
23338 const char *suffix
= arm_ldrex_suffix (mode
);
23341 operands
[0] = target
;
23342 operands
[1] = memory
;
23343 arm_output_asm_insn (emit
, 0, operands
, "ldrex%s\t%%0, %%C1", suffix
);
23346 /* Emit a strex{b,h,d, } instruction appropriate for the specified
23349 arm_output_strex (emit_f emit
,
23350 enum machine_mode mode
,
23356 const char *suffix
= arm_ldrex_suffix (mode
);
23359 operands
[0] = result
;
23360 operands
[1] = value
;
23361 operands
[2] = memory
;
23362 arm_output_asm_insn (emit
, 0, operands
, "strex%s%s\t%%0, %%1, %%C2", suffix
,
23366 /* Helper to emit a two operand instruction. */
23368 arm_output_op2 (emit_f emit
, const char *mnemonic
, rtx d
, rtx s
)
23374 arm_output_asm_insn (emit
, 0, operands
, "%s\t%%0, %%1", mnemonic
);
23377 /* Helper to emit a three operand instruction. */
23379 arm_output_op3 (emit_f emit
, const char *mnemonic
, rtx d
, rtx a
, rtx b
)
23386 arm_output_asm_insn (emit
, 0, operands
, "%s\t%%0, %%1, %%2", mnemonic
);
23389 /* Emit a load store exclusive synchronization loop.
23393 if old_value != required_value
23395 t1 = sync_op (old_value, new_value)
23396 [mem] = t1, t2 = [0|1]
23400 t1 == t2 is not permitted
23401 t1 == old_value is permitted
23405 RTX register or const_int representing the required old_value for
23406 the modify to continue, if NULL no comparsion is performed. */
23408 arm_output_sync_loop (emit_f emit
,
23409 enum machine_mode mode
,
23412 rtx required_value
,
23416 enum attr_sync_op sync_op
,
23417 int early_barrier_required
)
23421 gcc_assert (t1
!= t2
);
23423 if (early_barrier_required
)
23424 arm_process_output_memory_barrier (emit
, NULL
);
23426 arm_output_asm_insn (emit
, 1, operands
, "%sLSYT%%=:", LOCAL_LABEL_PREFIX
);
23428 arm_output_ldrex (emit
, mode
, old_value
, memory
);
23430 if (required_value
)
23434 operands
[0] = old_value
;
23435 operands
[1] = required_value
;
23436 arm_output_asm_insn (emit
, 0, operands
, "cmp\t%%0, %%1");
23437 arm_output_asm_insn (emit
, 0, operands
, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX
);
23443 arm_output_op3 (emit
, "add", t1
, old_value
, new_value
);
23447 arm_output_op3 (emit
, "sub", t1
, old_value
, new_value
);
23451 arm_output_op3 (emit
, "orr", t1
, old_value
, new_value
);
23455 arm_output_op3 (emit
, "eor", t1
, old_value
, new_value
);
23459 arm_output_op3 (emit
,"and", t1
, old_value
, new_value
);
23463 arm_output_op3 (emit
, "and", t1
, old_value
, new_value
);
23464 arm_output_op2 (emit
, "mvn", t1
, t1
);
23474 arm_output_strex (emit
, mode
, "", t2
, t1
, memory
);
23476 arm_output_asm_insn (emit
, 0, operands
, "teq\t%%0, #0");
23477 arm_output_asm_insn (emit
, 0, operands
, "bne\t%sLSYT%%=",
23478 LOCAL_LABEL_PREFIX
);
23482 /* Use old_value for the return value because for some operations
23483 the old_value can easily be restored. This saves one register. */
23484 arm_output_strex (emit
, mode
, "", old_value
, t1
, memory
);
23485 operands
[0] = old_value
;
23486 arm_output_asm_insn (emit
, 0, operands
, "teq\t%%0, #0");
23487 arm_output_asm_insn (emit
, 0, operands
, "bne\t%sLSYT%%=",
23488 LOCAL_LABEL_PREFIX
);
23493 arm_output_op3 (emit
, "sub", old_value
, t1
, new_value
);
23497 arm_output_op3 (emit
, "add", old_value
, t1
, new_value
);
23501 arm_output_op3 (emit
, "eor", old_value
, t1
, new_value
);
23505 arm_output_op2 (emit
, "mov", old_value
, required_value
);
23509 gcc_unreachable ();
23513 arm_process_output_memory_barrier (emit
, NULL
);
23514 arm_output_asm_insn (emit
, 1, operands
, "%sLSYB%%=:", LOCAL_LABEL_PREFIX
);
23518 arm_get_sync_operand (rtx
*operands
, int index
, rtx default_value
)
23521 default_value
= operands
[index
- 1];
23523 return default_value
;
23526 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23527 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23529 /* Extract the operands for a synchroniztion instruction from the
23530 instructions attributes and emit the instruction. */
23532 arm_process_output_sync_insn (emit_f emit
, rtx insn
, rtx
*operands
)
23534 rtx result
, memory
, required_value
, new_value
, t1
, t2
;
23536 enum machine_mode mode
;
23537 enum attr_sync_op sync_op
;
23539 result
= FETCH_SYNC_OPERAND(result
, 0);
23540 memory
= FETCH_SYNC_OPERAND(memory
, 0);
23541 required_value
= FETCH_SYNC_OPERAND(required_value
, 0);
23542 new_value
= FETCH_SYNC_OPERAND(new_value
, 0);
23543 t1
= FETCH_SYNC_OPERAND(t1
, 0);
23544 t2
= FETCH_SYNC_OPERAND(t2
, 0);
23546 get_attr_sync_release_barrier (insn
) == SYNC_RELEASE_BARRIER_YES
;
23547 sync_op
= get_attr_sync_op (insn
);
23548 mode
= GET_MODE (memory
);
23550 arm_output_sync_loop (emit
, mode
, result
, memory
, required_value
,
23551 new_value
, t1
, t2
, sync_op
, early_barrier
);
23554 /* Emit a synchronization instruction loop. */
23556 arm_output_sync_insn (rtx insn
, rtx
*operands
)
23558 arm_process_output_sync_insn (arm_emit
, insn
, operands
);
23562 /* Count the number of machine instruction that will be emitted for a
23563 synchronization instruction. Note that the emitter used does not
23564 emit instructions, it just counts instructions being carefull not
23565 to count labels. */
23567 arm_sync_loop_insns (rtx insn
, rtx
*operands
)
23569 arm_insn_count
= 0;
23570 arm_process_output_sync_insn (arm_count
, insn
, operands
);
23571 return arm_insn_count
;
23574 /* Helper to call a target sync instruction generator, dealing with
23575 the variation in operands required by the different generators. */
23577 arm_call_generator (struct arm_sync_generator
*generator
, rtx old_value
,
23578 rtx memory
, rtx required_value
, rtx new_value
)
23580 switch (generator
->op
)
23582 case arm_sync_generator_omn
:
23583 gcc_assert (! required_value
);
23584 return generator
->u
.omn (old_value
, memory
, new_value
);
23586 case arm_sync_generator_omrn
:
23587 gcc_assert (required_value
);
23588 return generator
->u
.omrn (old_value
, memory
, required_value
, new_value
);
23594 /* Expand a synchronization loop. The synchronization loop is expanded
23595 as an opaque block of instructions in order to ensure that we do
23596 not subsequently get extraneous memory accesses inserted within the
23597 critical region. The exclusive access property of ldrex/strex is
23598 only guaranteed in there are no intervening memory accesses. */
23600 arm_expand_sync (enum machine_mode mode
,
23601 struct arm_sync_generator
*generator
,
23602 rtx target
, rtx memory
, rtx required_value
, rtx new_value
)
23604 if (target
== NULL
)
23605 target
= gen_reg_rtx (mode
);
23607 memory
= arm_legitimize_sync_memory (memory
);
23608 if (mode
!= SImode
)
23610 rtx load_temp
= gen_reg_rtx (SImode
);
23612 if (required_value
)
23613 required_value
= convert_modes (SImode
, mode
, required_value
, true);
23615 new_value
= convert_modes (SImode
, mode
, new_value
, true);
23616 emit_insn (arm_call_generator (generator
, load_temp
, memory
,
23617 required_value
, new_value
));
23618 emit_move_insn (target
, gen_lowpart (mode
, load_temp
));
23622 emit_insn (arm_call_generator (generator
, target
, memory
, required_value
,
23627 static unsigned int
23628 arm_autovectorize_vector_sizes (void)
23630 return TARGET_NEON_VECTORIZE_QUAD
? 16 | 8 : 0;
23634 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
23636 /* Vectors which aren't in packed structures will not be less aligned than
23637 the natural alignment of their element type, so this is safe. */
23638 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
23641 return default_builtin_vector_alignment_reachable (type
, is_packed
);
23645 arm_builtin_support_vector_misalignment (enum machine_mode mode
,
23646 const_tree type
, int misalignment
,
23649 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
23651 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
23656 /* If the misalignment is unknown, we should be able to handle the access
23657 so long as it is not to a member of a packed data structure. */
23658 if (misalignment
== -1)
23661 /* Return true if the misalignment is a multiple of the natural alignment
23662 of the vector's element type. This is probably always going to be
23663 true in practice, since we've already established that this isn't a
23665 return ((misalignment
% align
) == 0);
23668 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
23673 arm_conditional_register_usage (void)
23677 if (TARGET_SOFT_FLOAT
|| TARGET_THUMB1
|| !TARGET_FPA
)
23679 for (regno
= FIRST_FPA_REGNUM
;
23680 regno
<= LAST_FPA_REGNUM
; ++regno
)
23681 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
23684 if (TARGET_THUMB1
&& optimize_size
)
23686 /* When optimizing for size on Thumb-1, it's better not
23687 to use the HI regs, because of the overhead of
23689 for (regno
= FIRST_HI_REGNUM
;
23690 regno
<= LAST_HI_REGNUM
; ++regno
)
23691 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
23694 /* The link register can be clobbered by any branch insn,
23695 but we have no way to track that at present, so mark
23696 it as unavailable. */
23698 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
23700 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
23702 if (TARGET_MAVERICK
)
23704 for (regno
= FIRST_FPA_REGNUM
;
23705 regno
<= LAST_FPA_REGNUM
; ++ regno
)
23706 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
23707 for (regno
= FIRST_CIRRUS_FP_REGNUM
;
23708 regno
<= LAST_CIRRUS_FP_REGNUM
; ++ regno
)
23710 fixed_regs
[regno
] = 0;
23711 call_used_regs
[regno
] = regno
< FIRST_CIRRUS_FP_REGNUM
+ 4;
23716 /* VFPv3 registers are disabled when earlier VFP
23717 versions are selected due to the definition of
23718 LAST_VFP_REGNUM. */
23719 for (regno
= FIRST_VFP_REGNUM
;
23720 regno
<= LAST_VFP_REGNUM
; ++ regno
)
23722 fixed_regs
[regno
] = 0;
23723 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
23724 || regno
>= FIRST_VFP_REGNUM
+ 32;
23729 if (TARGET_REALLY_IWMMXT
)
23731 regno
= FIRST_IWMMXT_GR_REGNUM
;
23732 /* The 2002/10/09 revision of the XScale ABI has wCG0
23733 and wCG1 as call-preserved registers. The 2002/11/21
23734 revision changed this so that all wCG registers are
23735 scratch registers. */
23736 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
23737 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
23738 fixed_regs
[regno
] = 0;
23739 /* The XScale ABI has wR0 - wR9 as scratch registers,
23740 the rest as call-preserved registers. */
23741 for (regno
= FIRST_IWMMXT_REGNUM
;
23742 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
23744 fixed_regs
[regno
] = 0;
23745 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
23749 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
23751 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
23752 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
23754 else if (TARGET_APCS_STACK
)
23756 fixed_regs
[10] = 1;
23757 call_used_regs
[10] = 1;
23759 /* -mcaller-super-interworking reserves r11 for calls to
23760 _interwork_r11_call_via_rN(). Making the register global
23761 is an easy way of ensuring that it remains valid for all
23763 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
23764 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
23766 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
23767 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
23768 if (TARGET_CALLER_INTERWORKING
)
23769 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
23771 SUBTARGET_CONDITIONAL_REGISTER_USAGE
23775 arm_preferred_rename_class (reg_class_t rclass
)
23777 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
23778 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
23779 and code size can be reduced. */
23780 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
23786 /* Compute the atrribute "length" of insn "*push_multi".
23787 So this function MUST be kept in sync with that insn pattern. */
23789 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
23791 int i
, regno
, hi_reg
;
23792 int num_saves
= XVECLEN (parallel_op
, 0);
23799 regno
= REGNO (first_op
);
23800 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
23801 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
23803 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
23804 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
23812 /* Check the validity of operands in an ldrd/strd instruction. */
23814 arm_check_ldrd_operands (rtx reg1
, rtx reg2
, rtx off1
, rtx off2
)
23816 HOST_WIDE_INT offset1
= 0;
23817 HOST_WIDE_INT offset2
= 0;
23818 int regno1
= REGNO (reg1
);
23819 int regno2
= REGNO (reg2
);
23820 HOST_WIDE_INT max_offset
= 1020;
23825 if (off1
!= NULL_RTX
)
23826 offset1
= INTVAL (off1
);
23827 if (off2
!= NULL_RTX
)
23828 offset2
= INTVAL (off2
);
23830 /* The offset range of LDRD is [-max_offset, max_offset]. Here we check if
23831 both offsets lie in the range [-max_offset, max_offset+4]. If one of the
23832 offsets is max_offset+4, the following condition
23833 ((offset1 + 4) == offset2)
23834 will ensure offset1 to be max_offset, suitable for instruction LDRD. */
23835 if ((offset1
> (max_offset
+ 4)) || (offset1
< -max_offset
)
23836 || ((offset1
& 3) != 0))
23838 if ((offset2
> (max_offset
+ 4)) || (offset2
< -max_offset
)
23839 || ((offset2
& 3) != 0))
23842 if ((offset1
+ 4) == offset2
)
23848 if (((regno1
& 1) == 0) && ((regno1
+ 1) == regno2
)) /* ldrd */
23851 if ((regno1
< regno2
) && ((offset1
<= 4) && (offset1
>= -8))) /* ldm */
23854 if ((offset2
+ 4) == offset1
)
23860 if (((regno2
& 1) == 0) && ((regno2
+ 1) == regno1
)) /* ldrd */
23863 if ((regno2
< regno1
) && ((offset2
<= 4) && (offset2
>= -8))) /* ldm */
23870 /* Check if the two memory accesses can be merged to an ldrd/strd instruction.
23871 That is they use the same base register, and the gap between constant
23872 offsets should be 4. */
23874 arm_legitimate_ldrd_p (rtx reg1
, rtx reg2
, rtx mem1
, rtx mem2
, bool ldrd
)
23877 rtx offset1
= NULL_RTX
;
23878 rtx offset2
= NULL_RTX
;
23879 rtx addr1
= XEXP (mem1
, 0);
23880 rtx addr2
= XEXP (mem2
, 0);
23882 if (MEM_VOLATILE_P (mem1
) || MEM_VOLATILE_P (mem2
))
23887 else if (GET_CODE (addr1
) == PLUS
)
23889 base1
= XEXP (addr1
, 0);
23890 offset1
= XEXP (addr1
, 1);
23891 if (!REG_P (base1
) || (GET_CODE (offset1
) != CONST_INT
))
23899 else if (GET_CODE (addr2
) == PLUS
)
23901 base2
= XEXP (addr2
, 0);
23902 offset2
= XEXP (addr2
, 1);
23903 if (!REG_P (base2
) || (GET_CODE (offset2
) != CONST_INT
))
23909 if (base1
!= base2
)
23912 if (ldrd
&& ((reg1
== reg2
) || (reg1
== base1
)))
23915 return arm_check_ldrd_operands (reg1
, reg2
, offset1
, offset2
);
23918 /* Output instructions for ldrd and count the number of bytes has been
23919 outputted. Do not actually output instructions if EMIT_P is false. */
23921 arm_output_ldrd (rtx reg1
, rtx reg2
, rtx base
, rtx off1
, rtx off2
, bool emit_p
)
23925 HOST_WIDE_INT offset1
= 0;
23926 HOST_WIDE_INT offset2
= 0;
23928 if (off1
!= NULL_RTX
)
23929 offset1
= INTVAL (off1
);
23931 off1
= GEN_INT (0);
23932 if (off2
!= NULL_RTX
)
23933 offset2
= INTVAL (off2
);
23935 off2
= GEN_INT (0);
23936 if (offset1
> offset2
)
23939 HOST_WIDE_INT t
= offset1
; offset1
= offset2
; offset2
= t
;
23940 tmp
= off1
; off1
= off2
; off2
= tmp
;
23941 tmp
= reg1
; reg1
= reg2
; reg2
= tmp
;
23944 operands
[0] = reg1
;
23945 operands
[1] = reg2
;
23946 operands
[2] = base
;
23947 operands
[3] = off1
;
23948 operands
[4] = off2
;
23952 if (fix_cm3_ldrd
&& (base
== reg1
))
23954 if (offset1
<= -256)
23957 output_asm_insn ("sub\t%2, %2, %n3", operands
);
23961 output_asm_insn ("ldr\t%1, [%2, #4]", operands
);
23962 if (low_register_operand (reg2
, SImode
)
23963 && low_register_operand (base
, SImode
))
23969 output_asm_insn ("ldr\t%0, [%2]", operands
);
23970 if (low_register_operand (base
, SImode
))
23978 output_asm_insn ("ldr\t%1, [%2, %4]", operands
);
23979 if (low_register_operand (reg2
, SImode
) && (offset2
>= 0)
23980 && low_register_operand (base
, SImode
) && (offset2
< 128))
23986 output_asm_insn ("ldr\t%0, [%2, %3]", operands
);
23987 if (low_register_operand (base
, SImode
)
23988 && (offset1
>= 0) && (offset1
< 128))
23997 output_asm_insn ("ldrd\t%0, %1, [%2, %3]", operands
);
24001 else /* TARGET_ARM */
24003 if ((REGNO (reg2
) == (REGNO (reg1
) + 1)) && ((REGNO (reg1
) & 1) == 0))
24006 output_asm_insn ("ldrd\t%0, %1, [%2, %3]", operands
);
24016 output_asm_insn ("ldm%(db%)\t%2, {%0, %1}", operands
);
24020 output_asm_insn ("ldm%(da%)\t%2, {%0, %1}", operands
);
24024 output_asm_insn ("ldm%(ia%)\t%2, {%0, %1}", operands
);
24028 output_asm_insn ("ldm%(ib%)\t%2, {%0, %1}", operands
);
24032 gcc_unreachable ();
24042 #include "gt-arm.h"