1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-attr.h"
43 #include "diagnostic-core.h"
50 #include "target-def.h"
52 #include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* Hook for emitting language-specific object attributes.
   NOTE(review): presumably installed by a language front end; its setter
   is not visible in this chunk -- confirm.  */
void (*arm_lang_output_object_attributes_hook)(void);
71 /* Forward function declarations. */
72 static bool arm_needs_doubleword_align (enum machine_mode
, const_tree
);
73 static int arm_compute_static_chain_stack_bytes (void);
74 static arm_stack_offsets
*arm_get_frame_offsets (void);
75 static void arm_add_gc_roots (void);
76 static int arm_gen_constant (enum rtx_code
, enum machine_mode
, rtx
,
77 HOST_WIDE_INT
, rtx
, rtx
, int, int);
78 static unsigned bit_count (unsigned long);
79 static int arm_address_register_rtx_p (rtx
, int);
80 static int arm_legitimate_index_p (enum machine_mode
, rtx
, RTX_CODE
, int);
81 static int thumb2_legitimate_index_p (enum machine_mode
, rtx
, int);
82 static int thumb1_base_register_rtx_p (rtx
, enum machine_mode
, int);
83 static rtx
arm_legitimize_address (rtx
, rtx
, enum machine_mode
);
84 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
85 static rtx
thumb_legitimize_address (rtx
, rtx
, enum machine_mode
);
86 inline static int thumb1_index_register_rtx_p (rtx
, int);
87 static bool arm_legitimate_address_p (enum machine_mode
, rtx
, bool);
88 static int thumb_far_jump_used_p (void);
89 static bool thumb_force_lr_save (void);
90 static unsigned arm_size_return_regs (void);
91 static bool arm_assemble_integer (rtx
, unsigned int, int);
92 static void arm_print_operand (FILE *, rtx
, int);
93 static void arm_print_operand_address (FILE *, rtx
);
94 static bool arm_print_operand_punct_valid_p (unsigned char code
);
95 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
96 static arm_cc
get_arm_condition_code (rtx
);
97 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
98 static rtx
is_jump_table (rtx
);
99 static const char *output_multi_immediate (rtx
*, const char *, const char *,
101 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
102 static struct machine_function
*arm_init_machine_status (void);
103 static void thumb_exit (FILE *, int);
104 static rtx
is_jump_table (rtx
);
105 static HOST_WIDE_INT
get_jump_table_size (rtx
);
106 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
107 static Mnode
*add_minipool_forward_ref (Mfix
*);
108 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
109 static Mnode
*add_minipool_backward_ref (Mfix
*);
110 static void assign_minipool_offsets (Mfix
*);
111 static void arm_print_value (FILE *, rtx
);
112 static void dump_minipool (rtx
);
113 static int arm_barrier_cost (rtx
);
114 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
115 static void push_minipool_barrier (rtx
, HOST_WIDE_INT
);
116 static void push_minipool_fix (rtx
, HOST_WIDE_INT
, rtx
*, enum machine_mode
,
118 static void arm_reorg (void);
119 static void note_invalid_constants (rtx
, HOST_WIDE_INT
, int);
120 static unsigned long arm_compute_save_reg0_reg12_mask (void);
121 static unsigned long arm_compute_save_reg_mask (void);
122 static unsigned long arm_isr_value (tree
);
123 static unsigned long arm_compute_func_type (void);
124 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
125 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
126 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
127 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
128 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
130 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
131 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
132 static int arm_comp_type_attributes (const_tree
, const_tree
);
133 static void arm_set_default_type_attributes (tree
);
134 static int arm_adjust_cost (rtx
, rtx
, rtx
, int);
135 static int optimal_immediate_sequence (enum rtx_code code
,
136 unsigned HOST_WIDE_INT val
,
137 struct four_ints
*return_sequence
);
138 static int optimal_immediate_sequence_1 (enum rtx_code code
,
139 unsigned HOST_WIDE_INT val
,
140 struct four_ints
*return_sequence
,
142 static int arm_get_strip_length (int);
143 static bool arm_function_ok_for_sibcall (tree
, tree
);
144 static enum machine_mode
arm_promote_function_mode (const_tree
,
145 enum machine_mode
, int *,
147 static bool arm_return_in_memory (const_tree
, const_tree
);
148 static rtx
arm_function_value (const_tree
, const_tree
, bool);
149 static rtx
arm_libcall_value_1 (enum machine_mode
);
150 static rtx
arm_libcall_value (enum machine_mode
, const_rtx
);
151 static bool arm_function_value_regno_p (const unsigned int);
152 static void arm_internal_label (FILE *, const char *, unsigned long);
153 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
155 static bool arm_have_conditional_execution (void);
156 static bool arm_cannot_force_const_mem (enum machine_mode
, rtx
);
157 static bool arm_legitimate_constant_p (enum machine_mode
, rtx
);
158 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
159 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
160 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
161 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
162 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
163 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
164 static bool arm_rtx_costs (rtx
, int, int, int, int *, bool);
165 static int arm_address_cost (rtx
, enum machine_mode
, addr_space_t
, bool);
166 static int arm_register_move_cost (enum machine_mode
, reg_class_t
, reg_class_t
);
167 static int arm_memory_move_cost (enum machine_mode
, reg_class_t
, bool);
168 static void arm_init_builtins (void);
169 static void arm_init_iwmmxt_builtins (void);
170 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
171 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
172 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
173 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
174 static tree
arm_builtin_decl (unsigned, bool);
175 static void emit_constant_insn (rtx cond
, rtx pattern
);
176 static rtx
emit_set_insn (rtx
, rtx
);
177 static int arm_arg_partial_bytes (cumulative_args_t
, enum machine_mode
,
179 static rtx
arm_function_arg (cumulative_args_t
, enum machine_mode
,
181 static void arm_function_arg_advance (cumulative_args_t
, enum machine_mode
,
183 static unsigned int arm_function_arg_boundary (enum machine_mode
, const_tree
);
184 static rtx
aapcs_allocate_return_reg (enum machine_mode
, const_tree
,
186 static rtx
aapcs_libcall_value (enum machine_mode
);
187 static int aapcs_select_return_coproc (const_tree
, const_tree
);
189 #ifdef OBJECT_FORMAT_ELF
190 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
191 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
194 static void arm_encode_section_info (tree
, rtx
, int);
197 static void arm_file_end (void);
198 static void arm_file_start (void);
200 static void arm_setup_incoming_varargs (cumulative_args_t
, enum machine_mode
,
202 static bool arm_pass_by_reference (cumulative_args_t
,
203 enum machine_mode
, const_tree
, bool);
204 static bool arm_promote_prototypes (const_tree
);
205 static bool arm_default_short_enums (void);
206 static bool arm_align_anon_bitfield (void);
207 static bool arm_return_in_msb (const_tree
);
208 static bool arm_must_pass_in_stack (enum machine_mode
, const_tree
);
209 static bool arm_return_in_memory (const_tree
, const_tree
);
211 static void arm_unwind_emit (FILE *, rtx
);
212 static bool arm_output_ttype (rtx
);
213 static void arm_asm_emit_except_personality (rtx
);
214 static void arm_asm_init_sections (void);
216 static rtx
arm_dwarf_register_span (rtx
);
218 static tree
arm_cxx_guard_type (void);
219 static bool arm_cxx_guard_mask_bit (void);
220 static tree
arm_get_cookie_size (tree
);
221 static bool arm_cookie_has_size (void);
222 static bool arm_cxx_cdtor_returns_this (void);
223 static bool arm_cxx_key_method_may_be_inline (void);
224 static void arm_cxx_determine_class_data_visibility (tree
);
225 static bool arm_cxx_class_data_always_comdat (void);
226 static bool arm_cxx_use_aeabi_atexit (void);
227 static void arm_init_libfuncs (void);
228 static tree
arm_build_builtin_va_list (void);
229 static void arm_expand_builtin_va_start (tree
, rtx
);
230 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
231 static void arm_option_override (void);
232 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode
);
233 static bool arm_cannot_copy_insn_p (rtx
);
234 static bool arm_tls_symbol_p (rtx x
);
235 static int arm_issue_rate (void);
236 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
237 static bool arm_output_addr_const_extra (FILE *, rtx
);
238 static bool arm_allocate_stack_slots_for_args (void);
239 static bool arm_warn_func_return (tree
);
240 static const char *arm_invalid_parameter_type (const_tree t
);
241 static const char *arm_invalid_return_type (const_tree t
);
242 static tree
arm_promoted_type (const_tree t
);
243 static tree
arm_convert_to_type (tree type
, tree expr
);
244 static bool arm_scalar_mode_supported_p (enum machine_mode
);
245 static bool arm_frame_pointer_required (void);
246 static bool arm_can_eliminate (const int, const int);
247 static void arm_asm_trampoline_template (FILE *);
248 static void arm_trampoline_init (rtx
, tree
, rtx
);
249 static rtx
arm_trampoline_adjust_address (rtx
);
250 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
251 static bool cortex_a9_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
252 static bool xscale_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
253 static bool fa726te_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
254 static bool arm_array_mode_supported_p (enum machine_mode
,
255 unsigned HOST_WIDE_INT
);
256 static enum machine_mode
arm_preferred_simd_mode (enum machine_mode
);
257 static bool arm_class_likely_spilled_p (reg_class_t
);
258 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
259 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
260 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode
,
264 static void arm_conditional_register_usage (void);
265 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
266 static unsigned int arm_autovectorize_vector_sizes (void);
267 static int arm_default_branch_cost (bool, bool);
268 static int arm_cortex_a5_branch_cost (bool, bool);
270 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
271 const unsigned char *sel
);
274 /* Table of machine attributes. */
275 static const struct attribute_spec arm_attribute_table
[] =
277 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
278 affects_type_identity } */
279 /* Function calls made to this symbol must be done indirectly, because
280 it may lie outside of the 26 bit addressing range of a normal function
282 { "long_call", 0, 0, false, true, true, NULL
, false },
283 /* Whereas these functions are always known to reside within the 26 bit
285 { "short_call", 0, 0, false, true, true, NULL
, false },
286 /* Specify the procedure call conventions for a function. */
287 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
289 /* Interrupt Service Routines have special prologue and epilogue requirements. */
290 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
292 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
294 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
297 /* ARM/PE has three new attributes:
299 dllexport - for exporting a function/variable that will live in a dll
300 dllimport - for importing a function/variable from a dll
302 Microsoft allows multiple declspecs in one __declspec, separating
303 them with spaces. We do NOT support this. Instead, use __declspec
306 { "dllimport", 0, 0, true, false, false, NULL
, false },
307 { "dllexport", 0, 0, true, false, false, NULL
, false },
308 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
310 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
311 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
312 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
313 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
316 { NULL
, 0, 0, false, false, false, NULL
, false }
319 /* Initialize the GCC target structure. */
320 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
321 #undef TARGET_MERGE_DECL_ATTRIBUTES
322 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
325 #undef TARGET_LEGITIMIZE_ADDRESS
326 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
328 #undef TARGET_ATTRIBUTE_TABLE
329 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
331 #undef TARGET_ASM_FILE_START
332 #define TARGET_ASM_FILE_START arm_file_start
333 #undef TARGET_ASM_FILE_END
334 #define TARGET_ASM_FILE_END arm_file_end
336 #undef TARGET_ASM_ALIGNED_SI_OP
337 #define TARGET_ASM_ALIGNED_SI_OP NULL
338 #undef TARGET_ASM_INTEGER
339 #define TARGET_ASM_INTEGER arm_assemble_integer
341 #undef TARGET_PRINT_OPERAND
342 #define TARGET_PRINT_OPERAND arm_print_operand
343 #undef TARGET_PRINT_OPERAND_ADDRESS
344 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
345 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
346 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
348 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
349 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
351 #undef TARGET_ASM_FUNCTION_PROLOGUE
352 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
354 #undef TARGET_ASM_FUNCTION_EPILOGUE
355 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
357 #undef TARGET_OPTION_OVERRIDE
358 #define TARGET_OPTION_OVERRIDE arm_option_override
360 #undef TARGET_COMP_TYPE_ATTRIBUTES
361 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
363 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
364 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
366 #undef TARGET_SCHED_ADJUST_COST
367 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
369 #undef TARGET_REGISTER_MOVE_COST
370 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
372 #undef TARGET_MEMORY_MOVE_COST
373 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
375 #undef TARGET_ENCODE_SECTION_INFO
377 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
379 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
382 #undef TARGET_STRIP_NAME_ENCODING
383 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
385 #undef TARGET_ASM_INTERNAL_LABEL
386 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
388 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
389 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
391 #undef TARGET_FUNCTION_VALUE
392 #define TARGET_FUNCTION_VALUE arm_function_value
394 #undef TARGET_LIBCALL_VALUE
395 #define TARGET_LIBCALL_VALUE arm_libcall_value
397 #undef TARGET_FUNCTION_VALUE_REGNO_P
398 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
400 #undef TARGET_ASM_OUTPUT_MI_THUNK
401 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
402 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
403 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
405 #undef TARGET_RTX_COSTS
406 #define TARGET_RTX_COSTS arm_rtx_costs
407 #undef TARGET_ADDRESS_COST
408 #define TARGET_ADDRESS_COST arm_address_cost
410 #undef TARGET_SHIFT_TRUNCATION_MASK
411 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
412 #undef TARGET_VECTOR_MODE_SUPPORTED_P
413 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
414 #undef TARGET_ARRAY_MODE_SUPPORTED_P
415 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
416 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
417 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
418 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
419 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
420 arm_autovectorize_vector_sizes
422 #undef TARGET_MACHINE_DEPENDENT_REORG
423 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
425 #undef TARGET_INIT_BUILTINS
426 #define TARGET_INIT_BUILTINS arm_init_builtins
427 #undef TARGET_EXPAND_BUILTIN
428 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
429 #undef TARGET_BUILTIN_DECL
430 #define TARGET_BUILTIN_DECL arm_builtin_decl
432 #undef TARGET_INIT_LIBFUNCS
433 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
435 #undef TARGET_PROMOTE_FUNCTION_MODE
436 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
437 #undef TARGET_PROMOTE_PROTOTYPES
438 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
439 #undef TARGET_PASS_BY_REFERENCE
440 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
441 #undef TARGET_ARG_PARTIAL_BYTES
442 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
443 #undef TARGET_FUNCTION_ARG
444 #define TARGET_FUNCTION_ARG arm_function_arg
445 #undef TARGET_FUNCTION_ARG_ADVANCE
446 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
447 #undef TARGET_FUNCTION_ARG_BOUNDARY
448 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
450 #undef TARGET_SETUP_INCOMING_VARARGS
451 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
453 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
454 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
456 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
457 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
458 #undef TARGET_TRAMPOLINE_INIT
459 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
460 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
461 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
463 #undef TARGET_WARN_FUNC_RETURN
464 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
466 #undef TARGET_DEFAULT_SHORT_ENUMS
467 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
469 #undef TARGET_ALIGN_ANON_BITFIELD
470 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
472 #undef TARGET_NARROW_VOLATILE_BITFIELD
473 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
475 #undef TARGET_CXX_GUARD_TYPE
476 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
478 #undef TARGET_CXX_GUARD_MASK_BIT
479 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
481 #undef TARGET_CXX_GET_COOKIE_SIZE
482 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
484 #undef TARGET_CXX_COOKIE_HAS_SIZE
485 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
487 #undef TARGET_CXX_CDTOR_RETURNS_THIS
488 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
490 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
491 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
493 #undef TARGET_CXX_USE_AEABI_ATEXIT
494 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
496 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
497 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
498 arm_cxx_determine_class_data_visibility
500 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
501 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
503 #undef TARGET_RETURN_IN_MSB
504 #define TARGET_RETURN_IN_MSB arm_return_in_msb
506 #undef TARGET_RETURN_IN_MEMORY
507 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
509 #undef TARGET_MUST_PASS_IN_STACK
510 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
513 #undef TARGET_ASM_UNWIND_EMIT
514 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
516 /* EABI unwinding tables use a different format for the typeinfo tables. */
517 #undef TARGET_ASM_TTYPE
518 #define TARGET_ASM_TTYPE arm_output_ttype
520 #undef TARGET_ARM_EABI_UNWINDER
521 #define TARGET_ARM_EABI_UNWINDER true
523 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
524 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
526 #undef TARGET_ASM_INIT_SECTIONS
527 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
528 #endif /* ARM_UNWIND_INFO */
530 #undef TARGET_DWARF_REGISTER_SPAN
531 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
533 #undef TARGET_CANNOT_COPY_INSN_P
534 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
537 #undef TARGET_HAVE_TLS
538 #define TARGET_HAVE_TLS true
541 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
542 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
544 #undef TARGET_LEGITIMATE_CONSTANT_P
545 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
547 #undef TARGET_CANNOT_FORCE_CONST_MEM
548 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
550 #undef TARGET_MAX_ANCHOR_OFFSET
551 #define TARGET_MAX_ANCHOR_OFFSET 4095
553 /* The minimum is set such that the total size of the block
554 for a particular anchor is -4088 + 1 + 4095 bytes, which is
555 divisible by eight, ensuring natural spacing of anchors. */
556 #undef TARGET_MIN_ANCHOR_OFFSET
557 #define TARGET_MIN_ANCHOR_OFFSET -4088
559 #undef TARGET_SCHED_ISSUE_RATE
560 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
562 #undef TARGET_MANGLE_TYPE
563 #define TARGET_MANGLE_TYPE arm_mangle_type
565 #undef TARGET_BUILD_BUILTIN_VA_LIST
566 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
567 #undef TARGET_EXPAND_BUILTIN_VA_START
568 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
569 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
570 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
573 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
574 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
577 #undef TARGET_LEGITIMATE_ADDRESS_P
578 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
580 #undef TARGET_PREFERRED_RELOAD_CLASS
581 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
583 #undef TARGET_INVALID_PARAMETER_TYPE
584 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
586 #undef TARGET_INVALID_RETURN_TYPE
587 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
589 #undef TARGET_PROMOTED_TYPE
590 #define TARGET_PROMOTED_TYPE arm_promoted_type
592 #undef TARGET_CONVERT_TO_TYPE
593 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
595 #undef TARGET_SCALAR_MODE_SUPPORTED_P
596 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
598 #undef TARGET_FRAME_POINTER_REQUIRED
599 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
601 #undef TARGET_CAN_ELIMINATE
602 #define TARGET_CAN_ELIMINATE arm_can_eliminate
604 #undef TARGET_CONDITIONAL_REGISTER_USAGE
605 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
607 #undef TARGET_CLASS_LIKELY_SPILLED_P
608 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
610 #undef TARGET_VECTOR_ALIGNMENT
611 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
613 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
614 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
615 arm_vector_alignment_reachable
617 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
618 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
619 arm_builtin_support_vector_misalignment
621 #undef TARGET_PREFERRED_RENAME_CLASS
622 #define TARGET_PREFERRED_RENAME_CLASS \
623 arm_preferred_rename_class
625 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
626 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
627 arm_vectorize_vec_perm_const_ok
629 struct gcc_target targetm
= TARGET_INITIALIZER
;
631 /* Obstack for minipool constant handling. */
632 static struct obstack minipool_obstack
;
633 static char * minipool_startobj
;
635 /* The maximum number of insns skipped which
636 will be conditionalised if possible. */
637 static int max_insns_skipped
= 5;
639 extern FILE * asm_out_file
;
641 /* True if we are currently building a constant table. */
642 int making_const_table
;
644 /* The processor for which instructions should be scheduled. */
645 enum processor_type arm_tune
= arm_none
;
647 /* The current tuning set. */
648 const struct tune_params
*current_tune
;
650 /* Which floating point hardware to schedule for. */
653 /* Which floating popint hardware to use. */
654 const struct arm_fpu_desc
*arm_fpu_desc
;
656 /* Used for Thumb call_via trampolines. */
657 rtx thumb_call_via_label
[14];
658 static int thumb_call_reg_needed
;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply.  */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)        /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)        /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)       /* XScale.  */
/* spare              (1 << 11) */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */
#define FL_ARCH8      (1 << 24)       /* Architecture 8.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX
                                         technology".  */
#define FL_IWMMXT2    (1 << 30)       /* "Intel Wireless MMX2 technology".  */

/* Flags that only affect tuning, not available instructions.
   NOTE(review): the continuation line of this macro was missing from this
   copy; the trailing "| FL_CO_PROC" was restored from upstream GCC --
   confirm.  */
#define FL_TUNE   (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                   | FL_CO_PROC)

/* Cumulative capability sets for each architecture revision; each later
   architecture builds on the flags of its predecessor.  */
#define FL_FOR_ARCH2      FL_NOTM
#define FL_FOR_ARCH3      (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M     (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4      (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T     (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5      (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T     (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E     (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE    (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ   FL_FOR_ARCH5TE
#define FL_FOR_ARCH6      (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J     FL_FOR_ARCH6
#define FL_FOR_ARCH6K     (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z     FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK    FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2    (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M     (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7      ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A     (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R     (FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M     (FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM    (FL_FOR_ARCH7M | FL_ARCH7EM)
#define FL_FOR_ARCH8A     (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
                           | FL_ARM_DIV | FL_NOTM)
723 /* The bits in this mask specify which
724 instructions we are allowed to generate. */
725 static unsigned long insn_flags
= 0;
727 /* The bits in this mask specify which instruction scheduling options should
729 static unsigned long tune_flags
= 0;
731 /* The highest ARM architecture version supported by the
733 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
735 /* The following are used in the arm.md file as equivalents to bits
736 in the above two flag variables. */
738 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
741 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
744 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
747 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
750 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
753 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
756 /* Nonzero if this chip supports the ARM 6K extensions. */
759 /* Nonzero if instructions present in ARMv6-M can be used. */
762 /* Nonzero if this chip supports the ARM 7 extensions. */
765 /* Nonzero if instructions not present in the 'M' profile can be used. */
766 int arm_arch_notm
= 0;
768 /* Nonzero if instructions present in ARMv7E-M can be used. */
771 /* Nonzero if instructions present in ARMv8 can be used. */
774 /* Nonzero if this chip can benefit from load scheduling. */
775 int arm_ld_sched
= 0;
777 /* Nonzero if this chip is a StrongARM. */
778 int arm_tune_strongarm
= 0;
780 /* Nonzero if this chip supports Intel Wireless MMX technology. */
781 int arm_arch_iwmmxt
= 0;
783 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
784 int arm_arch_iwmmxt2
= 0;
786 /* Nonzero if this chip is an XScale. */
787 int arm_arch_xscale
= 0;
789 /* Nonzero if tuning for XScale */
790 int arm_tune_xscale
= 0;
792 /* Nonzero if we want to tune for stores that access the write-buffer.
793 This typically means an ARM6 or ARM7 with MMU or MPU. */
794 int arm_tune_wbuf
= 0;
796 /* Nonzero if tuning for Cortex-A9. */
797 int arm_tune_cortex_a9
= 0;
799 /* Nonzero if generating Thumb instructions. */
802 /* Nonzero if generating Thumb-1 instructions. */
805 /* Nonzero if we should define __THUMB_INTERWORK__ in the
807 XXX This is a bit of a hack, it's intended to help work around
808 problems in GLD which doesn't understand that armv5t code is
809 interworking clean. */
810 int arm_cpp_interwork
= 0;
812 /* Nonzero if chip supports Thumb 2. */
815 /* Nonzero if chip supports integer division instruction. */
816 int arm_arch_arm_hwdiv
;
817 int arm_arch_thumb_hwdiv
;
819 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
820 we must report the mode of the memory reference from
821 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
822 enum machine_mode output_memory_reference_mode
;
824 /* The register number to be used for the PIC offset register. */
825 unsigned arm_pic_register
= INVALID_REGNUM
;
827 /* Set to 1 after arm_reorg has started. Reset to start at the start of
828 the next function. */
829 static int after_arm_reorg
= 0;
831 enum arm_pcs arm_pcs_default
;
833 /* For an explanation of these variables, see final_prescan_insn below. */
835 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
836 enum arm_cond_code arm_current_cc
;
839 int arm_target_label
;
840 /* The number of conditionally executed insns, including the current insn. */
841 int arm_condexec_count
= 0;
842 /* A bitmask specifying the patterns for the IT block.
843 Zero means do not output an IT block before this insn. */
844 int arm_condexec_mask
= 0;
845 /* The number of bits used in arm_condexec_mask. */
846 int arm_condexec_masklen
= 0;
848 /* The condition codes of the ARM, and the inverse function. */
849 static const char * const arm_condition_codes
[] =
851 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
852 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
855 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
856 int arm_regs_in_sequence
[] =
858 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
861 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
862 #define streq(string1, string2) (strcmp (string1, string2) == 0)
864 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
865 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
866 | (1 << PIC_OFFSET_TABLE_REGNUM)))
868 /* Initialization code. */
872 const char *const name
;
873 enum processor_type core
;
875 enum base_architecture base_arch
;
876 const unsigned long flags
;
877 const struct tune_params
*const tune
;
881 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
882 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
887 const struct tune_params arm_slowmul_tune
=
889 arm_slowmul_rtx_costs
,
891 3, /* Constant limit. */
892 5, /* Max cond insns. */
893 ARM_PREFETCH_NOT_BENEFICIAL
,
894 true, /* Prefer constant pool. */
895 arm_default_branch_cost
,
896 false /* Prefer LDRD/STRD. */
899 const struct tune_params arm_fastmul_tune
=
901 arm_fastmul_rtx_costs
,
903 1, /* Constant limit. */
904 5, /* Max cond insns. */
905 ARM_PREFETCH_NOT_BENEFICIAL
,
906 true, /* Prefer constant pool. */
907 arm_default_branch_cost
,
908 false /* Prefer LDRD/STRD. */
911 /* StrongARM has early execution of branches, so a sequence that is worth
912 skipping is shorter. Set max_insns_skipped to a lower value. */
914 const struct tune_params arm_strongarm_tune
=
916 arm_fastmul_rtx_costs
,
918 1, /* Constant limit. */
919 3, /* Max cond insns. */
920 ARM_PREFETCH_NOT_BENEFICIAL
,
921 true, /* Prefer constant pool. */
922 arm_default_branch_cost
,
923 false /* Prefer LDRD/STRD. */
926 const struct tune_params arm_xscale_tune
=
928 arm_xscale_rtx_costs
,
929 xscale_sched_adjust_cost
,
930 2, /* Constant limit. */
931 3, /* Max cond insns. */
932 ARM_PREFETCH_NOT_BENEFICIAL
,
933 true, /* Prefer constant pool. */
934 arm_default_branch_cost
,
935 false /* Prefer LDRD/STRD. */
938 const struct tune_params arm_9e_tune
=
942 1, /* Constant limit. */
943 5, /* Max cond insns. */
944 ARM_PREFETCH_NOT_BENEFICIAL
,
945 true, /* Prefer constant pool. */
946 arm_default_branch_cost
,
947 false /* Prefer LDRD/STRD. */
950 const struct tune_params arm_v6t2_tune
=
954 1, /* Constant limit. */
955 5, /* Max cond insns. */
956 ARM_PREFETCH_NOT_BENEFICIAL
,
957 false, /* Prefer constant pool. */
958 arm_default_branch_cost
,
959 false /* Prefer LDRD/STRD. */
962 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
963 const struct tune_params arm_cortex_tune
=
967 1, /* Constant limit. */
968 5, /* Max cond insns. */
969 ARM_PREFETCH_NOT_BENEFICIAL
,
970 false, /* Prefer constant pool. */
971 arm_default_branch_cost
,
972 false /* Prefer LDRD/STRD. */
975 const struct tune_params arm_cortex_a15_tune
=
979 1, /* Constant limit. */
980 5, /* Max cond insns. */
981 ARM_PREFETCH_NOT_BENEFICIAL
,
982 false, /* Prefer constant pool. */
983 arm_default_branch_cost
,
984 true /* Prefer LDRD/STRD. */
987 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
988 less appealing. Set max_insns_skipped to a low value. */
990 const struct tune_params arm_cortex_a5_tune
=
994 1, /* Constant limit. */
995 1, /* Max cond insns. */
996 ARM_PREFETCH_NOT_BENEFICIAL
,
997 false, /* Prefer constant pool. */
998 arm_cortex_a5_branch_cost
,
999 false /* Prefer LDRD/STRD. */
1002 const struct tune_params arm_cortex_a9_tune
=
1005 cortex_a9_sched_adjust_cost
,
1006 1, /* Constant limit. */
1007 5, /* Max cond insns. */
1008 ARM_PREFETCH_BENEFICIAL(4,32,32),
1009 false, /* Prefer constant pool. */
1010 arm_default_branch_cost
,
1011 false /* Prefer LDRD/STRD. */
1014 const struct tune_params arm_fa726te_tune
=
1017 fa726te_sched_adjust_cost
,
1018 1, /* Constant limit. */
1019 5, /* Max cond insns. */
1020 ARM_PREFETCH_NOT_BENEFICIAL
,
1021 true, /* Prefer constant pool. */
1022 arm_default_branch_cost
,
1023 false /* Prefer LDRD/STRD. */
1027 /* Not all of these give usefully different compilation alternatives,
1028 but there is no simple way of generalizing them. */
1029 static const struct processors all_cores
[] =
1032 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
1033 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1034 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1035 #include "arm-cores.def"
1037 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
1040 static const struct processors all_architectures
[] =
1042 /* ARM Architectures */
1043 /* We don't specify tuning costs here as it will be figured out
1046 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1047 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1048 #include "arm-arches.def"
1050 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.
   Overwritten with the real architecture name by arm_option_override;
   the initializer fixes the buffer size.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1064 /* Available values for -mfpu=. */
1066 static const struct arm_fpu_desc all_fpus
[] =
1068 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1069 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1070 #include "arm-fpus.def"
/* Supported TLS relocations.  NOTE(review): only TLS_DESCSEQ survived
   the extraction; the other enumerators are reconstructed from the
   standard ARM TLS access models — confirm against upstream.  */

enum tls_reloc {
  TLS_GD32,	/* Global dynamic.  */
  TLS_LDM32,	/* Local dynamic, module base.  */
  TLS_LDO32,	/* Local dynamic, offset.  */
  TLS_IE32,	/* Initial exec.  */
  TLS_LE32,	/* Local exec.  */
  TLS_DESCSEQ	/* GNU scheme */
};
1086 /* The maximum number of insns to be used when loading a constant. */
1088 arm_constant_limit (bool size_p
)
1090 return size_p
? 1 : current_tune
->constant_limit
;
1093 /* Emit an insn that's a simple single-set. Both the operands must be known
1096 emit_set_insn (rtx x
, rtx y
)
1098 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
/* Return the number of bits set in VALUE.  Uses Kernighan's trick:
   each iteration clears the lowest set bit, so the loop runs once per
   set bit rather than once per bit position.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
1118 enum machine_mode mode
;
1120 } arm_fixed_mode_set
;
1122 /* A small helper for setting fixed-point library libfuncs. */
1125 arm_set_fixed_optab_libfunc (optab optable
, enum machine_mode mode
,
1126 const char *funcname
, const char *modename
,
1131 if (num_suffix
== 0)
1132 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
1134 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
1136 set_optab_libfunc (optable
, mode
, buffer
);
1140 arm_set_fixed_conv_libfunc (convert_optab optable
, enum machine_mode to
,
1141 enum machine_mode from
, const char *funcname
,
1142 const char *toname
, const char *fromname
)
1145 const char *maybe_suffix_2
= "";
1147 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1148 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
1149 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
1150 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
1151 maybe_suffix_2
= "2";
1153 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
1156 set_conv_libfunc (optable
, to
, from
, buffer
);
1159 /* Set up library functions unique to ARM. */
1162 arm_init_libfuncs (void)
1164 /* For Linux, we have access to kernel support for atomic operations. */
1165 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
1166 init_sync_libfuncs (2 * UNITS_PER_WORD
);
1168 /* There are no special library functions unless we are using the
1173 /* The functions below are described in Section 4 of the "Run-Time
1174 ABI for the ARM architecture", Version 1.0. */
1176 /* Double-precision floating-point arithmetic. Table 2. */
1177 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
1178 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
1179 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
1180 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
1181 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
1183 /* Double-precision comparisons. Table 3. */
1184 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
1185 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
1186 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
1187 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
1188 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
1189 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
1190 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
1192 /* Single-precision floating-point arithmetic. Table 4. */
1193 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
1194 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
1195 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
1196 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
1197 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
1199 /* Single-precision comparisons. Table 5. */
1200 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
1201 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
1202 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
1203 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
1204 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
1205 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
1206 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
1208 /* Floating-point to integer conversions. Table 6. */
1209 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
1210 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
1211 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
1212 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
1213 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
1214 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
1215 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
1216 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
1218 /* Conversions between floating types. Table 7. */
1219 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
1220 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
1222 /* Integer to floating-point conversions. Table 8. */
1223 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
1224 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
1225 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
1226 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
1227 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
1228 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
1229 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
1230 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
1232 /* Long long. Table 9. */
1233 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
1234 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
1235 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
1236 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
1237 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
1238 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
1239 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
1240 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
1242 /* Integer (32/32->32) division. \S 4.3.1. */
1243 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
1244 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
1246 /* The divmod functions are designed so that they can be used for
1247 plain division, even though they return both the quotient and the
1248 remainder. The quotient is returned in the usual location (i.e.,
1249 r0 for SImode, {r0, r1} for DImode), just as would be expected
1250 for an ordinary division routine. Because the AAPCS calling
1251 conventions specify that all of { r0, r1, r2, r3 } are
1252 callee-saved registers, there is no need to tell the compiler
1253 explicitly that those registers are clobbered by these
1255 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
1256 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
1258 /* For SImode division the ABI provides div-without-mod routines,
1259 which are faster. */
1260 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
1261 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
1263 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1264 divmod libcalls instead. */
1265 set_optab_libfunc (smod_optab
, DImode
, NULL
);
1266 set_optab_libfunc (umod_optab
, DImode
, NULL
);
1267 set_optab_libfunc (smod_optab
, SImode
, NULL
);
1268 set_optab_libfunc (umod_optab
, SImode
, NULL
);
1270 /* Half-precision float operations. The compiler handles all operations
1271 with NULL libfuncs by converting the SFmode. */
1272 switch (arm_fp16_format
)
1274 case ARM_FP16_FORMAT_IEEE
:
1275 case ARM_FP16_FORMAT_ALTERNATIVE
:
1278 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
1279 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1281 : "__gnu_f2h_alternative"));
1282 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
1283 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1285 : "__gnu_h2f_alternative"));
1288 set_optab_libfunc (add_optab
, HFmode
, NULL
);
1289 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
1290 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
1291 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
1292 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
1295 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
1296 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
1297 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
1298 set_optab_libfunc (le_optab
, HFmode
, NULL
);
1299 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
1300 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
1301 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
1308 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1310 const arm_fixed_mode_set fixed_arith_modes
[] =
1331 const arm_fixed_mode_set fixed_conv_modes
[] =
1361 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
1363 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
1364 "add", fixed_arith_modes
[i
].name
, 3);
1365 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
1366 "ssadd", fixed_arith_modes
[i
].name
, 3);
1367 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
1368 "usadd", fixed_arith_modes
[i
].name
, 3);
1369 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
1370 "sub", fixed_arith_modes
[i
].name
, 3);
1371 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
1372 "sssub", fixed_arith_modes
[i
].name
, 3);
1373 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
1374 "ussub", fixed_arith_modes
[i
].name
, 3);
1375 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
1376 "mul", fixed_arith_modes
[i
].name
, 3);
1377 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
1378 "ssmul", fixed_arith_modes
[i
].name
, 3);
1379 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
1380 "usmul", fixed_arith_modes
[i
].name
, 3);
1381 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
1382 "div", fixed_arith_modes
[i
].name
, 3);
1383 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
1384 "udiv", fixed_arith_modes
[i
].name
, 3);
1385 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
1386 "ssdiv", fixed_arith_modes
[i
].name
, 3);
1387 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
1388 "usdiv", fixed_arith_modes
[i
].name
, 3);
1389 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
1390 "neg", fixed_arith_modes
[i
].name
, 2);
1391 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
1392 "ssneg", fixed_arith_modes
[i
].name
, 2);
1393 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
1394 "usneg", fixed_arith_modes
[i
].name
, 2);
1395 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
1396 "ashl", fixed_arith_modes
[i
].name
, 3);
1397 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
1398 "ashr", fixed_arith_modes
[i
].name
, 3);
1399 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
1400 "lshr", fixed_arith_modes
[i
].name
, 3);
1401 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
1402 "ssashl", fixed_arith_modes
[i
].name
, 3);
1403 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
1404 "usashl", fixed_arith_modes
[i
].name
, 3);
1405 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
1406 "cmp", fixed_arith_modes
[i
].name
, 2);
1409 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
1410 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
1413 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
1414 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
1417 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
1418 fixed_conv_modes
[j
].mode
, "fract",
1419 fixed_conv_modes
[i
].name
,
1420 fixed_conv_modes
[j
].name
);
1421 arm_set_fixed_conv_libfunc (satfract_optab
,
1422 fixed_conv_modes
[i
].mode
,
1423 fixed_conv_modes
[j
].mode
, "satfract",
1424 fixed_conv_modes
[i
].name
,
1425 fixed_conv_modes
[j
].name
);
1426 arm_set_fixed_conv_libfunc (fractuns_optab
,
1427 fixed_conv_modes
[i
].mode
,
1428 fixed_conv_modes
[j
].mode
, "fractuns",
1429 fixed_conv_modes
[i
].name
,
1430 fixed_conv_modes
[j
].name
);
1431 arm_set_fixed_conv_libfunc (satfractuns_optab
,
1432 fixed_conv_modes
[i
].mode
,
1433 fixed_conv_modes
[j
].mode
, "satfractuns",
1434 fixed_conv_modes
[i
].name
,
1435 fixed_conv_modes
[j
].name
);
1439 if (TARGET_AAPCS_BASED
)
1440 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
1443 /* On AAPCS systems, this is the "struct __va_list". */
1444 static GTY(()) tree va_list_type
;
1446 /* Return the type to use as __builtin_va_list. */
1448 arm_build_builtin_va_list (void)
1453 if (!TARGET_AAPCS_BASED
)
1454 return std_build_builtin_va_list ();
1456 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1464 The C Library ABI further reinforces this definition in \S
1467 We must follow this definition exactly. The structure tag
1468 name is visible in C++ mangled names, and thus forms a part
1469 of the ABI. The field name may be used by people who
1470 #include <stdarg.h>. */
1471 /* Create the type. */
1472 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1473 /* Give it the required name. */
1474 va_list_name
= build_decl (BUILTINS_LOCATION
,
1476 get_identifier ("__va_list"),
1478 DECL_ARTIFICIAL (va_list_name
) = 1;
1479 TYPE_NAME (va_list_type
) = va_list_name
;
1480 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
1481 /* Create the __ap field. */
1482 ap_field
= build_decl (BUILTINS_LOCATION
,
1484 get_identifier ("__ap"),
1486 DECL_ARTIFICIAL (ap_field
) = 1;
1487 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
1488 TYPE_FIELDS (va_list_type
) = ap_field
;
1489 /* Compute its layout. */
1490 layout_type (va_list_type
);
1492 return va_list_type
;
1495 /* Return an expression of type "void *" pointing to the next
1496 available argument in a variable-argument list. VALIST is the
1497 user-level va_list object, of type __builtin_va_list. */
1499 arm_extract_valist_ptr (tree valist
)
1501 if (TREE_TYPE (valist
) == error_mark_node
)
1502 return error_mark_node
;
1504 /* On an AAPCS target, the pointer is stored within "struct
1506 if (TARGET_AAPCS_BASED
)
1508 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
1509 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
1510 valist
, ap_field
, NULL_TREE
);
1516 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1518 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
1520 valist
= arm_extract_valist_ptr (valist
);
1521 std_expand_builtin_va_start (valist
, nextarg
);
1524 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1526 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
1529 valist
= arm_extract_valist_ptr (valist
);
1530 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
1533 /* Fix up any incompatible options that the user has specified. */
1535 arm_option_override (void)
1537 if (global_options_set
.x_arm_arch_option
)
1538 arm_selected_arch
= &all_architectures
[arm_arch_option
];
1540 if (global_options_set
.x_arm_cpu_option
)
1541 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
1543 if (global_options_set
.x_arm_tune_option
)
1544 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
1546 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1547 SUBTARGET_OVERRIDE_OPTIONS
;
1550 if (arm_selected_arch
)
1552 if (arm_selected_cpu
)
1554 /* Check for conflict between mcpu and march. */
1555 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
1557 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1558 arm_selected_cpu
->name
, arm_selected_arch
->name
);
1559 /* -march wins for code generation.
1560 -mcpu wins for default tuning. */
1561 if (!arm_selected_tune
)
1562 arm_selected_tune
= arm_selected_cpu
;
1564 arm_selected_cpu
= arm_selected_arch
;
1568 arm_selected_arch
= NULL
;
1571 /* Pick a CPU based on the architecture. */
1572 arm_selected_cpu
= arm_selected_arch
;
1575 /* If the user did not specify a processor, choose one for them. */
1576 if (!arm_selected_cpu
)
1578 const struct processors
* sel
;
1579 unsigned int sought
;
1581 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
1582 if (!arm_selected_cpu
->name
)
1584 #ifdef SUBTARGET_CPU_DEFAULT
1585 /* Use the subtarget default CPU if none was specified by
1587 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
1589 /* Default to ARM6. */
1590 if (!arm_selected_cpu
->name
)
1591 arm_selected_cpu
= &all_cores
[arm6
];
1594 sel
= arm_selected_cpu
;
1595 insn_flags
= sel
->flags
;
1597 /* Now check to see if the user has specified some command line
1598 switch that require certain abilities from the cpu. */
1601 if (TARGET_INTERWORK
|| TARGET_THUMB
)
1603 sought
|= (FL_THUMB
| FL_MODE32
);
1605 /* There are no ARM processors that support both APCS-26 and
1606 interworking. Therefore we force FL_MODE26 to be removed
1607 from insn_flags here (if it was set), so that the search
1608 below will always be able to find a compatible processor. */
1609 insn_flags
&= ~FL_MODE26
;
1612 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
1614 /* Try to locate a CPU type that supports all of the abilities
1615 of the default CPU, plus the extra abilities requested by
1617 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1618 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
1621 if (sel
->name
== NULL
)
1623 unsigned current_bit_count
= 0;
1624 const struct processors
* best_fit
= NULL
;
1626 /* Ideally we would like to issue an error message here
1627 saying that it was not possible to find a CPU compatible
1628 with the default CPU, but which also supports the command
1629 line options specified by the programmer, and so they
1630 ought to use the -mcpu=<name> command line option to
1631 override the default CPU type.
1633 If we cannot find a cpu that has both the
1634 characteristics of the default cpu and the given
1635 command line options we scan the array again looking
1636 for a best match. */
1637 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1638 if ((sel
->flags
& sought
) == sought
)
1642 count
= bit_count (sel
->flags
& insn_flags
);
1644 if (count
>= current_bit_count
)
1647 current_bit_count
= count
;
1651 gcc_assert (best_fit
);
1655 arm_selected_cpu
= sel
;
1659 gcc_assert (arm_selected_cpu
);
1660 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1661 if (!arm_selected_tune
)
1662 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
1664 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
1665 insn_flags
= arm_selected_cpu
->flags
;
1666 arm_base_arch
= arm_selected_cpu
->base_arch
;
1668 arm_tune
= arm_selected_tune
->core
;
1669 tune_flags
= arm_selected_tune
->flags
;
1670 current_tune
= arm_selected_tune
->tune
;
1672 /* Make sure that the processor choice does not conflict with any of the
1673 other command line choices. */
1674 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
1675 error ("target CPU does not support ARM mode");
1677 /* BPABI targets use linker tricks to allow interworking on cores
1678 without thumb support. */
1679 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
1681 warning (0, "target CPU does not support interworking" );
1682 target_flags
&= ~MASK_INTERWORK
;
1685 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
1687 warning (0, "target CPU does not support THUMB instructions");
1688 target_flags
&= ~MASK_THUMB
;
1691 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
1693 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1694 target_flags
&= ~MASK_APCS_FRAME
;
1697 /* Callee super interworking implies thumb interworking. Adding
1698 this to the flags here simplifies the logic elsewhere. */
1699 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
1700 target_flags
|= MASK_INTERWORK
;
1702 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1703 from here where no function is being compiled currently. */
1704 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
1705 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1707 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
1708 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1710 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
1712 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1713 target_flags
|= MASK_APCS_FRAME
;
1716 if (TARGET_POKE_FUNCTION_NAME
)
1717 target_flags
|= MASK_APCS_FRAME
;
1719 if (TARGET_APCS_REENT
&& flag_pic
)
1720 error ("-fpic and -mapcs-reent are incompatible");
1722 if (TARGET_APCS_REENT
)
1723 warning (0, "APCS reentrant code not supported. Ignored");
1725 /* If this target is normally configured to use APCS frames, warn if they
1726 are turned off and debugging is turned on. */
1728 && write_symbols
!= NO_DEBUG
1729 && !TARGET_APCS_FRAME
1730 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
1731 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1733 if (TARGET_APCS_FLOAT
)
1734 warning (0, "passing floating point arguments in fp regs not yet supported");
1736 if (TARGET_LITTLE_WORDS
)
1737 warning (OPT_Wdeprecated
, "%<mwords-little-endian%> is deprecated and "
1738 "will be removed in a future release");
1740 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1741 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
1742 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
1743 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
1744 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
1745 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
1746 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
1747 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
1748 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
1749 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
1750 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
1751 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
1752 arm_arch8
= (insn_flags
& FL_ARCH8
) != 0;
1753 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
1754 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
1756 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
1757 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
1758 thumb_code
= TARGET_ARM
== 0;
1759 thumb1_code
= TARGET_THUMB1
!= 0;
1760 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
1761 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
1762 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
1763 arm_arch_iwmmxt2
= (insn_flags
& FL_IWMMXT2
) != 0;
1764 arm_arch_thumb_hwdiv
= (insn_flags
& FL_THUMB_DIV
) != 0;
1765 arm_arch_arm_hwdiv
= (insn_flags
& FL_ARM_DIV
) != 0;
1766 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
1768 /* If we are not using the default (ARM mode) section anchor offset
1769 ranges, then set the correct ranges now. */
1772 /* Thumb-1 LDR instructions cannot have negative offsets.
1773 Permissible positive offset ranges are 5-bit (for byte loads),
1774 6-bit (for halfword loads), or 7-bit (for word loads).
1775 Empirical results suggest a 7-bit anchor range gives the best
1776 overall code size. */
1777 targetm
.min_anchor_offset
= 0;
1778 targetm
.max_anchor_offset
= 127;
1780 else if (TARGET_THUMB2
)
1782 /* The minimum is set such that the total size of the block
1783 for a particular anchor is 248 + 1 + 4095 bytes, which is
1784 divisible by eight, ensuring natural spacing of anchors. */
1785 targetm
.min_anchor_offset
= -248;
1786 targetm
.max_anchor_offset
= 4095;
1789 /* V5 code we generate is completely interworking capable, so we turn off
1790 TARGET_INTERWORK here to avoid many tests later on. */
1792 /* XXX However, we must pass the right pre-processor defines to CPP
1793 or GLD can get confused. This is a hack. */
1794 if (TARGET_INTERWORK
)
1795 arm_cpp_interwork
= 1;
1798 target_flags
&= ~MASK_INTERWORK
;
1800 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
1801 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1803 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
1804 error ("iwmmxt abi requires an iwmmxt capable cpu");
1806 if (!global_options_set
.x_arm_fpu_index
)
1808 const char *target_fpu_name
;
1811 #ifdef FPUTYPE_DEFAULT
1812 target_fpu_name
= FPUTYPE_DEFAULT
;
1814 target_fpu_name
= "vfp";
1817 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
1822 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
1824 switch (arm_fpu_desc
->model
)
1826 case ARM_FP_MODEL_VFP
:
1827 arm_fpu_attr
= FPU_VFP
;
1834 if (TARGET_AAPCS_BASED
)
1836 if (TARGET_CALLER_INTERWORKING
)
1837 error ("AAPCS does not support -mcaller-super-interworking");
1839 if (TARGET_CALLEE_INTERWORKING
)
1840 error ("AAPCS does not support -mcallee-super-interworking");
1843 /* iWMMXt and NEON are incompatible. */
1844 if (TARGET_IWMMXT
&& TARGET_NEON
)
1845 error ("iWMMXt and NEON are incompatible");
1847 /* iWMMXt unsupported under Thumb mode. */
1848 if (TARGET_THUMB
&& TARGET_IWMMXT
)
1849 error ("iWMMXt unsupported under Thumb mode");
1851 /* __fp16 support currently assumes the core has ldrh. */
1852 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
1853 sorry ("__fp16 and no ldrh");
1855 /* If soft-float is specified then don't use FPU. */
1856 if (TARGET_SOFT_FLOAT
)
1857 arm_fpu_attr
= FPU_NONE
;
1859 if (TARGET_AAPCS_BASED
)
1861 if (arm_abi
== ARM_ABI_IWMMXT
)
1862 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
1863 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
1864 && TARGET_HARD_FLOAT
1866 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
1868 arm_pcs_default
= ARM_PCS_AAPCS
;
1872 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
1873 sorry ("-mfloat-abi=hard and VFP");
1875 if (arm_abi
== ARM_ABI_APCS
)
1876 arm_pcs_default
= ARM_PCS_APCS
;
1878 arm_pcs_default
= ARM_PCS_ATPCS
;
1881 /* For arm2/3 there is no need to do any scheduling if we are doing
1882 software floating-point. */
1883 if (TARGET_SOFT_FLOAT
&& (tune_flags
& FL_MODE32
) == 0)
1884 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
1886 /* Use the cp15 method if it is available. */
1887 if (target_thread_pointer
== TP_AUTO
)
1889 if (arm_arch6k
&& !TARGET_THUMB1
)
1890 target_thread_pointer
= TP_CP15
;
1892 target_thread_pointer
= TP_SOFT
;
1895 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
1896 error ("can not use -mtp=cp15 with 16-bit Thumb");
1898 /* Override the default structure alignment for AAPCS ABI. */
1899 if (!global_options_set
.x_arm_structure_size_boundary
)
1901 if (TARGET_AAPCS_BASED
)
1902 arm_structure_size_boundary
= 8;
1906 if (arm_structure_size_boundary
!= 8
1907 && arm_structure_size_boundary
!= 32
1908 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
1910 if (ARM_DOUBLEWORD_ALIGN
)
1912 "structure size boundary can only be set to 8, 32 or 64");
1914 warning (0, "structure size boundary can only be set to 8 or 32");
1915 arm_structure_size_boundary
1916 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
1920 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
1922 error ("RTP PIC is incompatible with Thumb");
1926 /* If stack checking is disabled, we can use r10 as the PIC register,
1927 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1928 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
1930 if (TARGET_VXWORKS_RTP
)
1931 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1932 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
1935 if (flag_pic
&& TARGET_VXWORKS_RTP
)
1936 arm_pic_register
= 9;
1938 if (arm_pic_register_string
!= NULL
)
1940 int pic_register
= decode_reg_name (arm_pic_register_string
);
1943 warning (0, "-mpic-register= is useless without -fpic");
1945 /* Prevent the user from choosing an obviously stupid PIC register. */
1946 else if (pic_register
< 0 || call_used_regs
[pic_register
]
1947 || pic_register
== HARD_FRAME_POINTER_REGNUM
1948 || pic_register
== STACK_POINTER_REGNUM
1949 || pic_register
>= PC_REGNUM
1950 || (TARGET_VXWORKS_RTP
1951 && (unsigned int) pic_register
!= arm_pic_register
))
1952 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
1954 arm_pic_register
= pic_register
;
1957 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1958 if (fix_cm3_ldrd
== 2)
1960 if (arm_selected_cpu
->core
== cortexm3
)
1966 /* Enable -munaligned-access by default for
1967 - all ARMv6 architecture-based processors
1968 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1969 - ARMv8 architecture-base processors.
1971 Disable -munaligned-access by default for
1972 - all pre-ARMv6 architecture-based processors
1973 - ARMv6-M architecture-based processors. */
1975 if (unaligned_access
== 2)
1977 if (arm_arch6
&& (arm_arch_notm
|| arm_arch7
))
1978 unaligned_access
= 1;
1980 unaligned_access
= 0;
1982 else if (unaligned_access
== 1
1983 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
1985 warning (0, "target CPU does not support unaligned accesses");
1986 unaligned_access
= 0;
1989 if (TARGET_THUMB1
&& flag_schedule_insns
)
1991 /* Don't warn since it's on by default in -O2. */
1992 flag_schedule_insns
= 0;
1997 /* If optimizing for size, bump the number of instructions that we
1998 are prepared to conditionally execute (even on a StrongARM). */
1999 max_insns_skipped
= 6;
2002 max_insns_skipped
= current_tune
->max_insns_skipped
;
2004 /* Hot/Cold partitioning is not currently supported, since we can't
2005 handle literal pool placement in that case. */
2006 if (flag_reorder_blocks_and_partition
)
2008 inform (input_location
,
2009 "-freorder-blocks-and-partition not supported on this architecture");
2010 flag_reorder_blocks_and_partition
= 0;
2011 flag_reorder_blocks
= 1;
2015 /* Hoisting PIC address calculations more aggressively provides a small,
2016 but measurable, size reduction for PIC code. Therefore, we decrease
2017 the bar for unrestricted expression hoisting to the cost of PIC address
2018 calculation, which is 2 instructions. */
2019 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
2020 global_options
.x_param_values
,
2021 global_options_set
.x_param_values
);
2023 /* ARM EABI defaults to strict volatile bitfields. */
2024 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
2025 && abi_version_at_least(2))
2026 flag_strict_volatile_bitfields
= 1;
2028 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
2029 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
2030 if (flag_prefetch_loop_arrays
< 0
2033 && current_tune
->num_prefetch_slots
> 0)
2034 flag_prefetch_loop_arrays
= 1;
2036 /* Set up parameters to be used in prefetching algorithm. Do not override the
2037 defaults unless we are tuning for a core we have researched values for. */
2038 if (current_tune
->num_prefetch_slots
> 0)
2039 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
2040 current_tune
->num_prefetch_slots
,
2041 global_options
.x_param_values
,
2042 global_options_set
.x_param_values
);
2043 if (current_tune
->l1_cache_line_size
>= 0)
2044 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
2045 current_tune
->l1_cache_line_size
,
2046 global_options
.x_param_values
,
2047 global_options_set
.x_param_values
);
2048 if (current_tune
->l1_cache_size
>= 0)
2049 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
2050 current_tune
->l1_cache_size
,
2051 global_options
.x_param_values
,
2052 global_options_set
.x_param_values
);
2054 /* Use the alternative scheduling-pressure algorithm by default. */
2055 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, 2,
2056 global_options
.x_param_values
,
2057 global_options_set
.x_param_values
);
2059 /* Register global variables with the garbage collector. */
2060 arm_add_gc_roots ();
2064 arm_add_gc_roots (void)
2066 gcc_obstack_init(&minipool_obstack
);
2067 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
2070 /* A table of known ARM exception types.
2071 For use with the interrupt function attribute. */
2075 const char *const arg
;
2076 const unsigned long return_value
;
2080 static const isr_attribute_arg isr_attribute_args
[] =
2082 { "IRQ", ARM_FT_ISR
},
2083 { "irq", ARM_FT_ISR
},
2084 { "FIQ", ARM_FT_FIQ
},
2085 { "fiq", ARM_FT_FIQ
},
2086 { "ABORT", ARM_FT_ISR
},
2087 { "abort", ARM_FT_ISR
},
2088 { "ABORT", ARM_FT_ISR
},
2089 { "abort", ARM_FT_ISR
},
2090 { "UNDEF", ARM_FT_EXCEPTION
},
2091 { "undef", ARM_FT_EXCEPTION
},
2092 { "SWI", ARM_FT_EXCEPTION
},
2093 { "swi", ARM_FT_EXCEPTION
},
2094 { NULL
, ARM_FT_NORMAL
}
2097 /* Returns the (interrupt) function type of the current
2098 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2100 static unsigned long
2101 arm_isr_value (tree argument
)
2103 const isr_attribute_arg
* ptr
;
2107 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
2109 /* No argument - default to IRQ. */
2110 if (argument
== NULL_TREE
)
2113 /* Get the value of the argument. */
2114 if (TREE_VALUE (argument
) == NULL_TREE
2115 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
2116 return ARM_FT_UNKNOWN
;
2118 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
2120 /* Check it against the list of known arguments. */
2121 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
2122 if (streq (arg
, ptr
->arg
))
2123 return ptr
->return_value
;
2125 /* An unrecognized interrupt type. */
2126 return ARM_FT_UNKNOWN
;
2129 /* Computes the type of the current function. */
2131 static unsigned long
2132 arm_compute_func_type (void)
2134 unsigned long type
= ARM_FT_UNKNOWN
;
2138 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
2140 /* Decide if the current function is volatile. Such functions
2141 never return, and many memory cycles can be saved by not storing
2142 register values that will never be needed again. This optimization
2143 was added to speed up context switching in a kernel application. */
2145 && (TREE_NOTHROW (current_function_decl
)
2146 || !(flag_unwind_tables
2148 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
2149 && TREE_THIS_VOLATILE (current_function_decl
))
2150 type
|= ARM_FT_VOLATILE
;
2152 if (cfun
->static_chain_decl
!= NULL
)
2153 type
|= ARM_FT_NESTED
;
2155 attr
= DECL_ATTRIBUTES (current_function_decl
);
2157 a
= lookup_attribute ("naked", attr
);
2159 type
|= ARM_FT_NAKED
;
2161 a
= lookup_attribute ("isr", attr
);
2163 a
= lookup_attribute ("interrupt", attr
);
2166 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
2168 type
|= arm_isr_value (TREE_VALUE (a
));
2173 /* Returns the type of the current function. */
2176 arm_current_func_type (void)
2178 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
2179 cfun
->machine
->func_type
= arm_compute_func_type ();
2181 return cfun
->machine
->func_type
;
2185 arm_allocate_stack_slots_for_args (void)
2187 /* Naked functions should not allocate stack slots for arguments. */
2188 return !IS_NAKED (arm_current_func_type ());
2192 arm_warn_func_return (tree decl
)
2194 /* Naked functions are implemented entirely in assembly, including the
2195 return sequence, so suppress warnings about this. */
2196 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
2200 /* Output assembler code for a block containing the constant parts
2201 of a trampoline, leaving space for the variable parts.
2203 On the ARM, (if r8 is the static chain regnum, and remembering that
2204 referencing pc adds an offset of 8) the trampoline looks like:
2207 .word static chain value
2208 .word function's address
2209 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2212 arm_asm_trampoline_template (FILE *f
)
2216 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2217 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
2219 else if (TARGET_THUMB2
)
2221 /* The Thumb-2 trampoline is similar to the arm implementation.
2222 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2223 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
2224 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2225 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
2229 ASM_OUTPUT_ALIGN (f
, 2);
2230 fprintf (f
, "\t.code\t16\n");
2231 fprintf (f
, ".Ltrampoline_start:\n");
2232 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
2233 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
2234 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
2235 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
2236 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
2237 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
2239 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
2240 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
2243 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2246 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
2248 rtx fnaddr
, mem
, a_tramp
;
2250 emit_block_move (m_tramp
, assemble_trampoline_template (),
2251 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
2253 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
2254 emit_move_insn (mem
, chain_value
);
2256 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
2257 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
2258 emit_move_insn (mem
, fnaddr
);
2260 a_tramp
= XEXP (m_tramp
, 0);
2261 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
2262 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
2263 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
2266 /* Thumb trampolines should be entered in thumb mode, so set
2267 the bottom bit of the address. */
2270 arm_trampoline_adjust_address (rtx addr
)
2273 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
2274 NULL
, 0, OPTAB_LIB_WIDEN
);
2278 /* Return 1 if it is possible to return using a single instruction.
2279 If SIBLING is non-null, this is a test for a return before a sibling
2280 call. SIBLING is the call insn, so we can examine its register usage. */
2283 use_return_insn (int iscond
, rtx sibling
)
2286 unsigned int func_type
;
2287 unsigned long saved_int_regs
;
2288 unsigned HOST_WIDE_INT stack_adjust
;
2289 arm_stack_offsets
*offsets
;
2291 /* Never use a return instruction before reload has run. */
2292 if (!reload_completed
)
2295 func_type
= arm_current_func_type ();
2297 /* Naked, volatile and stack alignment functions need special
2299 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
2302 /* So do interrupt functions that use the frame pointer and Thumb
2303 interrupt functions. */
2304 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
2307 offsets
= arm_get_frame_offsets ();
2308 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
2310 /* As do variadic functions. */
2311 if (crtl
->args
.pretend_args_size
2312 || cfun
->machine
->uses_anonymous_args
2313 /* Or if the function calls __builtin_eh_return () */
2314 || crtl
->calls_eh_return
2315 /* Or if the function calls alloca */
2316 || cfun
->calls_alloca
2317 /* Or if there is a stack adjustment. However, if the stack pointer
2318 is saved on the stack, we can use a pre-incrementing stack load. */
2319 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
2320 && stack_adjust
== 4)))
2323 saved_int_regs
= offsets
->saved_regs_mask
;
2325 /* Unfortunately, the insn
2327 ldmib sp, {..., sp, ...}
2329 triggers a bug on most SA-110 based devices, such that the stack
2330 pointer won't be correctly restored if the instruction takes a
2331 page fault. We work around this problem by popping r3 along with
2332 the other registers, since that is never slower than executing
2333 another instruction.
2335 We test for !arm_arch5 here, because code for any architecture
2336 less than this could potentially be run on one of the buggy
2338 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
2340 /* Validate that r3 is a call-clobbered register (always true in
2341 the default abi) ... */
2342 if (!call_used_regs
[3])
2345 /* ... that it isn't being used for a return value ... */
2346 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
2349 /* ... or for a tail-call argument ... */
2352 gcc_assert (CALL_P (sibling
));
2354 if (find_regno_fusage (sibling
, USE
, 3))
2358 /* ... and that there are no call-saved registers in r0-r2
2359 (always true in the default ABI). */
2360 if (saved_int_regs
& 0x7)
2364 /* Can't be done if interworking with Thumb, and any registers have been
2366 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
2369 /* On StrongARM, conditional returns are expensive if they aren't
2370 taken and multiple registers have been stacked. */
2371 if (iscond
&& arm_tune_strongarm
)
2373 /* Conditional return when just the LR is stored is a simple
2374 conditional-load instruction, that's not expensive. */
2375 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
2379 && arm_pic_register
!= INVALID_REGNUM
2380 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
2384 /* If there are saved registers but the LR isn't saved, then we need
2385 two instructions for the return. */
2386 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
2389 /* Can't be done if any of the VFP regs are pushed,
2390 since this also requires an insn. */
2391 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
2392 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
2393 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
2396 if (TARGET_REALLY_IWMMXT
)
2397 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
2398 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2404 /* Return TRUE if int I is a valid immediate ARM constant. */
2407 const_ok_for_arm (HOST_WIDE_INT i
)
2411 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2412 be all zero, or all one. */
2413 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
2414 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
2415 != ((~(unsigned HOST_WIDE_INT
) 0)
2416 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
2419 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
2421 /* Fast return for 0 and small values. We must do this for zero, since
2422 the code below can't handle that one case. */
2423 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
2426 /* Get the number of trailing zeros. */
2427 lowbit
= ffs((int) i
) - 1;
2429 /* Only even shifts are allowed in ARM mode so round down to the
2430 nearest even number. */
2434 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
2439 /* Allow rotated constants in ARM mode. */
2441 && ((i
& ~0xc000003f) == 0
2442 || (i
& ~0xf000000f) == 0
2443 || (i
& ~0xfc000003) == 0))
2450 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2453 if (i
== v
|| i
== (v
| (v
<< 8)))
2456 /* Allow repeated pattern 0xXY00XY00. */
2466 /* Return true if I is a valid constant for the operation CODE. */
2468 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
2470 if (const_ok_for_arm (i
))
2476 /* See if we can use movw. */
2477 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
2480 /* Otherwise, try mvn. */
2481 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2484 /* See if we can use addw or subw. */
2486 && ((i
& 0xfffff000) == 0
2487 || ((-i
) & 0xfffff000) == 0))
2489 /* else fall through. */
2509 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
2511 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
2517 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2521 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2528 /* Return true if I is a valid di mode constant for the operation CODE. */
2530 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
2532 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
2533 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
2534 rtx hi
= GEN_INT (hi_val
);
2535 rtx lo
= GEN_INT (lo_val
);
2543 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
2550 /* Emit a sequence of insns to handle a large constant.
2551 CODE is the code of the operation required, it can be any of SET, PLUS,
2552 IOR, AND, XOR, MINUS;
2553 MODE is the mode in which the operation is being performed;
2554 VAL is the integer to operate on;
2555 SOURCE is the other operand (a register, or a null-pointer for SET);
2556 SUBTARGETS means it is safe to create scratch registers if that will
2557 either produce a simpler sequence, or we will want to cse the values.
2558 Return value is the number of insns emitted. */
2560 /* ??? Tweak this for thumb2. */
2562 arm_split_constant (enum rtx_code code
, enum machine_mode mode
, rtx insn
,
2563 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
2567 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
2568 cond
= COND_EXEC_TEST (PATTERN (insn
));
2572 if (subtargets
|| code
== SET
2573 || (REG_P (target
) && REG_P (source
)
2574 && REGNO (target
) != REGNO (source
)))
2576 /* After arm_reorg has been called, we can't fix up expensive
2577 constants by pushing them into memory so we must synthesize
2578 them in-line, regardless of the cost. This is only likely to
2579 be more costly on chips that have load delay slots and we are
2580 compiling without running the scheduler (so no splitting
2581 occurred before the final instruction emission).
2583 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2585 if (!after_arm_reorg
2587 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
2589 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
2594 /* Currently SET is the only monadic value for CODE, all
2595 the rest are diadic. */
2596 if (TARGET_USE_MOVT
)
2597 arm_emit_movpair (target
, GEN_INT (val
));
2599 emit_set_insn (target
, GEN_INT (val
));
2605 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
2607 if (TARGET_USE_MOVT
)
2608 arm_emit_movpair (temp
, GEN_INT (val
));
2610 emit_set_insn (temp
, GEN_INT (val
));
2612 /* For MINUS, the value is subtracted from, since we never
2613 have subtraction of a constant. */
2615 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
2617 emit_set_insn (target
,
2618 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
2624 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
2628 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
2629 ARM/THUMB2 immediates, and add up to VAL.
2630 Thr function return value gives the number of insns required. */
2632 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
2633 struct four_ints
*return_sequence
)
2635 int best_consecutive_zeros
= 0;
2639 struct four_ints tmp_sequence
;
2641 /* If we aren't targeting ARM, the best place to start is always at
2642 the bottom, otherwise look more closely. */
2645 for (i
= 0; i
< 32; i
+= 2)
2647 int consecutive_zeros
= 0;
2649 if (!(val
& (3 << i
)))
2651 while ((i
< 32) && !(val
& (3 << i
)))
2653 consecutive_zeros
+= 2;
2656 if (consecutive_zeros
> best_consecutive_zeros
)
2658 best_consecutive_zeros
= consecutive_zeros
;
2659 best_start
= i
- consecutive_zeros
;
2666 /* So long as it won't require any more insns to do so, it's
2667 desirable to emit a small constant (in bits 0...9) in the last
2668 insn. This way there is more chance that it can be combined with
2669 a later addressing insn to form a pre-indexed load or store
2670 operation. Consider:
2672 *((volatile int *)0xe0000100) = 1;
2673 *((volatile int *)0xe0000110) = 2;
2675 We want this to wind up as:
2679 str rB, [rA, #0x100]
2681 str rB, [rA, #0x110]
2683 rather than having to synthesize both large constants from scratch.
2685 Therefore, we calculate how many insns would be required to emit
2686 the constant starting from `best_start', and also starting from
2687 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2688 yield a shorter sequence, we may as well use zero. */
2689 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
2691 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < val
))
2693 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
2694 if (insns2
<= insns1
)
2696 *return_sequence
= tmp_sequence
;
2704 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2706 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
2707 struct four_ints
*return_sequence
, int i
)
2709 int remainder
= val
& 0xffffffff;
2712 /* Try and find a way of doing the job in either two or three
2715 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2716 location. We start at position I. This may be the MSB, or
2717 optimial_immediate_sequence may have positioned it at the largest block
2718 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2719 wrapping around to the top of the word when we drop off the bottom.
2720 In the worst case this code should produce no more than four insns.
2722 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2723 constants, shifted to any arbitrary location. We should always start
2728 unsigned int b1
, b2
, b3
, b4
;
2729 unsigned HOST_WIDE_INT result
;
2732 gcc_assert (insns
< 4);
2737 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2738 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
2741 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
2742 /* We can use addw/subw for the last 12 bits. */
2746 /* Use an 8-bit shifted/rotated immediate. */
2750 result
= remainder
& ((0x0ff << end
)
2751 | ((i
< end
) ? (0xff >> (32 - end
))
2758 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2759 arbitrary shifts. */
2760 i
-= TARGET_ARM
? 2 : 1;
2764 /* Next, see if we can do a better job with a thumb2 replicated
2767 We do it this way around to catch the cases like 0x01F001E0 where
2768 two 8-bit immediates would work, but a replicated constant would
2771 TODO: 16-bit constants that don't clear all the bits, but still win.
2772 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2775 b1
= (remainder
& 0xff000000) >> 24;
2776 b2
= (remainder
& 0x00ff0000) >> 16;
2777 b3
= (remainder
& 0x0000ff00) >> 8;
2778 b4
= remainder
& 0xff;
2782 /* The 8-bit immediate already found clears b1 (and maybe b2),
2783 but must leave b3 and b4 alone. */
2785 /* First try to find a 32-bit replicated constant that clears
2786 almost everything. We can assume that we can't do it in one,
2787 or else we wouldn't be here. */
2788 unsigned int tmp
= b1
& b2
& b3
& b4
;
2789 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
2791 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
2792 + (tmp
== b3
) + (tmp
== b4
);
2794 && (matching_bytes
>= 3
2795 || (matching_bytes
== 2
2796 && const_ok_for_op (remainder
& ~tmp2
, code
))))
2798 /* At least 3 of the bytes match, and the fourth has at
2799 least as many bits set, or two of the bytes match
2800 and it will only require one more insn to finish. */
2808 /* Second, try to find a 16-bit replicated constant that can
2809 leave three of the bytes clear. If b2 or b4 is already
2810 zero, then we can. If the 8-bit from above would not
2811 clear b2 anyway, then we still win. */
2812 else if (b1
== b3
&& (!b2
|| !b4
2813 || (remainder
& 0x00ff0000 & ~result
)))
2815 result
= remainder
& 0xff00ff00;
2821 /* The 8-bit immediate already found clears b2 (and maybe b3)
2822 and we don't get here unless b1 is alredy clear, but it will
2823 leave b4 unchanged. */
2825 /* If we can clear b2 and b4 at once, then we win, since the
2826 8-bits couldn't possibly reach that far. */
2829 result
= remainder
& 0x00ff00ff;
2835 return_sequence
->i
[insns
++] = result
;
2836 remainder
&= ~result
;
2838 if (code
== SET
|| code
== MINUS
)
2846 /* Emit an instruction with the indicated PATTERN. If COND is
2847 non-NULL, conditionalize the execution of the instruction on COND
2851 emit_constant_insn (rtx cond
, rtx pattern
)
2854 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
2855 emit_insn (pattern
);
2858 /* As above, but extra parameter GENERATE which, if clear, suppresses
2862 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
2863 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
2868 int final_invert
= 0;
2870 int set_sign_bit_copies
= 0;
2871 int clear_sign_bit_copies
= 0;
2872 int clear_zero_bit_copies
= 0;
2873 int set_zero_bit_copies
= 0;
2874 int insns
= 0, neg_insns
, inv_insns
;
2875 unsigned HOST_WIDE_INT temp1
, temp2
;
2876 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
2877 struct four_ints
*immediates
;
2878 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
2880 /* Find out which operations are safe for a given CODE. Also do a quick
2881 check for degenerate cases; these can occur when DImode operations
2894 if (remainder
== 0xffffffff)
2897 emit_constant_insn (cond
,
2898 gen_rtx_SET (VOIDmode
, target
,
2899 GEN_INT (ARM_SIGN_EXTEND (val
))));
2905 if (reload_completed
&& rtx_equal_p (target
, source
))
2909 emit_constant_insn (cond
,
2910 gen_rtx_SET (VOIDmode
, target
, source
));
2919 emit_constant_insn (cond
,
2920 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
2923 if (remainder
== 0xffffffff)
2925 if (reload_completed
&& rtx_equal_p (target
, source
))
2928 emit_constant_insn (cond
,
2929 gen_rtx_SET (VOIDmode
, target
, source
));
2938 if (reload_completed
&& rtx_equal_p (target
, source
))
2941 emit_constant_insn (cond
,
2942 gen_rtx_SET (VOIDmode
, target
, source
));
2946 if (remainder
== 0xffffffff)
2949 emit_constant_insn (cond
,
2950 gen_rtx_SET (VOIDmode
, target
,
2951 gen_rtx_NOT (mode
, source
)));
2958 /* We treat MINUS as (val - source), since (source - val) is always
2959 passed as (source + (-val)). */
2963 emit_constant_insn (cond
,
2964 gen_rtx_SET (VOIDmode
, target
,
2965 gen_rtx_NEG (mode
, source
)));
2968 if (const_ok_for_arm (val
))
2971 emit_constant_insn (cond
,
2972 gen_rtx_SET (VOIDmode
, target
,
2973 gen_rtx_MINUS (mode
, GEN_INT (val
),
2984 /* If we can do it in one insn get out quickly. */
2985 if (const_ok_for_op (val
, code
))
2988 emit_constant_insn (cond
,
2989 gen_rtx_SET (VOIDmode
, target
,
2991 ? gen_rtx_fmt_ee (code
, mode
, source
,
2997 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
2999 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
3000 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
3004 if (mode
== SImode
&& i
== 16)
3005 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3007 emit_constant_insn (cond
,
3008 gen_zero_extendhisi2
3009 (target
, gen_lowpart (HImode
, source
)));
3011 /* Extz only supports SImode, but we can coerce the operands
3013 emit_constant_insn (cond
,
3014 gen_extzv_t2 (gen_lowpart (SImode
, target
),
3015 gen_lowpart (SImode
, source
),
3016 GEN_INT (i
), const0_rtx
));
3022 /* Calculate a few attributes that may be useful for specific
3024 /* Count number of leading zeros. */
3025 for (i
= 31; i
>= 0; i
--)
3027 if ((remainder
& (1 << i
)) == 0)
3028 clear_sign_bit_copies
++;
3033 /* Count number of leading 1's. */
3034 for (i
= 31; i
>= 0; i
--)
3036 if ((remainder
& (1 << i
)) != 0)
3037 set_sign_bit_copies
++;
3042 /* Count number of trailing zero's. */
3043 for (i
= 0; i
<= 31; i
++)
3045 if ((remainder
& (1 << i
)) == 0)
3046 clear_zero_bit_copies
++;
3051 /* Count number of trailing 1's. */
3052 for (i
= 0; i
<= 31; i
++)
3054 if ((remainder
& (1 << i
)) != 0)
3055 set_zero_bit_copies
++;
3063 /* See if we can do this by sign_extending a constant that is known
3064 to be negative. This is a good, way of doing it, since the shift
3065 may well merge into a subsequent insn. */
3066 if (set_sign_bit_copies
> 1)
3068 if (const_ok_for_arm
3069 (temp1
= ARM_SIGN_EXTEND (remainder
3070 << (set_sign_bit_copies
- 1))))
3074 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3075 emit_constant_insn (cond
,
3076 gen_rtx_SET (VOIDmode
, new_src
,
3078 emit_constant_insn (cond
,
3079 gen_ashrsi3 (target
, new_src
,
3080 GEN_INT (set_sign_bit_copies
- 1)));
3084 /* For an inverted constant, we will need to set the low bits,
3085 these will be shifted out of harm's way. */
3086 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
3087 if (const_ok_for_arm (~temp1
))
3091 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3092 emit_constant_insn (cond
,
3093 gen_rtx_SET (VOIDmode
, new_src
,
3095 emit_constant_insn (cond
,
3096 gen_ashrsi3 (target
, new_src
,
3097 GEN_INT (set_sign_bit_copies
- 1)));
3103 /* See if we can calculate the value as the difference between two
3104 valid immediates. */
3105 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
3107 int topshift
= clear_sign_bit_copies
& ~1;
3109 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
3110 & (0xff000000 >> topshift
));
3112 /* If temp1 is zero, then that means the 9 most significant
3113 bits of remainder were 1 and we've caused it to overflow.
3114 When topshift is 0 we don't need to do anything since we
3115 can borrow from 'bit 32'. */
3116 if (temp1
== 0 && topshift
!= 0)
3117 temp1
= 0x80000000 >> (topshift
- 1);
3119 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
3121 if (const_ok_for_arm (temp2
))
3125 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3126 emit_constant_insn (cond
,
3127 gen_rtx_SET (VOIDmode
, new_src
,
3129 emit_constant_insn (cond
,
3130 gen_addsi3 (target
, new_src
,
3138 /* See if we can generate this by setting the bottom (or the top)
3139 16 bits, and then shifting these into the other half of the
3140 word. We only look for the simplest cases, to do more would cost
3141 too much. Be careful, however, not to generate this when the
3142 alternative would take fewer insns. */
3143 if (val
& 0xffff0000)
3145 temp1
= remainder
& 0xffff0000;
3146 temp2
= remainder
& 0x0000ffff;
3148 /* Overlaps outside this range are best done using other methods. */
3149 for (i
= 9; i
< 24; i
++)
3151 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
3152 && !const_ok_for_arm (temp2
))
3154 rtx new_src
= (subtargets
3155 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
3157 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
3158 source
, subtargets
, generate
);
3166 gen_rtx_ASHIFT (mode
, source
,
3173 /* Don't duplicate cases already considered. */
3174 for (i
= 17; i
< 24; i
++)
3176 if (((temp1
| (temp1
>> i
)) == remainder
)
3177 && !const_ok_for_arm (temp1
))
3179 rtx new_src
= (subtargets
3180 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
3182 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
3183 source
, subtargets
, generate
);
3188 gen_rtx_SET (VOIDmode
, target
,
3191 gen_rtx_LSHIFTRT (mode
, source
,
3202 /* If we have IOR or XOR, and the constant can be loaded in a
3203 single instruction, and we can find a temporary to put it in,
3204 then this can be done in two instructions instead of 3-4. */
3206 /* TARGET can't be NULL if SUBTARGETS is 0 */
3207 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
3209 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
3213 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3215 emit_constant_insn (cond
,
3216 gen_rtx_SET (VOIDmode
, sub
,
3218 emit_constant_insn (cond
,
3219 gen_rtx_SET (VOIDmode
, target
,
3220 gen_rtx_fmt_ee (code
, mode
,
3231 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
3232 and the remainder 0s for e.g. 0xfff00000)
3233 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3235 This can be done in 2 instructions by using shifts with mov or mvn.
3240 mvn r0, r0, lsr #12 */
3241 if (set_sign_bit_copies
> 8
3242 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
3246 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3247 rtx shift
= GEN_INT (set_sign_bit_copies
);
3251 gen_rtx_SET (VOIDmode
, sub
,
3253 gen_rtx_ASHIFT (mode
,
3258 gen_rtx_SET (VOIDmode
, target
,
3260 gen_rtx_LSHIFTRT (mode
, sub
,
3267 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3269 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3271 For eg. r0 = r0 | 0xfff
3276 if (set_zero_bit_copies
> 8
3277 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
3281 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3282 rtx shift
= GEN_INT (set_zero_bit_copies
);
3286 gen_rtx_SET (VOIDmode
, sub
,
3288 gen_rtx_LSHIFTRT (mode
,
3293 gen_rtx_SET (VOIDmode
, target
,
3295 gen_rtx_ASHIFT (mode
, sub
,
3301 /* This will never be reached for Thumb2 because orn is a valid
3302 instruction. This is for Thumb1 and the ARM 32 bit cases.
3304 x = y | constant (such that ~constant is a valid constant)
3306 x = ~(~y & ~constant).
3308 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
3312 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3313 emit_constant_insn (cond
,
3314 gen_rtx_SET (VOIDmode
, sub
,
3315 gen_rtx_NOT (mode
, source
)));
3318 sub
= gen_reg_rtx (mode
);
3319 emit_constant_insn (cond
,
3320 gen_rtx_SET (VOIDmode
, sub
,
3321 gen_rtx_AND (mode
, source
,
3323 emit_constant_insn (cond
,
3324 gen_rtx_SET (VOIDmode
, target
,
3325 gen_rtx_NOT (mode
, sub
)));
3332 /* See if two shifts will do 2 or more insn's worth of work. */
3333 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
3335 HOST_WIDE_INT shift_mask
= ((0xffffffff
3336 << (32 - clear_sign_bit_copies
))
3339 if ((remainder
| shift_mask
) != 0xffffffff)
3343 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3344 insns
= arm_gen_constant (AND
, mode
, cond
,
3345 remainder
| shift_mask
,
3346 new_src
, source
, subtargets
, 1);
3351 rtx targ
= subtargets
? NULL_RTX
: target
;
3352 insns
= arm_gen_constant (AND
, mode
, cond
,
3353 remainder
| shift_mask
,
3354 targ
, source
, subtargets
, 0);
3360 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3361 rtx shift
= GEN_INT (clear_sign_bit_copies
);
3363 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
3364 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
3370 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
3372 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
3374 if ((remainder
| shift_mask
) != 0xffffffff)
3378 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3380 insns
= arm_gen_constant (AND
, mode
, cond
,
3381 remainder
| shift_mask
,
3382 new_src
, source
, subtargets
, 1);
3387 rtx targ
= subtargets
? NULL_RTX
: target
;
3389 insns
= arm_gen_constant (AND
, mode
, cond
,
3390 remainder
| shift_mask
,
3391 targ
, source
, subtargets
, 0);
3397 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3398 rtx shift
= GEN_INT (clear_zero_bit_copies
);
3400 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
3401 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
3413 /* Calculate what the instruction sequences would be if we generated it
3414 normally, negated, or inverted. */
3416 /* AND cannot be split into multiple insns, so invert and use BIC. */
3419 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
3422 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
3427 if (can_invert
|| final_invert
)
3428 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
3433 immediates
= &pos_immediates
;
3435 /* Is the negated immediate sequence more efficient? */
3436 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
3439 immediates
= &neg_immediates
;
3444 /* Is the inverted immediate sequence more efficient?
3445 We must allow for an extra NOT instruction for XOR operations, although
3446 there is some chance that the final 'mvn' will get optimized later. */
3447 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
3450 immediates
= &inv_immediates
;
3458 /* Now output the chosen sequence as instructions. */
3461 for (i
= 0; i
< insns
; i
++)
3463 rtx new_src
, temp1_rtx
;
3465 temp1
= immediates
->i
[i
];
3467 if (code
== SET
|| code
== MINUS
)
3468 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
3469 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
3470 new_src
= gen_reg_rtx (mode
);
3476 else if (can_negate
)
3479 temp1
= trunc_int_for_mode (temp1
, mode
);
3480 temp1_rtx
= GEN_INT (temp1
);
3484 else if (code
== MINUS
)
3485 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
3487 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
3489 emit_constant_insn (cond
,
3490 gen_rtx_SET (VOIDmode
, new_src
,
3496 can_negate
= can_invert
;
3500 else if (code
== MINUS
)
3508 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
3509 gen_rtx_NOT (mode
, source
)));
3516 /* Canonicalize a comparison so that we are more likely to recognize it.
3517 This can be done for a few constant compares, where we can make the
3518 immediate value easier to load. */
3521 arm_canonicalize_comparison (enum rtx_code code
, rtx
*op0
, rtx
*op1
)
3523 enum machine_mode mode
;
3524 unsigned HOST_WIDE_INT i
, maxval
;
3526 mode
= GET_MODE (*op0
);
3527 if (mode
== VOIDmode
)
3528 mode
= GET_MODE (*op1
);
3530 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
3532 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3533 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3534 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3535 for GTU/LEU in Thumb mode. */
3540 if (code
== GT
|| code
== LE
3541 || (!TARGET_ARM
&& (code
== GTU
|| code
== LEU
)))
3543 /* Missing comparison. First try to use an available
3545 if (CONST_INT_P (*op1
))
3553 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
3555 *op1
= GEN_INT (i
+ 1);
3556 return code
== GT
? GE
: LT
;
3561 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
3562 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
3564 *op1
= GEN_INT (i
+ 1);
3565 return code
== GTU
? GEU
: LTU
;
3573 /* If that did not work, reverse the condition. */
3577 return swap_condition (code
);
3583 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3584 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3585 to facilitate possible combining with a cmp into 'ands'. */
3587 && GET_CODE (*op0
) == ZERO_EXTEND
3588 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
3589 && GET_MODE (XEXP (*op0
, 0)) == QImode
3590 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
3591 && subreg_lowpart_p (XEXP (*op0
, 0))
3592 && *op1
== const0_rtx
)
3593 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
3596 /* Comparisons smaller than DImode. Only adjust comparisons against
3597 an out-of-range constant. */
3598 if (!CONST_INT_P (*op1
)
3599 || const_ok_for_arm (INTVAL (*op1
))
3600 || const_ok_for_arm (- INTVAL (*op1
)))
3614 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
3616 *op1
= GEN_INT (i
+ 1);
3617 return code
== GT
? GE
: LT
;
3624 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
3626 *op1
= GEN_INT (i
- 1);
3627 return code
== GE
? GT
: LE
;
3633 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
3634 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
3636 *op1
= GEN_INT (i
+ 1);
3637 return code
== GTU
? GEU
: LTU
;
3644 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
3646 *op1
= GEN_INT (i
- 1);
3647 return code
== GEU
? GTU
: LEU
;
3659 /* Define how to find the value returned by a function. */
3662 arm_function_value(const_tree type
, const_tree func
,
3663 bool outgoing ATTRIBUTE_UNUSED
)
3665 enum machine_mode mode
;
3666 int unsignedp ATTRIBUTE_UNUSED
;
3667 rtx r ATTRIBUTE_UNUSED
;
3669 mode
= TYPE_MODE (type
);
3671 if (TARGET_AAPCS_BASED
)
3672 return aapcs_allocate_return_reg (mode
, type
, func
);
3674 /* Promote integer types. */
3675 if (INTEGRAL_TYPE_P (type
))
3676 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
3678 /* Promotes small structs returned in a register to full-word size
3679 for big-endian AAPCS. */
3680 if (arm_return_in_msb (type
))
3682 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3683 if (size
% UNITS_PER_WORD
!= 0)
3685 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
3686 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
3690 return arm_libcall_value_1 (mode
);
3694 libcall_eq (const void *p1
, const void *p2
)
3696 return rtx_equal_p ((const_rtx
) p1
, (const_rtx
) p2
);
3700 libcall_hash (const void *p1
)
3702 return hash_rtx ((const_rtx
) p1
, VOIDmode
, NULL
, NULL
, FALSE
);
3706 add_libcall (htab_t htab
, rtx libcall
)
3708 *htab_find_slot (htab
, libcall
, INSERT
) = libcall
;
3712 arm_libcall_uses_aapcs_base (const_rtx libcall
)
3714 static bool init_done
= false;
3715 static htab_t libcall_htab
;
3721 libcall_htab
= htab_create (31, libcall_hash
, libcall_eq
,
3723 add_libcall (libcall_htab
,
3724 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
3725 add_libcall (libcall_htab
,
3726 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
3727 add_libcall (libcall_htab
,
3728 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
3729 add_libcall (libcall_htab
,
3730 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
3732 add_libcall (libcall_htab
,
3733 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
3734 add_libcall (libcall_htab
,
3735 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
3736 add_libcall (libcall_htab
,
3737 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
3738 add_libcall (libcall_htab
,
3739 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
3741 add_libcall (libcall_htab
,
3742 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
3743 add_libcall (libcall_htab
,
3744 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
3745 add_libcall (libcall_htab
,
3746 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
3747 add_libcall (libcall_htab
,
3748 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
3749 add_libcall (libcall_htab
,
3750 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
3751 add_libcall (libcall_htab
,
3752 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
3753 add_libcall (libcall_htab
,
3754 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
3755 add_libcall (libcall_htab
,
3756 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
3758 /* Values from double-precision helper functions are returned in core
3759 registers if the selected core only supports single-precision
3760 arithmetic, even if we are using the hard-float ABI. The same is
3761 true for single-precision helpers, but we will never be using the
3762 hard-float ABI on a CPU which doesn't support single-precision
3763 operations in hardware. */
3764 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
3765 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
3766 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
3767 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
3768 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
3769 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
3770 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
3771 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
3772 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
3773 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
3774 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
3775 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
3777 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
3781 return libcall
&& htab_find (libcall_htab
, libcall
) != NULL
;
3785 arm_libcall_value_1 (enum machine_mode mode
)
3787 if (TARGET_AAPCS_BASED
)
3788 return aapcs_libcall_value (mode
);
3789 else if (TARGET_IWMMXT_ABI
3790 && arm_vector_mode_supported_p (mode
))
3791 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
3793 return gen_rtx_REG (mode
, ARG_REGISTER (1));
3796 /* Define how to find the value returned by a library function
3797 assuming the value has mode MODE. */
3800 arm_libcall_value (enum machine_mode mode
, const_rtx libcall
)
3802 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
3803 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
3805 /* The following libcalls return their result in integer registers,
3806 even though they return a floating point value. */
3807 if (arm_libcall_uses_aapcs_base (libcall
))
3808 return gen_rtx_REG (mode
, ARG_REGISTER(1));
3812 return arm_libcall_value_1 (mode
);
3815 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3818 arm_function_value_regno_p (const unsigned int regno
)
3820 if (regno
== ARG_REGISTER (1)
3822 && TARGET_AAPCS_BASED
3824 && TARGET_HARD_FLOAT
3825 && regno
== FIRST_VFP_REGNUM
)
3826 || (TARGET_IWMMXT_ABI
3827 && regno
== FIRST_IWMMXT_REGNUM
))
3833 /* Determine the amount of memory needed to store the possible return
3834 registers of an untyped call. */
3836 arm_apply_result_size (void)
3842 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
3844 if (TARGET_IWMMXT_ABI
)
3851 /* Decide whether TYPE should be returned in memory (true)
3852 or in a register (false). FNTYPE is the type of the function making
3855 arm_return_in_memory (const_tree type
, const_tree fntype
)
3859 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
3861 if (TARGET_AAPCS_BASED
)
3863 /* Simple, non-aggregate types (ie not including vectors and
3864 complex) are always returned in a register (or registers).
3865 We don't care about which register here, so we can short-cut
3866 some of the detail. */
3867 if (!AGGREGATE_TYPE_P (type
)
3868 && TREE_CODE (type
) != VECTOR_TYPE
3869 && TREE_CODE (type
) != COMPLEX_TYPE
)
3872 /* Any return value that is no larger than one word can be
3874 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
3877 /* Check any available co-processors to see if they accept the
3878 type as a register candidate (VFP, for example, can return
3879 some aggregates in consecutive registers). These aren't
3880 available if the call is variadic. */
3881 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
3884 /* Vector values should be returned using ARM registers, not
3885 memory (unless they're over 16 bytes, which will break since
3886 we only have four call-clobbered registers to play with). */
3887 if (TREE_CODE (type
) == VECTOR_TYPE
)
3888 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3890 /* The rest go in memory. */
3894 if (TREE_CODE (type
) == VECTOR_TYPE
)
3895 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3897 if (!AGGREGATE_TYPE_P (type
) &&
3898 (TREE_CODE (type
) != VECTOR_TYPE
))
3899 /* All simple types are returned in registers. */
3902 if (arm_abi
!= ARM_ABI_APCS
)
3904 /* ATPCS and later return aggregate types in memory only if they are
3905 larger than a word (or are variable size). */
3906 return (size
< 0 || size
> UNITS_PER_WORD
);
3909 /* For the arm-wince targets we choose to be compatible with Microsoft's
3910 ARM and Thumb compilers, which always return aggregates in memory. */
3912 /* All structures/unions bigger than one word are returned in memory.
3913 Also catch the case where int_size_in_bytes returns -1. In this case
3914 the aggregate is either huge or of variable size, and in either case
3915 we will want to return it via memory and not in a register. */
3916 if (size
< 0 || size
> UNITS_PER_WORD
)
3919 if (TREE_CODE (type
) == RECORD_TYPE
)
3923 /* For a struct the APCS says that we only return in a register
3924 if the type is 'integer like' and every addressable element
3925 has an offset of zero. For practical purposes this means
3926 that the structure can have at most one non bit-field element
3927 and that this element must be the first one in the structure. */
3929 /* Find the first field, ignoring non FIELD_DECL things which will
3930 have been created by C++. */
3931 for (field
= TYPE_FIELDS (type
);
3932 field
&& TREE_CODE (field
) != FIELD_DECL
;
3933 field
= DECL_CHAIN (field
))
3937 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3939 /* Check that the first field is valid for returning in a register. */
3941 /* ... Floats are not allowed */
3942 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3945 /* ... Aggregates that are not themselves valid for returning in
3946 a register are not allowed. */
3947 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3950 /* Now check the remaining fields, if any. Only bitfields are allowed,
3951 since they are not addressable. */
3952 for (field
= DECL_CHAIN (field
);
3954 field
= DECL_CHAIN (field
))
3956 if (TREE_CODE (field
) != FIELD_DECL
)
3959 if (!DECL_BIT_FIELD_TYPE (field
))
3966 if (TREE_CODE (type
) == UNION_TYPE
)
3970 /* Unions can be returned in registers if every element is
3971 integral, or can be returned in an integer register. */
3972 for (field
= TYPE_FIELDS (type
);
3974 field
= DECL_CHAIN (field
))
3976 if (TREE_CODE (field
) != FIELD_DECL
)
3979 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3982 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3988 #endif /* not ARM_WINCE */
3990 /* Return all other types in memory. */
3994 const struct pcs_attribute_arg
3998 } pcs_attribute_args
[] =
4000 {"aapcs", ARM_PCS_AAPCS
},
4001 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
4003 /* We could recognize these, but changes would be needed elsewhere
4004 * to implement them. */
4005 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
4006 {"atpcs", ARM_PCS_ATPCS
},
4007 {"apcs", ARM_PCS_APCS
},
4009 {NULL
, ARM_PCS_UNKNOWN
}
4013 arm_pcs_from_attribute (tree attr
)
4015 const struct pcs_attribute_arg
*ptr
;
4018 /* Get the value of the argument. */
4019 if (TREE_VALUE (attr
) == NULL_TREE
4020 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
4021 return ARM_PCS_UNKNOWN
;
4023 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
4025 /* Check it against the list of known arguments. */
4026 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
4027 if (streq (arg
, ptr
->arg
))
4030 /* An unrecognized interrupt type. */
4031 return ARM_PCS_UNKNOWN
;
4034 /* Get the PCS variant to use for this call. TYPE is the function's type
4035 specification, DECL is the specific declartion. DECL may be null if
4036 the call could be indirect or if this is a library call. */
4038 arm_get_pcs_model (const_tree type
, const_tree decl
)
4040 bool user_convention
= false;
4041 enum arm_pcs user_pcs
= arm_pcs_default
;
4046 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
4049 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
4050 user_convention
= true;
4053 if (TARGET_AAPCS_BASED
)
4055 /* Detect varargs functions. These always use the base rules
4056 (no argument is ever a candidate for a co-processor
4058 bool base_rules
= stdarg_p (type
);
4060 if (user_convention
)
4062 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
4063 sorry ("non-AAPCS derived PCS variant");
4064 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
4065 error ("variadic functions must use the base AAPCS variant");
4069 return ARM_PCS_AAPCS
;
4070 else if (user_convention
)
4072 else if (decl
&& flag_unit_at_a_time
)
4074 /* Local functions never leak outside this compilation unit,
4075 so we are free to use whatever conventions are
4077 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4078 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
4080 return ARM_PCS_AAPCS_LOCAL
;
4083 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
4084 sorry ("PCS variant");
4086 /* For everything else we use the target's default. */
4087 return arm_pcs_default
;
4092 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
4093 const_tree fntype ATTRIBUTE_UNUSED
,
4094 rtx libcall ATTRIBUTE_UNUSED
,
4095 const_tree fndecl ATTRIBUTE_UNUSED
)
4097 /* Record the unallocated VFP registers. */
4098 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
4099 pcum
->aapcs_vfp_reg_alloc
= 0;
4102 /* Walk down the type tree of TYPE counting consecutive base elements.
4103 If *MODEP is VOIDmode, then set it to the first valid floating point
4104 type. If a non-floating point type is found, or if a floating point
4105 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4106 otherwise return the count in the sub-tree. */
4108 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
4110 enum machine_mode mode
;
4113 switch (TREE_CODE (type
))
4116 mode
= TYPE_MODE (type
);
4117 if (mode
!= DFmode
&& mode
!= SFmode
)
4120 if (*modep
== VOIDmode
)
4129 mode
= TYPE_MODE (TREE_TYPE (type
));
4130 if (mode
!= DFmode
&& mode
!= SFmode
)
4133 if (*modep
== VOIDmode
)
4142 /* Use V2SImode and V4SImode as representatives of all 64-bit
4143 and 128-bit vector types, whether or not those modes are
4144 supported with the present options. */
4145 size
= int_size_in_bytes (type
);
4158 if (*modep
== VOIDmode
)
4161 /* Vector modes are considered to be opaque: two vectors are
4162 equivalent for the purposes of being homogeneous aggregates
4163 if they are the same size. */
4172 tree index
= TYPE_DOMAIN (type
);
4174 /* Can't handle incomplete types. */
4175 if (!COMPLETE_TYPE_P (type
))
4178 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
4181 || !TYPE_MAX_VALUE (index
)
4182 || !host_integerp (TYPE_MAX_VALUE (index
), 1)
4183 || !TYPE_MIN_VALUE (index
)
4184 || !host_integerp (TYPE_MIN_VALUE (index
), 1)
4188 count
*= (1 + tree_low_cst (TYPE_MAX_VALUE (index
), 1)
4189 - tree_low_cst (TYPE_MIN_VALUE (index
), 1));
4191 /* There must be no padding. */
4192 if (!host_integerp (TYPE_SIZE (type
), 1)
4193 || (tree_low_cst (TYPE_SIZE (type
), 1)
4194 != count
* GET_MODE_BITSIZE (*modep
)))
4206 /* Can't handle incomplete types. */
4207 if (!COMPLETE_TYPE_P (type
))
4210 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
4212 if (TREE_CODE (field
) != FIELD_DECL
)
4215 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
4221 /* There must be no padding. */
4222 if (!host_integerp (TYPE_SIZE (type
), 1)
4223 || (tree_low_cst (TYPE_SIZE (type
), 1)
4224 != count
* GET_MODE_BITSIZE (*modep
)))
4231 case QUAL_UNION_TYPE
:
4233 /* These aren't very interesting except in a degenerate case. */
4238 /* Can't handle incomplete types. */
4239 if (!COMPLETE_TYPE_P (type
))
4242 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
4244 if (TREE_CODE (field
) != FIELD_DECL
)
4247 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
4250 count
= count
> sub_count
? count
: sub_count
;
4253 /* There must be no padding. */
4254 if (!host_integerp (TYPE_SIZE (type
), 1)
4255 || (tree_low_cst (TYPE_SIZE (type
), 1)
4256 != count
* GET_MODE_BITSIZE (*modep
)))
4269 /* Return true if PCS_VARIANT should use VFP registers. */
4271 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
4273 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
4275 static bool seen_thumb1_vfp
= false;
4277 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
4279 sorry ("Thumb-1 hard-float VFP ABI");
4280 /* sorry() is not immediately fatal, so only display this once. */
4281 seen_thumb1_vfp
= true;
4287 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
4290 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
4291 (TARGET_VFP_DOUBLE
|| !is_double
));
4294 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4295 suitable for passing or returning in VFP registers for the PCS
4296 variant selected. If it is, then *BASE_MODE is updated to contain
4297 a machine mode describing each element of the argument's type and
4298 *COUNT to hold the number of such elements. */
4300 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
4301 enum machine_mode mode
, const_tree type
,
4302 enum machine_mode
*base_mode
, int *count
)
4304 enum machine_mode new_mode
= VOIDmode
;
4306 /* If we have the type information, prefer that to working things
4307 out from the mode. */
4310 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
4312 if (ag_count
> 0 && ag_count
<= 4)
4317 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
4318 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
4319 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
4324 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
4327 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
4333 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
4336 *base_mode
= new_mode
;
4341 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
4342 enum machine_mode mode
, const_tree type
)
4344 int count ATTRIBUTE_UNUSED
;
4345 enum machine_mode ag_mode ATTRIBUTE_UNUSED
;
4347 if (!use_vfp_abi (pcs_variant
, false))
4349 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
4354 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4357 if (!use_vfp_abi (pcum
->pcs_variant
, false))
4360 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
4361 &pcum
->aapcs_vfp_rmode
,
4362 &pcum
->aapcs_vfp_rcount
);
4366 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4367 const_tree type ATTRIBUTE_UNUSED
)
4369 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
4370 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
4373 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
4374 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
4376 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
4377 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
4380 int rcount
= pcum
->aapcs_vfp_rcount
;
4382 enum machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
4386 /* Avoid using unsupported vector modes. */
4387 if (rmode
== V2SImode
)
4389 else if (rmode
== V4SImode
)
4396 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
4397 for (i
= 0; i
< rcount
; i
++)
4399 rtx tmp
= gen_rtx_REG (rmode
,
4400 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
4401 tmp
= gen_rtx_EXPR_LIST
4403 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
4404 XVECEXP (par
, 0, i
) = tmp
;
4407 pcum
->aapcs_reg
= par
;
4410 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
4417 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
4418 enum machine_mode mode
,
4419 const_tree type ATTRIBUTE_UNUSED
)
4421 if (!use_vfp_abi (pcs_variant
, false))
4424 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
4427 enum machine_mode ag_mode
;
4432 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
4437 if (ag_mode
== V2SImode
)
4439 else if (ag_mode
== V4SImode
)
4445 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
4446 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
4447 for (i
= 0; i
< count
; i
++)
4449 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
4450 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
4451 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
4452 XVECEXP (par
, 0, i
) = tmp
;
4458 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
4462 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
4463 enum machine_mode mode ATTRIBUTE_UNUSED
,
4464 const_tree type ATTRIBUTE_UNUSED
)
4466 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
4467 pcum
->aapcs_vfp_reg_alloc
= 0;
4471 #define AAPCS_CP(X) \
4473 aapcs_ ## X ## _cum_init, \
4474 aapcs_ ## X ## _is_call_candidate, \
4475 aapcs_ ## X ## _allocate, \
4476 aapcs_ ## X ## _is_return_candidate, \
4477 aapcs_ ## X ## _allocate_return_reg, \
4478 aapcs_ ## X ## _advance \
4481 /* Table of co-processors that can be used to pass arguments in
4482 registers. Idealy no arugment should be a candidate for more than
4483 one co-processor table entry, but the table is processed in order
4484 and stops after the first match. If that entry then fails to put
4485 the argument into a co-processor register, the argument will go on
4489 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4490 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
4492 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4493 BLKmode) is a candidate for this co-processor's registers; this
4494 function should ignore any position-dependent state in
4495 CUMULATIVE_ARGS and only use call-type dependent information. */
4496 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4498 /* Return true if the argument does get a co-processor register; it
4499 should set aapcs_reg to an RTX of the register allocated as is
4500 required for a return from FUNCTION_ARG. */
4501 bool (*allocate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4503 /* Return true if a result of mode MODE (or type TYPE if MODE is
4504 BLKmode) is can be returned in this co-processor's registers. */
4505 bool (*is_return_candidate
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4507 /* Allocate and return an RTX element to hold the return type of a
4508 call, this routine must not fail and will only be called if
4509 is_return_candidate returned true with the same parameters. */
4510 rtx (*allocate_return_reg
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4512 /* Finish processing this argument and prepare to start processing
4514 void (*advance
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4515 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
4523 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4528 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4529 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
4536 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
4538 /* We aren't passed a decl, so we can't check that a call is local.
4539 However, it isn't clear that that would be a win anyway, since it
4540 might limit some tail-calling opportunities. */
4541 enum arm_pcs pcs_variant
;
4545 const_tree fndecl
= NULL_TREE
;
4547 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4550 fntype
= TREE_TYPE (fntype
);
4553 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4556 pcs_variant
= arm_pcs_default
;
4558 if (pcs_variant
!= ARM_PCS_AAPCS
)
4562 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4563 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
4572 aapcs_allocate_return_reg (enum machine_mode mode
, const_tree type
,
4575 /* We aren't passed a decl, so we can't check that a call is local.
4576 However, it isn't clear that that would be a win anyway, since it
4577 might limit some tail-calling opportunities. */
4578 enum arm_pcs pcs_variant
;
4579 int unsignedp ATTRIBUTE_UNUSED
;
4583 const_tree fndecl
= NULL_TREE
;
4585 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4588 fntype
= TREE_TYPE (fntype
);
4591 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4594 pcs_variant
= arm_pcs_default
;
4596 /* Promote integer types. */
4597 if (type
&& INTEGRAL_TYPE_P (type
))
4598 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
4600 if (pcs_variant
!= ARM_PCS_AAPCS
)
4604 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4605 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
4607 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
4611 /* Promotes small structs returned in a register to full-word size
4612 for big-endian AAPCS. */
4613 if (type
&& arm_return_in_msb (type
))
4615 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4616 if (size
% UNITS_PER_WORD
!= 0)
4618 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4619 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4623 return gen_rtx_REG (mode
, R0_REGNUM
);
4627 aapcs_libcall_value (enum machine_mode mode
)
4629 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
4630 && GET_MODE_SIZE (mode
) <= 4)
4633 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
4636 /* Lay out a function argument using the AAPCS rules. The rule
4637 numbers referred to here are those in the AAPCS. */
4639 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4640 const_tree type
, bool named
)
4645 /* We only need to do this once per argument. */
4646 if (pcum
->aapcs_arg_processed
)
4649 pcum
->aapcs_arg_processed
= true;
4651 /* Special case: if named is false then we are handling an incoming
4652 anonymous argument which is on the stack. */
4656 /* Is this a potential co-processor register candidate? */
4657 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
4659 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
4660 pcum
->aapcs_cprc_slot
= slot
;
4662 /* We don't have to apply any of the rules from part B of the
4663 preparation phase, these are handled elsewhere in the
4668 /* A Co-processor register candidate goes either in its own
4669 class of registers or on the stack. */
4670 if (!pcum
->aapcs_cprc_failed
[slot
])
4672 /* C1.cp - Try to allocate the argument to co-processor
4674 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
4677 /* C2.cp - Put the argument on the stack and note that we
4678 can't assign any more candidates in this slot. We also
4679 need to note that we have allocated stack space, so that
4680 we won't later try to split a non-cprc candidate between
4681 core registers and the stack. */
4682 pcum
->aapcs_cprc_failed
[slot
] = true;
4683 pcum
->can_split
= false;
4686 /* We didn't get a register, so this argument goes on the
4688 gcc_assert (pcum
->can_split
== false);
4693 /* C3 - For double-word aligned arguments, round the NCRN up to the
4694 next even number. */
4695 ncrn
= pcum
->aapcs_ncrn
;
4696 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
4699 nregs
= ARM_NUM_REGS2(mode
, type
);
4701 /* Sigh, this test should really assert that nregs > 0, but a GCC
4702 extension allows empty structs and then gives them empty size; it
4703 then allows such a structure to be passed by value. For some of
4704 the code below we have to pretend that such an argument has
4705 non-zero size so that we 'locate' it correctly either in
4706 registers or on the stack. */
4707 gcc_assert (nregs
>= 0);
4709 nregs2
= nregs
? nregs
: 1;
4711 /* C4 - Argument fits entirely in core registers. */
4712 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
4714 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
4715 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
4719 /* C5 - Some core registers left and there are no arguments already
4720 on the stack: split this argument between the remaining core
4721 registers and the stack. */
4722 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
4724 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
4725 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
4726 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
4730 /* C6 - NCRN is set to 4. */
4731 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
4733 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
4737 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4738 for a call to a function whose data type is FNTYPE.
4739 For a library call, FNTYPE is NULL. */
4741 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
4743 tree fndecl ATTRIBUTE_UNUSED
)
4745 /* Long call handling. */
4747 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4749 pcum
->pcs_variant
= arm_pcs_default
;
4751 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4753 if (arm_libcall_uses_aapcs_base (libname
))
4754 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
4756 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
4757 pcum
->aapcs_reg
= NULL_RTX
;
4758 pcum
->aapcs_partial
= 0;
4759 pcum
->aapcs_arg_processed
= false;
4760 pcum
->aapcs_cprc_slot
= -1;
4761 pcum
->can_split
= true;
4763 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
4767 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4769 pcum
->aapcs_cprc_failed
[i
] = false;
4770 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
4778 /* On the ARM, the offset starts at 0. */
4780 pcum
->iwmmxt_nregs
= 0;
4781 pcum
->can_split
= true;
4783 /* Varargs vectors are treated the same as long long.
4784 named_count avoids having to change the way arm handles 'named' */
4785 pcum
->named_count
= 0;
4788 if (TARGET_REALLY_IWMMXT
&& fntype
)
4792 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
4794 fn_arg
= TREE_CHAIN (fn_arg
))
4795 pcum
->named_count
+= 1;
4797 if (! pcum
->named_count
)
4798 pcum
->named_count
= INT_MAX
;
4803 /* Return true if mode/type need doubleword alignment. */
4805 arm_needs_doubleword_align (enum machine_mode mode
, const_tree type
)
4807 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
4808 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
4812 /* Determine where to put an argument to a function.
4813 Value is zero to push the argument on the stack,
4814 or a hard register in which to store the argument.
4816 MODE is the argument's machine mode.
4817 TYPE is the data type of the argument (as a tree).
4818 This is null for libcalls where that information may
4820 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4821 the preceding args and about the function being called.
4822 NAMED is nonzero if this argument is a named parameter
4823 (otherwise it is an extra parameter matching an ellipsis).
4825 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4826 other arguments are passed on the stack. If (NAMED == 0) (which happens
4827 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4828 defined), say it is passed in the stack (function_prologue will
4829 indeed make it pass in the stack if necessary). */
4832 arm_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
4833 const_tree type
, bool named
)
4835 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
4838 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4839 a call insn (op3 of a call_value insn). */
4840 if (mode
== VOIDmode
)
4843 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4845 aapcs_layout_arg (pcum
, mode
, type
, named
);
4846 return pcum
->aapcs_reg
;
4849 /* Varargs vectors are treated the same as long long.
4850 named_count avoids having to change the way arm handles 'named' */
4851 if (TARGET_IWMMXT_ABI
4852 && arm_vector_mode_supported_p (mode
)
4853 && pcum
->named_count
> pcum
->nargs
+ 1)
4855 if (pcum
->iwmmxt_nregs
<= 9)
4856 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
4859 pcum
->can_split
= false;
4864 /* Put doubleword aligned quantities in even register pairs. */
4866 && ARM_DOUBLEWORD_ALIGN
4867 && arm_needs_doubleword_align (mode
, type
))
4870 /* Only allow splitting an arg between regs and memory if all preceding
4871 args were allocated to regs. For args passed by reference we only count
4872 the reference pointer. */
4873 if (pcum
->can_split
)
4876 nregs
= ARM_NUM_REGS2 (mode
, type
);
4878 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
4881 return gen_rtx_REG (mode
, pcum
->nregs
);
4885 arm_function_arg_boundary (enum machine_mode mode
, const_tree type
)
4887 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
4888 ? DOUBLEWORD_ALIGNMENT
4893 arm_arg_partial_bytes (cumulative_args_t pcum_v
, enum machine_mode mode
,
4894 tree type
, bool named
)
4896 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
4897 int nregs
= pcum
->nregs
;
4899 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4901 aapcs_layout_arg (pcum
, mode
, type
, named
);
4902 return pcum
->aapcs_partial
;
4905 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
4908 if (NUM_ARG_REGS
> nregs
4909 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
4911 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
4916 /* Update the data in PCUM to advance over an argument
4917 of mode MODE and data type TYPE.
4918 (TYPE is null for libcalls where that information may not be available.) */
4921 arm_function_arg_advance (cumulative_args_t pcum_v
, enum machine_mode mode
,
4922 const_tree type
, bool named
)
4924 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
4926 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4928 aapcs_layout_arg (pcum
, mode
, type
, named
);
4930 if (pcum
->aapcs_cprc_slot
>= 0)
4932 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
4934 pcum
->aapcs_cprc_slot
= -1;
4937 /* Generic stuff. */
4938 pcum
->aapcs_arg_processed
= false;
4939 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
4940 pcum
->aapcs_reg
= NULL_RTX
;
4941 pcum
->aapcs_partial
= 0;
4946 if (arm_vector_mode_supported_p (mode
)
4947 && pcum
->named_count
> pcum
->nargs
4948 && TARGET_IWMMXT_ABI
)
4949 pcum
->iwmmxt_nregs
+= 1;
4951 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
4955 /* Variable sized types are passed by reference. This is a GCC
4956 extension to the ARM ABI. */
4959 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
4960 enum machine_mode mode ATTRIBUTE_UNUSED
,
4961 const_tree type
, bool named ATTRIBUTE_UNUSED
)
4963 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
4966 /* Encode the current state of the #pragma [no_]long_calls. */
4969 OFF
, /* No #pragma [no_]long_calls is in effect. */
4970 LONG
, /* #pragma long_calls is in effect. */
4971 SHORT
/* #pragma no_long_calls is in effect. */
4974 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
4977 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4979 arm_pragma_long_calls
= LONG
;
4983 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4985 arm_pragma_long_calls
= SHORT
;
4989 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4991 arm_pragma_long_calls
= OFF
;
4994 /* Handle an attribute requiring a FUNCTION_DECL;
4995 arguments as in struct attribute_spec.handler. */
4997 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
4998 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
5000 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5002 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5004 *no_add_attrs
= true;
5010 /* Handle an "interrupt" or "isr" attribute;
5011 arguments as in struct attribute_spec.handler. */
5013 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
5018 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5020 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5022 *no_add_attrs
= true;
5024 /* FIXME: the argument if any is checked for type attributes;
5025 should it be checked for decl ones? */
5029 if (TREE_CODE (*node
) == FUNCTION_TYPE
5030 || TREE_CODE (*node
) == METHOD_TYPE
)
5032 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
5034 warning (OPT_Wattributes
, "%qE attribute ignored",
5036 *no_add_attrs
= true;
5039 else if (TREE_CODE (*node
) == POINTER_TYPE
5040 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
5041 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
5042 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
5044 *node
= build_variant_type_copy (*node
);
5045 TREE_TYPE (*node
) = build_type_attribute_variant
5047 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
5048 *no_add_attrs
= true;
5052 /* Possibly pass this attribute on from the type to a decl. */
5053 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
5054 | (int) ATTR_FLAG_FUNCTION_NEXT
5055 | (int) ATTR_FLAG_ARRAY_NEXT
))
5057 *no_add_attrs
= true;
5058 return tree_cons (name
, args
, NULL_TREE
);
5062 warning (OPT_Wattributes
, "%qE attribute ignored",
5071 /* Handle a "pcs" attribute; arguments as in struct
5072 attribute_spec.handler. */
5074 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
5075 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
5077 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
5079 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
5080 *no_add_attrs
= true;
5085 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5086 /* Handle the "notshared" attribute. This attribute is another way of
5087 requesting hidden visibility. ARM's compiler supports
5088 "__declspec(notshared)"; we support the same thing via an
5092 arm_handle_notshared_attribute (tree
*node
,
5093 tree name ATTRIBUTE_UNUSED
,
5094 tree args ATTRIBUTE_UNUSED
,
5095 int flags ATTRIBUTE_UNUSED
,
5098 tree decl
= TYPE_NAME (*node
);
5102 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
5103 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
5104 *no_add_attrs
= false;
5110 /* Return 0 if the attributes for two types are incompatible, 1 if they
5111 are compatible, and 2 if they are nearly compatible (which causes a
5112 warning to be generated). */
5114 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
5118 /* Check for mismatch of non-default calling convention. */
5119 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
5122 /* Check for mismatched call attributes. */
5123 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
5124 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
5125 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
5126 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
5128 /* Only bother to check if an attribute is defined. */
5129 if (l1
| l2
| s1
| s2
)
5131 /* If one type has an attribute, the other must have the same attribute. */
5132 if ((l1
!= l2
) || (s1
!= s2
))
5135 /* Disallow mixed attributes. */
5136 if ((l1
& s2
) || (l2
& s1
))
5140 /* Check for mismatched ISR attribute. */
5141 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
5143 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
5144 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
5146 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
5153 /* Assigns default attributes to newly defined type. This is used to
5154 set short_call/long_call attributes for function types of
5155 functions defined inside corresponding #pragma scopes. */
5157 arm_set_default_type_attributes (tree type
)
5159 /* Add __attribute__ ((long_call)) to all functions, when
5160 inside #pragma long_calls or __attribute__ ((short_call)),
5161 when inside #pragma no_long_calls. */
5162 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
5164 tree type_attr_list
, attr_name
;
5165 type_attr_list
= TYPE_ATTRIBUTES (type
);
5167 if (arm_pragma_long_calls
== LONG
)
5168 attr_name
= get_identifier ("long_call");
5169 else if (arm_pragma_long_calls
== SHORT
)
5170 attr_name
= get_identifier ("short_call");
5174 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
5175 TYPE_ATTRIBUTES (type
) = type_attr_list
;
5179 /* Return true if DECL is known to be linked into section SECTION. */
5182 arm_function_in_section_p (tree decl
, section
*section
)
5184 /* We can only be certain about functions defined in the same
5185 compilation unit. */
5186 if (!TREE_STATIC (decl
))
5189 /* Make sure that SYMBOL always binds to the definition in this
5190 compilation unit. */
5191 if (!targetm
.binds_local_p (decl
))
5194 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5195 if (!DECL_SECTION_NAME (decl
))
5197 /* Make sure that we will not create a unique section for DECL. */
5198 if (flag_function_sections
|| DECL_ONE_ONLY (decl
))
5202 return function_section (decl
) == section
;
5205 /* Return nonzero if a 32-bit "long_call" should be generated for
5206 a call from the current function to DECL. We generate a long_call
5209 a. has an __attribute__((long call))
5210 or b. is within the scope of a #pragma long_calls
5211 or c. the -mlong-calls command line switch has been specified
5213 However we do not generate a long call if the function:
5215 d. has an __attribute__ ((short_call))
5216 or e. is inside the scope of a #pragma no_long_calls
5217 or f. is defined in the same section as the current function. */
5220 arm_is_long_call_p (tree decl
)
5225 return TARGET_LONG_CALLS
;
5227 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
5228 if (lookup_attribute ("short_call", attrs
))
5231 /* For "f", be conservative, and only cater for cases in which the
5232 whole of the current function is placed in the same section. */
5233 if (!flag_reorder_blocks_and_partition
5234 && TREE_CODE (decl
) == FUNCTION_DECL
5235 && arm_function_in_section_p (decl
, current_function_section ()))
5238 if (lookup_attribute ("long_call", attrs
))
5241 return TARGET_LONG_CALLS
;
5244 /* Return nonzero if it is ok to make a tail-call to DECL. */
5246 arm_function_ok_for_sibcall (tree decl
, tree exp
)
5248 unsigned long func_type
;
5250 if (cfun
->machine
->sibcall_blocked
)
5253 /* Never tailcall something for which we have no decl, or if we
5254 are generating code for Thumb-1. */
5255 if (decl
== NULL
|| TARGET_THUMB1
)
5258 /* The PIC register is live on entry to VxWorks PLT entries, so we
5259 must make the call before restoring the PIC register. */
5260 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
5263 /* Cannot tail-call to long calls, since these are out of range of
5264 a branch instruction. */
5265 if (arm_is_long_call_p (decl
))
5268 /* If we are interworking and the function is not declared static
5269 then we can't tail-call it unless we know that it exists in this
5270 compilation unit (since it might be a Thumb routine). */
5271 if (TARGET_INTERWORK
&& TREE_PUBLIC (decl
) && !TREE_ASM_WRITTEN (decl
))
5274 func_type
= arm_current_func_type ();
5275 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5276 if (IS_INTERRUPT (func_type
))
5279 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
5281 /* Check that the return value locations are the same. For
5282 example that we aren't returning a value from the sibling in
5283 a VFP register but then need to transfer it to a core
5287 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
5288 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
5290 if (!rtx_equal_p (a
, b
))
5294 /* Never tailcall if function may be called with a misaligned SP. */
5295 if (IS_STACKALIGN (func_type
))
5298 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5299 references should become a NOP. Don't convert such calls into
5301 if (TARGET_AAPCS_BASED
5302 && arm_abi
== ARM_ABI_AAPCS
5303 && DECL_WEAK (decl
))
5306 /* Everything else is ok. */
5311 /* Addressing mode support functions. */
5313 /* Return nonzero if X is a legitimate immediate operand when compiling
5314 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5316 legitimate_pic_operand_p (rtx x
)
5318 if (GET_CODE (x
) == SYMBOL_REF
5319 || (GET_CODE (x
) == CONST
5320 && GET_CODE (XEXP (x
, 0)) == PLUS
5321 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
5327 /* Record that the current function needs a PIC register. Initialize
5328 cfun->machine->pic_reg if we have not already done so. */
5331 require_pic_register (void)
5333 /* A lot of the logic here is made obscure by the fact that this
5334 routine gets called as part of the rtx cost estimation process.
5335 We don't want those calls to affect any assumptions about the real
5336 function; and further, we can't call entry_of_function() until we
5337 start the real expansion process. */
5338 if (!crtl
->uses_pic_offset_table
)
5340 gcc_assert (can_create_pseudo_p ());
5341 if (arm_pic_register
!= INVALID_REGNUM
)
5343 if (!cfun
->machine
->pic_reg
)
5344 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
5346 /* Play games to avoid marking the function as needing pic
5347 if we are being called as part of the cost-estimation
5349 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
5350 crtl
->uses_pic_offset_table
= 1;
5356 if (!cfun
->machine
->pic_reg
)
5357 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
5359 /* Play games to avoid marking the function as needing pic
5360 if we are being called as part of the cost-estimation
5362 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
5364 crtl
->uses_pic_offset_table
= 1;
5367 arm_load_pic_register (0UL);
5372 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
5374 INSN_LOCATION (insn
) = prologue_location
;
5376 /* We can be called during expansion of PHI nodes, where
5377 we can't yet emit instructions directly in the final
5378 insn stream. Queue the insns on the entry edge, they will
5379 be committed after everything else is expanded. */
5380 insert_insn_on_edge (seq
, single_succ_edge (ENTRY_BLOCK_PTR
));
5387 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
5389 if (GET_CODE (orig
) == SYMBOL_REF
5390 || GET_CODE (orig
) == LABEL_REF
)
5396 gcc_assert (can_create_pseudo_p ());
5397 reg
= gen_reg_rtx (Pmode
);
5400 /* VxWorks does not impose a fixed gap between segments; the run-time
5401 gap can be different from the object-file gap. We therefore can't
5402 use GOTOFF unless we are absolutely sure that the symbol is in the
5403 same segment as the GOT. Unfortunately, the flexibility of linker
5404 scripts means that we can't be sure of that in general, so assume
5405 that GOTOFF is never valid on VxWorks. */
5406 if ((GET_CODE (orig
) == LABEL_REF
5407 || (GET_CODE (orig
) == SYMBOL_REF
&&
5408 SYMBOL_REF_LOCAL_P (orig
)))
5410 && !TARGET_VXWORKS_RTP
)
5411 insn
= arm_pic_static_addr (orig
, reg
);
5417 /* If this function doesn't have a pic register, create one now. */
5418 require_pic_register ();
5420 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
5422 /* Make the MEM as close to a constant as possible. */
5423 mem
= SET_SRC (pat
);
5424 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
5425 MEM_READONLY_P (mem
) = 1;
5426 MEM_NOTRAP_P (mem
) = 1;
5428 insn
= emit_insn (pat
);
5431 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5433 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
5437 else if (GET_CODE (orig
) == CONST
)
5441 if (GET_CODE (XEXP (orig
, 0)) == PLUS
5442 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
5445 /* Handle the case where we have: const (UNSPEC_TLS). */
5446 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
5447 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
5450 /* Handle the case where we have:
5451 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5453 if (GET_CODE (XEXP (orig
, 0)) == PLUS
5454 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
5455 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
5457 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
5463 gcc_assert (can_create_pseudo_p ());
5464 reg
= gen_reg_rtx (Pmode
);
5467 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
5469 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
5470 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
5471 base
== reg
? 0 : reg
);
5473 if (CONST_INT_P (offset
))
5475 /* The base register doesn't really matter, we only want to
5476 test the index for the appropriate mode. */
5477 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
5479 gcc_assert (can_create_pseudo_p ());
5480 offset
= force_reg (Pmode
, offset
);
5483 if (CONST_INT_P (offset
))
5484 return plus_constant (Pmode
, base
, INTVAL (offset
));
5487 if (GET_MODE_SIZE (mode
) > 4
5488 && (GET_MODE_CLASS (mode
) == MODE_INT
5489 || TARGET_SOFT_FLOAT
))
5491 emit_insn (gen_addsi3 (reg
, base
, offset
));
5495 return gen_rtx_PLUS (Pmode
, base
, offset
);
5502 /* Find a spare register to use during the prolog of a function. */
5505 thumb_find_work_register (unsigned long pushed_regs_mask
)
5509 /* Check the argument registers first as these are call-used. The
5510 register allocation order means that sometimes r3 might be used
5511 but earlier argument registers might not, so check them all. */
5512 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
5513 if (!df_regs_ever_live_p (reg
))
5516 /* Before going on to check the call-saved registers we can try a couple
5517 more ways of deducing that r3 is available. The first is when we are
5518 pushing anonymous arguments onto the stack and we have less than 4
5519 registers worth of fixed arguments(*). In this case r3 will be part of
5520 the variable argument list and so we can be sure that it will be
5521 pushed right at the start of the function. Hence it will be available
5522 for the rest of the prologue.
5523 (*): ie crtl->args.pretend_args_size is greater than 0. */
5524 if (cfun
->machine
->uses_anonymous_args
5525 && crtl
->args
.pretend_args_size
> 0)
5526 return LAST_ARG_REGNUM
;
5528 /* The other case is when we have fixed arguments but less than 4 registers
5529 worth. In this case r3 might be used in the body of the function, but
5530 it is not being used to convey an argument into the function. In theory
5531 we could just check crtl->args.size to see how many bytes are
5532 being passed in argument registers, but it seems that it is unreliable.
5533 Sometimes it will have the value 0 when in fact arguments are being
5534 passed. (See testcase execute/20021111-1.c for an example). So we also
5535 check the args_info.nregs field as well. The problem with this field is
5536 that it makes no allowances for arguments that are passed to the
5537 function but which are not used. Hence we could miss an opportunity
5538 when a function has an unused argument in r3. But it is better to be
5539 safe than to be sorry. */
5540 if (! cfun
->machine
->uses_anonymous_args
5541 && crtl
->args
.size
>= 0
5542 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
5543 && crtl
->args
.info
.nregs
< 4)
5544 return LAST_ARG_REGNUM
;
5546 /* Otherwise look for a call-saved register that is going to be pushed. */
5547 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
5548 if (pushed_regs_mask
& (1 << reg
))
5553 /* Thumb-2 can use high regs. */
5554 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
5555 if (pushed_regs_mask
& (1 << reg
))
5558 /* Something went wrong - thumb_compute_save_reg_mask()
5559 should have arranged for a suitable register to be pushed. */
5563 static GTY(()) int pic_labelno
;
5565 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5569 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
5571 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
5573 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
5576 gcc_assert (flag_pic
);
5578 pic_reg
= cfun
->machine
->pic_reg
;
5579 if (TARGET_VXWORKS_RTP
)
5581 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
5582 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5583 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
5585 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
5587 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5588 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
5592 /* We use an UNSPEC rather than a LABEL_REF because this label
5593 never appears in the code stream. */
5595 labelno
= GEN_INT (pic_labelno
++);
5596 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5597 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5599 /* On the ARM the PC register contains 'dot + 8' at the time of the
5600 addition, on the Thumb it is 'dot + 4'. */
5601 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
5602 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
5604 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5608 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
5610 else /* TARGET_THUMB1 */
5612 if (arm_pic_register
!= INVALID_REGNUM
5613 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
5615 /* We will have pushed the pic register, so we should always be
5616 able to find a work register. */
5617 pic_tmp
= gen_rtx_REG (SImode
,
5618 thumb_find_work_register (saved_regs
));
5619 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
5620 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
5621 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
5624 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
5628 /* Need to emit this whether or not we obey regdecls,
5629 since setjmp/longjmp can cause life info to screw up. */
5633 /* Generate code to load the address of a static var when flag_pic is set. */
5635 arm_pic_static_addr (rtx orig
, rtx reg
)
5637 rtx l1
, labelno
, offset_rtx
, insn
;
5639 gcc_assert (flag_pic
);
5641 /* We use an UNSPEC rather than a LABEL_REF because this label
5642 never appears in the code stream. */
5643 labelno
= GEN_INT (pic_labelno
++);
5644 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5645 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5647 /* On the ARM the PC register contains 'dot + 8' at the time of the
5648 addition, on the Thumb it is 'dot + 4'. */
5649 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
5650 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
5651 UNSPEC_SYMBOL_OFFSET
);
5652 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
5654 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
5658 /* Return nonzero if X is valid as an ARM state addressing register. */
5660 arm_address_register_rtx_p (rtx x
, int strict_p
)
5670 return ARM_REGNO_OK_FOR_BASE_P (regno
);
5672 return (regno
<= LAST_ARM_REGNUM
5673 || regno
>= FIRST_PSEUDO_REGISTER
5674 || regno
== FRAME_POINTER_REGNUM
5675 || regno
== ARG_POINTER_REGNUM
);
5678 /* Return TRUE if this rtx is the difference of a symbol and a label,
5679 and will reduce to a PC-relative relocation in the object file.
5680 Expressions like this can be left alone when generating PIC, rather
5681 than forced through the GOT. */
5683 pcrel_constant_p (rtx x
)
5685 if (GET_CODE (x
) == MINUS
)
5686 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
5691 /* Return true if X will surely end up in an index register after next
5694 will_be_in_index_register (const_rtx x
)
5696 /* arm.md: calculate_pic_address will split this into a register. */
5697 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
5700 /* Return nonzero if X is a valid ARM state address operand. */
5702 arm_legitimate_address_outer_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
5706 enum rtx_code code
= GET_CODE (x
);
5708 if (arm_address_register_rtx_p (x
, strict_p
))
5711 use_ldrd
= (TARGET_LDRD
5713 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5715 if (code
== POST_INC
|| code
== PRE_DEC
5716 || ((code
== PRE_INC
|| code
== POST_DEC
)
5717 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5718 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5720 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5721 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5722 && GET_CODE (XEXP (x
, 1)) == PLUS
5723 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5725 rtx addend
= XEXP (XEXP (x
, 1), 1);
5727 /* Don't allow ldrd post increment by register because it's hard
5728 to fixup invalid register choices. */
5730 && GET_CODE (x
) == POST_MODIFY
5734 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
5735 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
5738 /* After reload constants split into minipools will have addresses
5739 from a LABEL_REF. */
5740 else if (reload_completed
5741 && (code
== LABEL_REF
5743 && GET_CODE (XEXP (x
, 0)) == PLUS
5744 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5745 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
5748 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5751 else if (code
== PLUS
)
5753 rtx xop0
= XEXP (x
, 0);
5754 rtx xop1
= XEXP (x
, 1);
5756 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5757 && ((CONST_INT_P (xop1
)
5758 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
5759 || (!strict_p
&& will_be_in_index_register (xop1
))))
5760 || (arm_address_register_rtx_p (xop1
, strict_p
)
5761 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
5765 /* Reload currently can't handle MINUS, so disable this for now */
5766 else if (GET_CODE (x
) == MINUS
)
5768 rtx xop0
= XEXP (x
, 0);
5769 rtx xop1
= XEXP (x
, 1);
5771 return (arm_address_register_rtx_p (xop0
, strict_p
)
5772 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
5776 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5777 && code
== SYMBOL_REF
5778 && CONSTANT_POOL_ADDRESS_P (x
)
5780 && symbol_mentioned_p (get_pool_constant (x
))
5781 && ! pcrel_constant_p (get_pool_constant (x
))))
5787 /* Return nonzero if X is a valid Thumb-2 address operand. */
5789 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
5792 enum rtx_code code
= GET_CODE (x
);
5794 if (arm_address_register_rtx_p (x
, strict_p
))
5797 use_ldrd
= (TARGET_LDRD
5799 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5801 if (code
== POST_INC
|| code
== PRE_DEC
5802 || ((code
== PRE_INC
|| code
== POST_DEC
)
5803 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5804 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5806 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5807 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5808 && GET_CODE (XEXP (x
, 1)) == PLUS
5809 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5811 /* Thumb-2 only has autoincrement by constant. */
5812 rtx addend
= XEXP (XEXP (x
, 1), 1);
5813 HOST_WIDE_INT offset
;
5815 if (!CONST_INT_P (addend
))
5818 offset
= INTVAL(addend
);
5819 if (GET_MODE_SIZE (mode
) <= 4)
5820 return (offset
> -256 && offset
< 256);
5822 return (use_ldrd
&& offset
> -1024 && offset
< 1024
5823 && (offset
& 3) == 0);
5826 /* After reload constants split into minipools will have addresses
5827 from a LABEL_REF. */
5828 else if (reload_completed
5829 && (code
== LABEL_REF
5831 && GET_CODE (XEXP (x
, 0)) == PLUS
5832 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5833 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
5836 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5839 else if (code
== PLUS
)
5841 rtx xop0
= XEXP (x
, 0);
5842 rtx xop1
= XEXP (x
, 1);
5844 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5845 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
5846 || (!strict_p
&& will_be_in_index_register (xop1
))))
5847 || (arm_address_register_rtx_p (xop1
, strict_p
)
5848 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
5851 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5852 && code
== SYMBOL_REF
5853 && CONSTANT_POOL_ADDRESS_P (x
)
5855 && symbol_mentioned_p (get_pool_constant (x
))
5856 && ! pcrel_constant_p (get_pool_constant (x
))))
5862 /* Return nonzero if INDEX is valid for an address index operand in
5865 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
5868 HOST_WIDE_INT range
;
5869 enum rtx_code code
= GET_CODE (index
);
5871 /* Standard coprocessor addressing modes. */
5872 if (TARGET_HARD_FLOAT
5874 && (mode
== SFmode
|| mode
== DFmode
))
5875 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5876 && INTVAL (index
) > -1024
5877 && (INTVAL (index
) & 3) == 0);
5879 /* For quad modes, we restrict the constant offset to be slightly less
5880 than what the instruction format permits. We do this because for
5881 quad mode moves, we will actually decompose them into two separate
5882 double-mode reads or writes. INDEX must therefore be a valid
5883 (double-mode) offset and so should INDEX+8. */
5884 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
5885 return (code
== CONST_INT
5886 && INTVAL (index
) < 1016
5887 && INTVAL (index
) > -1024
5888 && (INTVAL (index
) & 3) == 0);
5890 /* We have no such constraint on double mode offsets, so we permit the
5891 full range of the instruction format. */
5892 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
5893 return (code
== CONST_INT
5894 && INTVAL (index
) < 1024
5895 && INTVAL (index
) > -1024
5896 && (INTVAL (index
) & 3) == 0);
5898 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
5899 return (code
== CONST_INT
5900 && INTVAL (index
) < 1024
5901 && INTVAL (index
) > -1024
5902 && (INTVAL (index
) & 3) == 0);
5904 if (arm_address_register_rtx_p (index
, strict_p
)
5905 && (GET_MODE_SIZE (mode
) <= 4))
5908 if (mode
== DImode
|| mode
== DFmode
)
5910 if (code
== CONST_INT
)
5912 HOST_WIDE_INT val
= INTVAL (index
);
5915 return val
> -256 && val
< 256;
5917 return val
> -4096 && val
< 4092;
5920 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
5923 if (GET_MODE_SIZE (mode
) <= 4
5927 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
5931 rtx xiop0
= XEXP (index
, 0);
5932 rtx xiop1
= XEXP (index
, 1);
5934 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
5935 && power_of_two_operand (xiop1
, SImode
))
5936 || (arm_address_register_rtx_p (xiop1
, strict_p
)
5937 && power_of_two_operand (xiop0
, SImode
)));
5939 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
5940 || code
== ASHIFT
|| code
== ROTATERT
)
5942 rtx op
= XEXP (index
, 1);
5944 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
5947 && INTVAL (op
) <= 31);
5951 /* For ARM v4 we may be doing a sign-extend operation during the
5957 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
5963 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
5965 return (code
== CONST_INT
5966 && INTVAL (index
) < range
5967 && INTVAL (index
) > -range
);
5970 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5971 index operand. i.e. 1, 2, 4 or 8. */
5973 thumb2_index_mul_operand (rtx op
)
5977 if (!CONST_INT_P (op
))
5981 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
5984 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5986 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
5988 enum rtx_code code
= GET_CODE (index
);
5990 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5991 /* Standard coprocessor addressing modes. */
5992 if (TARGET_HARD_FLOAT
5994 && (mode
== SFmode
|| mode
== DFmode
))
5995 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5996 /* Thumb-2 allows only > -256 index range for it's core register
5997 load/stores. Since we allow SF/DF in core registers, we have
5998 to use the intersection between -256~4096 (core) and -1024~1024
6000 && INTVAL (index
) > -256
6001 && (INTVAL (index
) & 3) == 0);
6003 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
6005 /* For DImode assume values will usually live in core regs
6006 and only allow LDRD addressing modes. */
6007 if (!TARGET_LDRD
|| mode
!= DImode
)
6008 return (code
== CONST_INT
6009 && INTVAL (index
) < 1024
6010 && INTVAL (index
) > -1024
6011 && (INTVAL (index
) & 3) == 0);
6014 /* For quad modes, we restrict the constant offset to be slightly less
6015 than what the instruction format permits. We do this because for
6016 quad mode moves, we will actually decompose them into two separate
6017 double-mode reads or writes. INDEX must therefore be a valid
6018 (double-mode) offset and so should INDEX+8. */
6019 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
6020 return (code
== CONST_INT
6021 && INTVAL (index
) < 1016
6022 && INTVAL (index
) > -1024
6023 && (INTVAL (index
) & 3) == 0);
6025 /* We have no such constraint on double mode offsets, so we permit the
6026 full range of the instruction format. */
6027 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
6028 return (code
== CONST_INT
6029 && INTVAL (index
) < 1024
6030 && INTVAL (index
) > -1024
6031 && (INTVAL (index
) & 3) == 0);
6033 if (arm_address_register_rtx_p (index
, strict_p
)
6034 && (GET_MODE_SIZE (mode
) <= 4))
6037 if (mode
== DImode
|| mode
== DFmode
)
6039 if (code
== CONST_INT
)
6041 HOST_WIDE_INT val
= INTVAL (index
);
6042 /* ??? Can we assume ldrd for thumb2? */
6043 /* Thumb-2 ldrd only has reg+const addressing modes. */
6044 /* ldrd supports offsets of +-1020.
6045 However the ldr fallback does not. */
6046 return val
> -256 && val
< 256 && (val
& 3) == 0;
6054 rtx xiop0
= XEXP (index
, 0);
6055 rtx xiop1
= XEXP (index
, 1);
6057 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
6058 && thumb2_index_mul_operand (xiop1
))
6059 || (arm_address_register_rtx_p (xiop1
, strict_p
)
6060 && thumb2_index_mul_operand (xiop0
)));
6062 else if (code
== ASHIFT
)
6064 rtx op
= XEXP (index
, 1);
6066 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
6069 && INTVAL (op
) <= 3);
6072 return (code
== CONST_INT
6073 && INTVAL (index
) < 4096
6074 && INTVAL (index
) > -256);
6077 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6079 thumb1_base_register_rtx_p (rtx x
, enum machine_mode mode
, int strict_p
)
6089 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
6091 return (regno
<= LAST_LO_REGNUM
6092 || regno
> LAST_VIRTUAL_REGISTER
6093 || regno
== FRAME_POINTER_REGNUM
6094 || (GET_MODE_SIZE (mode
) >= 4
6095 && (regno
== STACK_POINTER_REGNUM
6096 || regno
>= FIRST_PSEUDO_REGISTER
6097 || x
== hard_frame_pointer_rtx
6098 || x
== arg_pointer_rtx
)));
6101 /* Return nonzero if x is a legitimate index register. This is the case
6102 for any base register that can access a QImode object. */
6104 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
6106 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
6109 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6111 The AP may be eliminated to either the SP or the FP, so we use the
6112 least common denominator, e.g. SImode, and offsets from 0 to 64.
6114 ??? Verify whether the above is the right approach.
6116 ??? Also, the FP may be eliminated to the SP, so perhaps that
6117 needs special handling also.
6119 ??? Look at how the mips16 port solves this problem. It probably uses
6120 better ways to solve some of these problems.
6122 Although it is not incorrect, we don't accept QImode and HImode
6123 addresses based on the frame pointer or arg pointer until the
6124 reload pass starts. This is so that eliminating such addresses
6125 into stack based ones won't produce impossible code. */
6127 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
6129 /* ??? Not clear if this is right. Experiment. */
6130 if (GET_MODE_SIZE (mode
) < 4
6131 && !(reload_in_progress
|| reload_completed
)
6132 && (reg_mentioned_p (frame_pointer_rtx
, x
)
6133 || reg_mentioned_p (arg_pointer_rtx
, x
)
6134 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
6135 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
6136 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
6137 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
6140 /* Accept any base register. SP only in SImode or larger. */
6141 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
6144 /* This is PC relative data before arm_reorg runs. */
6145 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
6146 && GET_CODE (x
) == SYMBOL_REF
6147 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
6150 /* This is PC relative data after arm_reorg runs. */
6151 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
6153 && (GET_CODE (x
) == LABEL_REF
6154 || (GET_CODE (x
) == CONST
6155 && GET_CODE (XEXP (x
, 0)) == PLUS
6156 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6157 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6160 /* Post-inc indexing only supported for SImode and larger. */
6161 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
6162 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
6165 else if (GET_CODE (x
) == PLUS
)
6167 /* REG+REG address can be any two index registers. */
6168 /* We disallow FRAME+REG addressing since we know that FRAME
6169 will be replaced with STACK, and SP relative addressing only
6170 permits SP+OFFSET. */
6171 if (GET_MODE_SIZE (mode
) <= 4
6172 && XEXP (x
, 0) != frame_pointer_rtx
6173 && XEXP (x
, 1) != frame_pointer_rtx
6174 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
6175 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
6176 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
6179 /* REG+const has 5-7 bit offset for non-SP registers. */
6180 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
6181 || XEXP (x
, 0) == arg_pointer_rtx
)
6182 && CONST_INT_P (XEXP (x
, 1))
6183 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
6186 /* REG+const has 10-bit offset for SP, but only SImode and
6187 larger is supported. */
6188 /* ??? Should probably check for DI/DFmode overflow here
6189 just like GO_IF_LEGITIMATE_OFFSET does. */
6190 else if (REG_P (XEXP (x
, 0))
6191 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
6192 && GET_MODE_SIZE (mode
) >= 4
6193 && CONST_INT_P (XEXP (x
, 1))
6194 && INTVAL (XEXP (x
, 1)) >= 0
6195 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
6196 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
6199 else if (REG_P (XEXP (x
, 0))
6200 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
6201 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
6202 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
6203 && REGNO (XEXP (x
, 0))
6204 <= LAST_VIRTUAL_POINTER_REGISTER
))
6205 && GET_MODE_SIZE (mode
) >= 4
6206 && CONST_INT_P (XEXP (x
, 1))
6207 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
6211 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6212 && GET_MODE_SIZE (mode
) == 4
6213 && GET_CODE (x
) == SYMBOL_REF
6214 && CONSTANT_POOL_ADDRESS_P (x
)
6216 && symbol_mentioned_p (get_pool_constant (x
))
6217 && ! pcrel_constant_p (get_pool_constant (x
))))
6223 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6224 instruction of mode MODE. */
6226 thumb_legitimate_offset_p (enum machine_mode mode
, HOST_WIDE_INT val
)
6228 switch (GET_MODE_SIZE (mode
))
6231 return val
>= 0 && val
< 32;
6234 return val
>= 0 && val
< 64 && (val
& 1) == 0;
6238 && (val
+ GET_MODE_SIZE (mode
)) <= 128
6244 arm_legitimate_address_p (enum machine_mode mode
, rtx x
, bool strict_p
)
6247 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
6248 else if (TARGET_THUMB2
)
6249 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
6250 else /* if (TARGET_THUMB1) */
6251 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
6254 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6256 Given an rtx X being reloaded into a reg required to be
6257 in class CLASS, return the class of reg to actually use.
6258 In general this is just CLASS, but for the Thumb core registers and
6259 immediate constants we prefer a LO_REGS class or a subset. */
6262 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
6268 if (rclass
== GENERAL_REGS
6269 || rclass
== HI_REGS
6270 || rclass
== NO_REGS
6271 || rclass
== STACK_REG
)
6278 /* Build the SYMBOL_REF for __tls_get_addr. */
6280 static GTY(()) rtx tls_get_addr_libfunc
;
6283 get_tls_get_addr (void)
6285 if (!tls_get_addr_libfunc
)
6286 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
6287 return tls_get_addr_libfunc
;
6291 arm_load_tp (rtx target
)
6294 target
= gen_reg_rtx (SImode
);
6298 /* Can return in any reg. */
6299 emit_insn (gen_load_tp_hard (target
));
6303 /* Always returned in r0. Immediately copy the result into a pseudo,
6304 otherwise other uses of r0 (e.g. setting up function arguments) may
6305 clobber the value. */
6309 emit_insn (gen_load_tp_soft ());
6311 tmp
= gen_rtx_REG (SImode
, 0);
6312 emit_move_insn (target
, tmp
);
6318 load_tls_operand (rtx x
, rtx reg
)
6322 if (reg
== NULL_RTX
)
6323 reg
= gen_reg_rtx (SImode
);
6325 tmp
= gen_rtx_CONST (SImode
, x
);
6327 emit_move_insn (reg
, tmp
);
6333 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
6335 rtx insns
, label
, labelno
, sum
;
6337 gcc_assert (reloc
!= TLS_DESCSEQ
);
6340 labelno
= GEN_INT (pic_labelno
++);
6341 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6342 label
= gen_rtx_CONST (VOIDmode
, label
);
6344 sum
= gen_rtx_UNSPEC (Pmode
,
6345 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
6346 GEN_INT (TARGET_ARM
? 8 : 4)),
6348 reg
= load_tls_operand (sum
, reg
);
6351 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
6353 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
6355 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
6356 LCT_PURE
, /* LCT_CONST? */
6357 Pmode
, 1, reg
, Pmode
);
6359 insns
= get_insns ();
6366 arm_tls_descseq_addr (rtx x
, rtx reg
)
6368 rtx labelno
= GEN_INT (pic_labelno
++);
6369 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6370 rtx sum
= gen_rtx_UNSPEC (Pmode
,
6371 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
6372 gen_rtx_CONST (VOIDmode
, label
),
6373 GEN_INT (!TARGET_ARM
)),
6375 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, 0));
6377 emit_insn (gen_tlscall (x
, labelno
));
6379 reg
= gen_reg_rtx (SImode
);
6381 gcc_assert (REGNO (reg
) != 0);
6383 emit_move_insn (reg
, reg0
);
6389 legitimize_tls_address (rtx x
, rtx reg
)
6391 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
6392 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
6396 case TLS_MODEL_GLOBAL_DYNAMIC
:
6397 if (TARGET_GNU2_TLS
)
6399 reg
= arm_tls_descseq_addr (x
, reg
);
6401 tp
= arm_load_tp (NULL_RTX
);
6403 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
6407 /* Original scheme */
6408 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
6409 dest
= gen_reg_rtx (Pmode
);
6410 emit_libcall_block (insns
, dest
, ret
, x
);
6414 case TLS_MODEL_LOCAL_DYNAMIC
:
6415 if (TARGET_GNU2_TLS
)
6417 reg
= arm_tls_descseq_addr (x
, reg
);
6419 tp
= arm_load_tp (NULL_RTX
);
6421 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
6425 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
6427 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6428 share the LDM result with other LD model accesses. */
6429 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
6431 dest
= gen_reg_rtx (Pmode
);
6432 emit_libcall_block (insns
, dest
, ret
, eqv
);
6434 /* Load the addend. */
6435 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
6436 GEN_INT (TLS_LDO32
)),
6438 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
6439 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
6443 case TLS_MODEL_INITIAL_EXEC
:
6444 labelno
= GEN_INT (pic_labelno
++);
6445 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6446 label
= gen_rtx_CONST (VOIDmode
, label
);
6447 sum
= gen_rtx_UNSPEC (Pmode
,
6448 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
6449 GEN_INT (TARGET_ARM
? 8 : 4)),
6451 reg
= load_tls_operand (sum
, reg
);
6454 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
6455 else if (TARGET_THUMB2
)
6456 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
6459 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
6460 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
6463 tp
= arm_load_tp (NULL_RTX
);
6465 return gen_rtx_PLUS (Pmode
, tp
, reg
);
6467 case TLS_MODEL_LOCAL_EXEC
:
6468 tp
= arm_load_tp (NULL_RTX
);
6470 reg
= gen_rtx_UNSPEC (Pmode
,
6471 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
6473 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
6475 return gen_rtx_PLUS (Pmode
, tp
, reg
);
6482 /* Try machine-dependent ways of modifying an illegitimate address
6483 to be legitimate. If we find one, return the new, valid address. */
6485 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
6489 /* TODO: legitimize_address for Thumb2. */
6492 return thumb_legitimize_address (x
, orig_x
, mode
);
6495 if (arm_tls_symbol_p (x
))
6496 return legitimize_tls_address (x
, NULL_RTX
);
6498 if (GET_CODE (x
) == PLUS
)
6500 rtx xop0
= XEXP (x
, 0);
6501 rtx xop1
= XEXP (x
, 1);
6503 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
6504 xop0
= force_reg (SImode
, xop0
);
6506 if (CONSTANT_P (xop1
) && !symbol_mentioned_p (xop1
))
6507 xop1
= force_reg (SImode
, xop1
);
6509 if (ARM_BASE_REGISTER_RTX_P (xop0
)
6510 && CONST_INT_P (xop1
))
6512 HOST_WIDE_INT n
, low_n
;
6516 /* VFP addressing modes actually allow greater offsets, but for
6517 now we just stick with the lowest common denominator. */
6519 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
6531 low_n
= ((mode
) == TImode
? 0
6532 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
6536 base_reg
= gen_reg_rtx (SImode
);
6537 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
6538 emit_move_insn (base_reg
, val
);
6539 x
= plus_constant (Pmode
, base_reg
, low_n
);
6541 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
6542 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
6545 /* XXX We don't allow MINUS any more -- see comment in
6546 arm_legitimate_address_outer_p (). */
6547 else if (GET_CODE (x
) == MINUS
)
6549 rtx xop0
= XEXP (x
, 0);
6550 rtx xop1
= XEXP (x
, 1);
6552 if (CONSTANT_P (xop0
))
6553 xop0
= force_reg (SImode
, xop0
);
6555 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
6556 xop1
= force_reg (SImode
, xop1
);
6558 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
6559 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
6562 /* Make sure to take full advantage of the pre-indexed addressing mode
6563 with absolute addresses which often allows for the base register to
6564 be factorized for multiple adjacent memory references, and it might
6565 even allows for the mini pool to be avoided entirely. */
6566 else if (CONST_INT_P (x
) && optimize
> 0)
6569 HOST_WIDE_INT mask
, base
, index
;
6572 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6573 use a 8-bit index. So let's use a 12-bit index for SImode only and
6574 hope that arm_gen_constant will enable ldrb to use more bits. */
6575 bits
= (mode
== SImode
) ? 12 : 8;
6576 mask
= (1 << bits
) - 1;
6577 base
= INTVAL (x
) & ~mask
;
6578 index
= INTVAL (x
) & mask
;
6579 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
6581 /* It'll most probably be more efficient to generate the base
6582 with more bits set and use a negative index instead. */
6586 base_reg
= force_reg (SImode
, GEN_INT (base
));
6587 x
= plus_constant (Pmode
, base_reg
, index
);
6592 /* We need to find and carefully transform any SYMBOL and LABEL
6593 references; so go back to the original address expression. */
6594 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6596 if (new_x
!= orig_x
)
6604 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6605 to be legitimate. If we find one, return the new, valid address. */
6607 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
6609 if (arm_tls_symbol_p (x
))
6610 return legitimize_tls_address (x
, NULL_RTX
);
6612 if (GET_CODE (x
) == PLUS
6613 && CONST_INT_P (XEXP (x
, 1))
6614 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
6615 || INTVAL (XEXP (x
, 1)) < 0))
6617 rtx xop0
= XEXP (x
, 0);
6618 rtx xop1
= XEXP (x
, 1);
6619 HOST_WIDE_INT offset
= INTVAL (xop1
);
6621 /* Try and fold the offset into a biasing of the base register and
6622 then offsetting that. Don't do this when optimizing for space
6623 since it can cause too many CSEs. */
6624 if (optimize_size
&& offset
>= 0
6625 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
6627 HOST_WIDE_INT delta
;
6630 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
6631 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
6632 delta
= 31 * GET_MODE_SIZE (mode
);
6634 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
6636 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
6638 x
= plus_constant (Pmode
, xop0
, delta
);
6640 else if (offset
< 0 && offset
> -256)
6641 /* Small negative offsets are best done with a subtract before the
6642 dereference, forcing these into a register normally takes two
6644 x
= force_operand (x
, NULL_RTX
);
6647 /* For the remaining cases, force the constant into a register. */
6648 xop1
= force_reg (SImode
, xop1
);
6649 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
6652 else if (GET_CODE (x
) == PLUS
6653 && s_register_operand (XEXP (x
, 1), SImode
)
6654 && !s_register_operand (XEXP (x
, 0), SImode
))
6656 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
6658 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
6663 /* We need to find and carefully transform any SYMBOL and LABEL
6664 references; so go back to the original address expression. */
6665 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6667 if (new_x
!= orig_x
)
6675 arm_legitimize_reload_address (rtx
*p
,
6676 enum machine_mode mode
,
6677 int opnum
, int type
,
6678 int ind_levels ATTRIBUTE_UNUSED
)
6680 /* We must recognize output that we have already generated ourselves. */
6681 if (GET_CODE (*p
) == PLUS
6682 && GET_CODE (XEXP (*p
, 0)) == PLUS
6683 && REG_P (XEXP (XEXP (*p
, 0), 0))
6684 && CONST_INT_P (XEXP (XEXP (*p
, 0), 1))
6685 && CONST_INT_P (XEXP (*p
, 1)))
6687 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
6688 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
6689 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6693 if (GET_CODE (*p
) == PLUS
6694 && REG_P (XEXP (*p
, 0))
6695 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p
, 0)))
6696 /* If the base register is equivalent to a constant, let the generic
6697 code handle it. Otherwise we will run into problems if a future
6698 reload pass decides to rematerialize the constant. */
6699 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p
, 0)))
6700 && CONST_INT_P (XEXP (*p
, 1)))
6702 HOST_WIDE_INT val
= INTVAL (XEXP (*p
, 1));
6703 HOST_WIDE_INT low
, high
;
6705 /* Detect coprocessor load/stores. */
6706 bool coproc_p
= ((TARGET_HARD_FLOAT
6708 && (mode
== SFmode
|| mode
== DFmode
))
6709 || (TARGET_REALLY_IWMMXT
6710 && VALID_IWMMXT_REG_MODE (mode
))
6712 && (VALID_NEON_DREG_MODE (mode
)
6713 || VALID_NEON_QREG_MODE (mode
))));
6715 /* For some conditions, bail out when lower two bits are unaligned. */
6716 if ((val
& 0x3) != 0
6717 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6719 /* For DI, and DF under soft-float: */
6720 || ((mode
== DImode
|| mode
== DFmode
)
6721 /* Without ldrd, we use stm/ldm, which does not
6722 fair well with unaligned bits. */
6724 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6725 || TARGET_THUMB2
))))
6728 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6729 of which the (reg+high) gets turned into a reload add insn,
6730 we try to decompose the index into high/low values that can often
6731 also lead to better reload CSE.
6733 ldr r0, [r2, #4100] // Offset too large
6734 ldr r1, [r2, #4104] // Offset too large
6736 is best reloaded as:
6742 which post-reload CSE can simplify in most cases to eliminate the
6743 second add instruction:
6748 The idea here is that we want to split out the bits of the constant
6749 as a mask, rather than as subtracting the maximum offset that the
6750 respective type of load/store used can handle.
6752 When encountering negative offsets, we can still utilize it even if
6753 the overall offset is positive; sometimes this may lead to an immediate
6754 that can be constructed with fewer instructions.
6756 ldr r0, [r2, #0x3FFFFC]
6758 This is best reloaded as:
6759 add t1, r2, #0x400000
6762 The trick for spotting this for a load insn with N bits of offset
6763 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
6764 negative offset that is going to make bit N and all the bits below
6765 it become zero in the remainder part.
6767 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6768 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6769 used in most cases of ARM load/store instructions. */
6771 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6772 (((VAL) & ((1 << (N)) - 1)) \
6773 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6778 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 10);
6780 /* NEON quad-word load/stores are made of two double-word accesses,
6781 so the valid index range is reduced by 8. Treat as 9-bit range if
6783 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
) && low
>= 1016)
6784 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 9);
6786 else if (GET_MODE_SIZE (mode
) == 8)
6789 low
= (TARGET_THUMB2
6790 ? SIGN_MAG_LOW_ADDR_BITS (val
, 10)
6791 : SIGN_MAG_LOW_ADDR_BITS (val
, 8));
6793 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6794 to access doublewords. The supported load/store offsets are
6795 -8, -4, and 4, which we try to produce here. */
6796 low
= ((val
& 0xf) ^ 0x8) - 0x8;
6798 else if (GET_MODE_SIZE (mode
) < 8)
6800 /* NEON element load/stores do not have an offset. */
6801 if (TARGET_NEON_FP16
&& mode
== HFmode
)
6806 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6807 Try the wider 12-bit range first, and re-try if the result
6809 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
6811 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
6815 if (mode
== HImode
|| mode
== HFmode
)
6818 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
6821 /* The storehi/movhi_bytes fallbacks can use only
6822 [-4094,+4094] of the full ldrb/strb index range. */
6823 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
6824 if (low
== 4095 || low
== -4095)
6829 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
6835 high
= ((((val
- low
) & (unsigned HOST_WIDE_INT
) 0xffffffff)
6836 ^ (unsigned HOST_WIDE_INT
) 0x80000000)
6837 - (unsigned HOST_WIDE_INT
) 0x80000000);
6838 /* Check for overflow or zero */
6839 if (low
== 0 || high
== 0 || (high
+ low
!= val
))
6842 /* Reload the high part into a base reg; leave the low part
6844 *p
= gen_rtx_PLUS (GET_MODE (*p
),
6845 gen_rtx_PLUS (GET_MODE (*p
), XEXP (*p
, 0),
6848 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
6849 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
6850 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6858 thumb_legitimize_reload_address (rtx
*x_p
,
6859 enum machine_mode mode
,
6860 int opnum
, int type
,
6861 int ind_levels ATTRIBUTE_UNUSED
)
6865 if (GET_CODE (x
) == PLUS
6866 && GET_MODE_SIZE (mode
) < 4
6867 && REG_P (XEXP (x
, 0))
6868 && XEXP (x
, 0) == stack_pointer_rtx
6869 && CONST_INT_P (XEXP (x
, 1))
6870 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
6875 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6876 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6880 /* If both registers are hi-regs, then it's better to reload the
6881 entire expression rather than each register individually. That
6882 only requires one reload register rather than two. */
6883 if (GET_CODE (x
) == PLUS
6884 && REG_P (XEXP (x
, 0))
6885 && REG_P (XEXP (x
, 1))
6886 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
6887 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
6892 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6893 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6900 /* Test for various thread-local symbols. */
6902 /* Return TRUE if X is a thread-local symbol. */
6905 arm_tls_symbol_p (rtx x
)
6907 if (! TARGET_HAVE_TLS
)
6910 if (GET_CODE (x
) != SYMBOL_REF
)
6913 return SYMBOL_REF_TLS_MODEL (x
) != 0;
6916 /* Helper for arm_tls_referenced_p. */
6919 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
6921 if (GET_CODE (*x
) == SYMBOL_REF
)
6922 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
6924 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6925 TLS offsets, not real symbol references. */
6926 if (GET_CODE (*x
) == UNSPEC
6927 && XINT (*x
, 1) == UNSPEC_TLS
)
6933 /* Return TRUE if X contains any TLS symbol references. */
6936 arm_tls_referenced_p (rtx x
)
6938 if (! TARGET_HAVE_TLS
)
6941 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
6944 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6946 On the ARM, allow any integer (invalid ones are removed later by insn
6947 patterns), nice doubles and symbol_refs which refer to the function's
6950 When generating pic allow anything. */
6953 arm_legitimate_constant_p_1 (enum machine_mode mode
, rtx x
)
6955 /* At present, we have no support for Neon structure constants, so forbid
6956 them here. It might be possible to handle simple cases like 0 and -1
6958 if (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
6961 return flag_pic
|| !label_mentioned_p (x
);
6965 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
6967 return (CONST_INT_P (x
)
6968 || CONST_DOUBLE_P (x
)
6969 || CONSTANT_ADDRESS_P (x
)
6974 arm_legitimate_constant_p (enum machine_mode mode
, rtx x
)
6976 return (!arm_cannot_force_const_mem (mode
, x
)
6978 ? arm_legitimate_constant_p_1 (mode
, x
)
6979 : thumb_legitimate_constant_p (mode
, x
)));
6982 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6985 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
6989 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
6991 split_const (x
, &base
, &offset
);
6992 if (GET_CODE (base
) == SYMBOL_REF
6993 && !offset_within_block_p (base
, INTVAL (offset
)))
6996 return arm_tls_referenced_p (x
);
/* True if X is a register, or a SUBREG of a register.  */
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

/* Strip a SUBREG wrapper, yielding the underlying register rtx.  */
#define REG_OR_SUBREG_RTX(X) \
  (REG_P (X) ? (X) : SUBREG_REG (X))
7007 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
7009 enum machine_mode mode
= GET_MODE (x
);
7023 return COSTS_N_INSNS (1);
7026 if (CONST_INT_P (XEXP (x
, 1)))
7029 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
7036 return COSTS_N_INSNS (2) + cycles
;
7038 return COSTS_N_INSNS (1) + 16;
7041 return (COSTS_N_INSNS (1)
7042 + 4 * ((MEM_P (SET_SRC (x
)))
7043 + MEM_P (SET_DEST (x
))));
7048 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
7050 if (thumb_shiftable_const (INTVAL (x
)))
7051 return COSTS_N_INSNS (2);
7052 return COSTS_N_INSNS (3);
7054 else if ((outer
== PLUS
|| outer
== COMPARE
)
7055 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
7057 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
7058 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
7059 return COSTS_N_INSNS (1);
7060 else if (outer
== AND
)
7063 /* This duplicates the tests in the andsi3 expander. */
7064 for (i
= 9; i
<= 31; i
++)
7065 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
7066 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
7067 return COSTS_N_INSNS (2);
7069 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
7070 || outer
== LSHIFTRT
)
7072 return COSTS_N_INSNS (2);
7078 return COSTS_N_INSNS (3);
7096 /* XXX another guess. */
7097 /* Memory costs quite a lot for the first word, but subsequent words
7098 load at the equivalent of a single insn each. */
7099 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
7100 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7105 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
7111 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
7112 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
7118 return total
+ COSTS_N_INSNS (1);
7120 /* Assume a two-shift sequence. Increase the cost slightly so
7121 we prefer actual shifts over an extend operation. */
7122 return total
+ 1 + COSTS_N_INSNS (2);
7130 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
7132 enum machine_mode mode
= GET_MODE (x
);
7133 enum rtx_code subcode
;
7135 enum rtx_code code
= GET_CODE (x
);
7141 /* Memory costs quite a lot for the first word, but subsequent words
7142 load at the equivalent of a single insn each. */
7143 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
7150 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
7151 *total
= COSTS_N_INSNS (2);
7152 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
7153 *total
= COSTS_N_INSNS (4);
7155 *total
= COSTS_N_INSNS (20);
7159 if (REG_P (XEXP (x
, 1)))
7160 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
7161 else if (!CONST_INT_P (XEXP (x
, 1)))
7162 *total
= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7168 *total
+= COSTS_N_INSNS (4);
7173 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
7174 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7177 *total
+= COSTS_N_INSNS (3);
7181 *total
+= COSTS_N_INSNS (1);
7182 /* Increase the cost of complex shifts because they aren't any faster,
7183 and reduce dual issue opportunities. */
7184 if (arm_tune_cortex_a9
7185 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
7193 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7194 if (CONST_INT_P (XEXP (x
, 0))
7195 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
7197 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7201 if (CONST_INT_P (XEXP (x
, 1))
7202 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
7204 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7211 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7213 if (TARGET_HARD_FLOAT
7215 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7217 *total
= COSTS_N_INSNS (1);
7218 if (CONST_DOUBLE_P (XEXP (x
, 0))
7219 && arm_const_double_rtx (XEXP (x
, 0)))
7221 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7225 if (CONST_DOUBLE_P (XEXP (x
, 1))
7226 && arm_const_double_rtx (XEXP (x
, 1)))
7228 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7234 *total
= COSTS_N_INSNS (20);
7238 *total
= COSTS_N_INSNS (1);
7239 if (CONST_INT_P (XEXP (x
, 0))
7240 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
7242 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7246 subcode
= GET_CODE (XEXP (x
, 1));
7247 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7248 || subcode
== LSHIFTRT
7249 || subcode
== ROTATE
|| subcode
== ROTATERT
)
7251 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7252 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
7256 /* A shift as a part of RSB costs no more than RSB itself. */
7257 if (GET_CODE (XEXP (x
, 0)) == MULT
7258 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7260 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed
);
7261 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7266 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
7268 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7269 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
7273 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
7274 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
7276 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7277 if (REG_P (XEXP (XEXP (x
, 1), 0))
7278 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
7279 *total
+= COSTS_N_INSNS (1);
7287 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
7288 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7289 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7291 *total
= COSTS_N_INSNS (1);
7292 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
7294 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7298 /* MLA: All arguments must be registers. We filter out
7299 multiplication by a power of two, so that we fall down into
7301 if (GET_CODE (XEXP (x
, 0)) == MULT
7302 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7304 /* The cost comes from the cost of the multiply. */
7308 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7310 if (TARGET_HARD_FLOAT
7312 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7314 *total
= COSTS_N_INSNS (1);
7315 if (CONST_DOUBLE_P (XEXP (x
, 1))
7316 && arm_const_double_rtx (XEXP (x
, 1)))
7318 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7325 *total
= COSTS_N_INSNS (20);
7329 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
7330 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
7332 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7333 if (REG_P (XEXP (XEXP (x
, 0), 0))
7334 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
7335 *total
+= COSTS_N_INSNS (1);
7341 case AND
: case XOR
: case IOR
:
7343 /* Normally the frame registers will be spilt into reg+const during
7344 reload, so it is a bad idea to combine them with other instructions,
7345 since then they might not be moved outside of loops. As a compromise
7346 we allow integration with ops that have a constant as their second
7348 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
7349 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
7350 && !CONST_INT_P (XEXP (x
, 1)))
7351 *total
= COSTS_N_INSNS (1);
7355 *total
+= COSTS_N_INSNS (2);
7356 if (CONST_INT_P (XEXP (x
, 1))
7357 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
7359 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7366 *total
+= COSTS_N_INSNS (1);
7367 if (CONST_INT_P (XEXP (x
, 1))
7368 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
7370 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7373 subcode
= GET_CODE (XEXP (x
, 0));
7374 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7375 || subcode
== LSHIFTRT
7376 || subcode
== ROTATE
|| subcode
== ROTATERT
)
7378 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7379 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
7384 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7386 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7387 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
7391 if (subcode
== UMIN
|| subcode
== UMAX
7392 || subcode
== SMIN
|| subcode
== SMAX
)
7394 *total
= COSTS_N_INSNS (3);
7401 /* This should have been handled by the CPU specific routines. */
7405 if (arm_arch3m
&& mode
== SImode
7406 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
7407 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7408 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
7409 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
7410 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
7411 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
7413 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, 0, speed
);
7416 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7420 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7422 if (TARGET_HARD_FLOAT
7424 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7426 *total
= COSTS_N_INSNS (1);
7429 *total
= COSTS_N_INSNS (2);
7435 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
7436 if (mode
== SImode
&& code
== NOT
)
7438 subcode
= GET_CODE (XEXP (x
, 0));
7439 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7440 || subcode
== LSHIFTRT
7441 || subcode
== ROTATE
|| subcode
== ROTATERT
7443 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
7445 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
7446 /* Register shifts cost an extra cycle. */
7447 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
7448 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
7457 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
7459 *total
= COSTS_N_INSNS (4);
7463 operand
= XEXP (x
, 0);
7465 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
7466 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
7467 && REG_P (XEXP (operand
, 0))
7468 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
7469 *total
+= COSTS_N_INSNS (1);
7470 *total
+= (rtx_cost (XEXP (x
, 1), code
, 1, speed
)
7471 + rtx_cost (XEXP (x
, 2), code
, 2, speed
));
7475 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
7477 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7483 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
7484 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
7486 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7492 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
7493 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
7495 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7515 /* SCC insns. In the case where the comparison has already been
7516 performed, then they cost 2 instructions. Otherwise they need
7517 an additional comparison before them. */
7518 *total
= COSTS_N_INSNS (2);
7519 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
7526 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
7532 *total
+= COSTS_N_INSNS (1);
7533 if (CONST_INT_P (XEXP (x
, 1))
7534 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
7536 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7540 subcode
= GET_CODE (XEXP (x
, 0));
7541 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7542 || subcode
== LSHIFTRT
7543 || subcode
== ROTATE
|| subcode
== ROTATERT
)
7545 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7546 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
7551 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7553 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7554 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
7564 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7565 if (!CONST_INT_P (XEXP (x
, 1))
7566 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
7567 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
7571 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7573 if (TARGET_HARD_FLOAT
7575 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7577 *total
= COSTS_N_INSNS (1);
7580 *total
= COSTS_N_INSNS (20);
7583 *total
= COSTS_N_INSNS (1);
7585 *total
+= COSTS_N_INSNS (3);
7591 if (GET_MODE_CLASS (mode
) == MODE_INT
)
7593 rtx op
= XEXP (x
, 0);
7594 enum machine_mode opmode
= GET_MODE (op
);
7597 *total
+= COSTS_N_INSNS (1);
7599 if (opmode
!= SImode
)
7603 /* If !arm_arch4, we use one of the extendhisi2_mem
7604 or movhi_bytes patterns for HImode. For a QImode
7605 sign extension, we first zero-extend from memory
7606 and then perform a shift sequence. */
7607 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
7608 *total
+= COSTS_N_INSNS (2);
7611 *total
+= COSTS_N_INSNS (1);
7613 /* We don't have the necessary insn, so we need to perform some
7615 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
7616 /* An and with constant 255. */
7617 *total
+= COSTS_N_INSNS (1);
7619 /* A shift sequence. Increase costs slightly to avoid
7620 combining two shifts into an extend operation. */
7621 *total
+= COSTS_N_INSNS (2) + 1;
7627 switch (GET_MODE (XEXP (x
, 0)))
7634 *total
= COSTS_N_INSNS (1);
7644 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7648 if (const_ok_for_arm (INTVAL (x
))
7649 || const_ok_for_arm (~INTVAL (x
)))
7650 *total
= COSTS_N_INSNS (1);
7652 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
7653 INTVAL (x
), NULL_RTX
,
7660 *total
= COSTS_N_INSNS (3);
7664 *total
= COSTS_N_INSNS (1);
7668 *total
= COSTS_N_INSNS (1);
7669 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
7673 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
7674 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7675 *total
= COSTS_N_INSNS (1);
7677 *total
= COSTS_N_INSNS (4);
7681 /* The vec_extract patterns accept memory operands that require an
7682 address reload. Account for the cost of that reload to give the
7683 auto-inc-dec pass an incentive to try to replace them. */
7684 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
7685 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
7687 *total
= rtx_cost (SET_DEST (x
), code
, 0, speed
);
7688 if (!neon_vector_mem_operand (SET_DEST (x
), 2))
7689 *total
+= COSTS_N_INSNS (1);
7692 /* Likewise for the vec_set patterns. */
7693 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
7694 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
7695 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
7697 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
7698 *total
= rtx_cost (mem
, code
, 0, speed
);
7699 if (!neon_vector_mem_operand (mem
, 2))
7700 *total
+= COSTS_N_INSNS (1);
7706 /* We cost this as high as our memory costs to allow this to
7707 be hoisted from loops. */
7708 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
7710 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
7716 && TARGET_HARD_FLOAT
7718 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
7719 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
7720 *total
= COSTS_N_INSNS (1);
7722 *total
= COSTS_N_INSNS (4);
7726 *total
= COSTS_N_INSNS (4);
7731 /* Estimates the size cost of thumb1 instructions.
7732 For now most of the code is copied from thumb1_rtx_costs. We need more
7733 fine grain tuning when we have more related test cases. */
7735 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
7737 enum machine_mode mode
= GET_MODE (x
);
7750 return COSTS_N_INSNS (1);
7753 if (CONST_INT_P (XEXP (x
, 1)))
7755 /* Thumb1 mul instruction can't operate on const. We must Load it
7756 into a register first. */
7757 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
7758 return COSTS_N_INSNS (1) + const_size
;
7760 return COSTS_N_INSNS (1);
7763 return (COSTS_N_INSNS (1)
7764 + 4 * ((MEM_P (SET_SRC (x
)))
7765 + MEM_P (SET_DEST (x
))));
7770 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
7771 return COSTS_N_INSNS (1);
7772 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7773 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
7774 return COSTS_N_INSNS (2);
7775 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7776 if (thumb_shiftable_const (INTVAL (x
)))
7777 return COSTS_N_INSNS (2);
7778 return COSTS_N_INSNS (3);
7780 else if ((outer
== PLUS
|| outer
== COMPARE
)
7781 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
7783 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
7784 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
7785 return COSTS_N_INSNS (1);
7786 else if (outer
== AND
)
7789 /* This duplicates the tests in the andsi3 expander. */
7790 for (i
= 9; i
<= 31; i
++)
7791 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
7792 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
7793 return COSTS_N_INSNS (2);
7795 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
7796 || outer
== LSHIFTRT
)
7798 return COSTS_N_INSNS (2);
7804 return COSTS_N_INSNS (3);
7822 /* XXX another guess. */
7823 /* Memory costs quite a lot for the first word, but subsequent words
7824 load at the equivalent of a single insn each. */
7825 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
7826 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7831 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
7836 /* XXX still guessing. */
7837 switch (GET_MODE (XEXP (x
, 0)))
7840 return (1 + (mode
== DImode
? 4 : 0)
7841 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
7844 return (4 + (mode
== DImode
? 4 : 0)
7845 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
7848 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
7859 /* RTX costs when optimizing for size. */
7861 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7864 enum machine_mode mode
= GET_MODE (x
);
7867 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
7871 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7875 /* A memory access costs 1 insn if the mode is small, or the address is
7876 a single register, otherwise it costs one insn per word. */
7877 if (REG_P (XEXP (x
, 0)))
7878 *total
= COSTS_N_INSNS (1);
7880 && GET_CODE (XEXP (x
, 0)) == PLUS
7881 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
7882 /* This will be split into two instructions.
7883 See arm.md:calculate_pic_address. */
7884 *total
= COSTS_N_INSNS (2);
7886 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7893 /* Needs a libcall, so it costs about this. */
7894 *total
= COSTS_N_INSNS (2);
7898 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
7900 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, false);
7908 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
7910 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, 0, false);
7913 else if (mode
== SImode
)
7915 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, false);
7916 /* Slightly disparage register shifts, but not by much. */
7917 if (!CONST_INT_P (XEXP (x
, 1)))
7918 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, 1, false);
7922 /* Needs a libcall. */
7923 *total
= COSTS_N_INSNS (2);
7927 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7928 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7930 *total
= COSTS_N_INSNS (1);
7936 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
7937 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
7939 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
7940 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
7941 || subcode1
== ROTATE
|| subcode1
== ROTATERT
7942 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
7943 || subcode1
== ASHIFTRT
)
7945 /* It's just the cost of the two operands. */
7950 *total
= COSTS_N_INSNS (1);
7954 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7958 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7959 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7961 *total
= COSTS_N_INSNS (1);
7965 /* A shift as a part of ADD costs nothing. */
7966 if (GET_CODE (XEXP (x
, 0)) == MULT
7967 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7969 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
7970 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, false);
7971 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, false);
7976 case AND
: case XOR
: case IOR
:
7979 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
7981 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
7982 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
7983 || (code
== AND
&& subcode
== NOT
))
7985 /* It's just the cost of the two operands. */
7991 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7995 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7999 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8000 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8002 *total
= COSTS_N_INSNS (1);
8008 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8017 if (cc_register (XEXP (x
, 0), VOIDmode
))
8020 *total
= COSTS_N_INSNS (1);
8024 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8025 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8026 *total
= COSTS_N_INSNS (1);
8028 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
8033 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
8036 if (const_ok_for_arm (INTVAL (x
)))
8037 /* A multiplication by a constant requires another instruction
8038 to load the constant to a register. */
8039 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
8041 else if (const_ok_for_arm (~INTVAL (x
)))
8042 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
8043 else if (const_ok_for_arm (-INTVAL (x
)))
8045 if (outer_code
== COMPARE
|| outer_code
== PLUS
8046 || outer_code
== MINUS
)
8049 *total
= COSTS_N_INSNS (1);
8052 *total
= COSTS_N_INSNS (2);
8058 *total
= COSTS_N_INSNS (2);
8062 *total
= COSTS_N_INSNS (4);
8067 && TARGET_HARD_FLOAT
8068 && outer_code
== SET
8069 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8070 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8071 *total
= COSTS_N_INSNS (1);
8073 *total
= COSTS_N_INSNS (4);
8078 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8079 cost of these slightly. */
8080 *total
= COSTS_N_INSNS (1) + 1;
8087 if (mode
!= VOIDmode
)
8088 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8090 *total
= COSTS_N_INSNS (4); /* How knows? */
8095 /* RTX costs when optimizing for size. */
8097 arm_rtx_costs (rtx x
, int code
, int outer_code
, int opno ATTRIBUTE_UNUSED
,
8098 int *total
, bool speed
)
8101 return arm_size_rtx_costs (x
, (enum rtx_code
) code
,
8102 (enum rtx_code
) outer_code
, total
);
8104 return current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
8105 (enum rtx_code
) outer_code
,
8109 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8110 supported on any "slowmul" cores, so it can be ignored. */
8113 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8114 int *total
, bool speed
)
8116 enum machine_mode mode
= GET_MODE (x
);
8120 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
8127 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
8130 *total
= COSTS_N_INSNS (20);
8134 if (CONST_INT_P (XEXP (x
, 1)))
8136 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
8137 & (unsigned HOST_WIDE_INT
) 0xffffffff);
8138 int cost
, const_ok
= const_ok_for_arm (i
);
8139 int j
, booth_unit_size
;
8141 /* Tune as appropriate. */
8142 cost
= const_ok
? 4 : 8;
8143 booth_unit_size
= 2;
8144 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
8146 i
>>= booth_unit_size
;
8150 *total
= COSTS_N_INSNS (cost
);
8151 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8155 *total
= COSTS_N_INSNS (20);
8159 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
8164 /* RTX cost for cores with a fast multiply unit (M variants). */
8167 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8168 int *total
, bool speed
)
8170 enum machine_mode mode
= GET_MODE (x
);
8174 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
8178 /* ??? should thumb2 use different costs? */
8182 /* There is no point basing this on the tuning, since it is always the
8183 fast variant if it exists at all. */
8185 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
8186 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8187 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8189 *total
= COSTS_N_INSNS(2);
8196 *total
= COSTS_N_INSNS (5);
8200 if (CONST_INT_P (XEXP (x
, 1)))
8202 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
8203 & (unsigned HOST_WIDE_INT
) 0xffffffff);
8204 int cost
, const_ok
= const_ok_for_arm (i
);
8205 int j
, booth_unit_size
;
8207 /* Tune as appropriate. */
8208 cost
= const_ok
? 4 : 8;
8209 booth_unit_size
= 8;
8210 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
8212 i
>>= booth_unit_size
;
8216 *total
= COSTS_N_INSNS(cost
);
8222 *total
= COSTS_N_INSNS (4);
8226 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8228 if (TARGET_HARD_FLOAT
8230 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8232 *total
= COSTS_N_INSNS (1);
8237 /* Requires a lib call */
8238 *total
= COSTS_N_INSNS (20);
8242 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
8247 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8248 so it can be ignored. */
8251 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8252 int *total
, bool speed
)
8254 enum machine_mode mode
= GET_MODE (x
);
8258 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
8265 if (GET_CODE (XEXP (x
, 0)) != MULT
)
8266 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
8268 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8269 will stall until the multiplication is complete. */
8270 *total
= COSTS_N_INSNS (3);
8274 /* There is no point basing this on the tuning, since it is always the
8275 fast variant if it exists at all. */
8277 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
8278 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8279 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8281 *total
= COSTS_N_INSNS (2);
8288 *total
= COSTS_N_INSNS (5);
8292 if (CONST_INT_P (XEXP (x
, 1)))
8294 /* If operand 1 is a constant we can more accurately
8295 calculate the cost of the multiply. The multiplier can
8296 retire 15 bits on the first cycle and a further 12 on the
8297 second. We do, of course, have to load the constant into
8298 a register first. */
8299 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8300 /* There's a general overhead of one cycle. */
8302 unsigned HOST_WIDE_INT masked_const
;
8307 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
8309 masked_const
= i
& 0xffff8000;
8310 if (masked_const
!= 0)
8313 masked_const
= i
& 0xf8000000;
8314 if (masked_const
!= 0)
8317 *total
= COSTS_N_INSNS (cost
);
8323 *total
= COSTS_N_INSNS (3);
8327 /* Requires a lib call */
8328 *total
= COSTS_N_INSNS (20);
8332 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
8337 /* RTX costs for 9e (and later) cores. */
8340 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8341 int *total
, bool speed
)
8343 enum machine_mode mode
= GET_MODE (x
);
8350 *total
= COSTS_N_INSNS (3);
8354 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
8362 /* There is no point basing this on the tuning, since it is always the
8363 fast variant if it exists at all. */
8365 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
8366 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8367 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8369 *total
= COSTS_N_INSNS (2);
8376 *total
= COSTS_N_INSNS (5);
8382 *total
= COSTS_N_INSNS (2);
8386 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8388 if (TARGET_HARD_FLOAT
8390 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8392 *total
= COSTS_N_INSNS (1);
8397 *total
= COSTS_N_INSNS (20);
8401 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
8404 /* All address computations that can be done are free, but rtx cost returns
8405 the same for practically all of them. So we weight the different types
8406 of address here in the order (most pref first):
8407 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8409 arm_arm_address_cost (rtx x
)
8411 enum rtx_code c
= GET_CODE (x
);
8413 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
8415 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
8420 if (CONST_INT_P (XEXP (x
, 1)))
8423 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
8433 arm_thumb_address_cost (rtx x
)
8435 enum rtx_code c
= GET_CODE (x
);
8440 && REG_P (XEXP (x
, 0))
8441 && CONST_INT_P (XEXP (x
, 1)))
8448 arm_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
8449 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
8451 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
8454 /* Adjust cost hook for XScale. */
8456 xscale_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
8458 /* Some true dependencies can have a higher cost depending
8459 on precisely how certain input operands are used. */
8460 if (REG_NOTE_KIND(link
) == 0
8461 && recog_memoized (insn
) >= 0
8462 && recog_memoized (dep
) >= 0)
8464 int shift_opnum
= get_attr_shift (insn
);
8465 enum attr_type attr_type
= get_attr_type (dep
);
8467 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8468 operand for INSN. If we have a shifted input operand and the
8469 instruction we depend on is another ALU instruction, then we may
8470 have to account for an additional stall. */
8471 if (shift_opnum
!= 0
8472 && (attr_type
== TYPE_ALU_SHIFT
|| attr_type
== TYPE_ALU_SHIFT_REG
))
8474 rtx shifted_operand
;
8477 /* Get the shifted operand. */
8478 extract_insn (insn
);
8479 shifted_operand
= recog_data
.operand
[shift_opnum
];
8481 /* Iterate over all the operands in DEP. If we write an operand
8482 that overlaps with SHIFTED_OPERAND, then we have increase the
8483 cost of this dependency. */
8485 preprocess_constraints ();
8486 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
8488 /* We can ignore strict inputs. */
8489 if (recog_data
.operand_type
[opno
] == OP_IN
)
8492 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
8504 /* Adjust cost hook for Cortex A9. */
8506 cortex_a9_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
8508 switch (REG_NOTE_KIND (link
))
8515 case REG_DEP_OUTPUT
:
8516 if (recog_memoized (insn
) >= 0
8517 && recog_memoized (dep
) >= 0)
8519 if (GET_CODE (PATTERN (insn
)) == SET
)
8522 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
8524 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
8526 enum attr_type attr_type_insn
= get_attr_type (insn
);
8527 enum attr_type attr_type_dep
= get_attr_type (dep
);
8529 /* By default all dependencies of the form
8532 have an extra latency of 1 cycle because
8533 of the input and output dependency in this
8534 case. However this gets modeled as an true
8535 dependency and hence all these checks. */
8536 if (REG_P (SET_DEST (PATTERN (insn
)))
8537 && REG_P (SET_DEST (PATTERN (dep
)))
8538 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
8539 SET_DEST (PATTERN (dep
))))
8541 /* FMACS is a special case where the dependent
8542 instruction can be issued 3 cycles before
8543 the normal latency in case of an output
8545 if ((attr_type_insn
== TYPE_FMACS
8546 || attr_type_insn
== TYPE_FMACD
)
8547 && (attr_type_dep
== TYPE_FMACS
8548 || attr_type_dep
== TYPE_FMACD
))
8550 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
8551 *cost
= insn_default_latency (dep
) - 3;
8553 *cost
= insn_default_latency (dep
);
8558 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
8559 *cost
= insn_default_latency (dep
) + 1;
8561 *cost
= insn_default_latency (dep
);
8577 /* Adjust cost hook for FA726TE. */
8579 fa726te_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
8581 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
8582 have penalty of 3. */
8583 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
8584 && recog_memoized (insn
) >= 0
8585 && recog_memoized (dep
) >= 0
8586 && get_attr_conds (dep
) == CONDS_SET
)
8588 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8589 if (get_attr_conds (insn
) == CONDS_USE
8590 && get_attr_type (insn
) != TYPE_BRANCH
)
8596 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
8597 || get_attr_conds (insn
) == CONDS_USE
)
8607 /* Implement TARGET_REGISTER_MOVE_COST.
8609 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8610 it is typically more expensive than a single memory access. We set
8611 the cost to less than two memory accesses so that floating
8612 point to integer conversion does not go through memory. */
8615 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED
,
8616 reg_class_t from
, reg_class_t to
)
8620 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
8621 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
8623 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
8624 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
8626 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
8633 if (from
== HI_REGS
|| to
== HI_REGS
)
8640 /* Implement TARGET_MEMORY_MOVE_COST. */
8643 arm_memory_move_cost (enum machine_mode mode
, reg_class_t rclass
,
8644 bool in ATTRIBUTE_UNUSED
)
8650 if (GET_MODE_SIZE (mode
) < 4)
8653 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
8657 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8658 It corrects the value of COST based on the relationship between
8659 INSN and DEP through the dependence LINK. It returns the new
8660 value. There is a per-core adjust_cost hook to adjust scheduler costs
8661 and the per-core hook can choose to completely override the generic
8662 adjust_cost function. Only put bits of code into arm_adjust_cost that
8663 are common across all cores. */
8665 arm_adjust_cost (rtx insn
, rtx link
, rtx dep
, int cost
)
8669 /* When generating Thumb-1 code, we want to place flag-setting operations
8670 close to a conditional branch which depends on them, so that we can
8671 omit the comparison. */
8673 && REG_NOTE_KIND (link
) == 0
8674 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
8675 && recog_memoized (dep
) >= 0
8676 && get_attr_conds (dep
) == CONDS_SET
)
8679 if (current_tune
->sched_adjust_cost
!= NULL
)
8681 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
8685 /* XXX Is this strictly true? */
8686 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
8687 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
8690 /* Call insns don't incur a stall, even if they follow a load. */
8691 if (REG_NOTE_KIND (link
) == 0
8695 if ((i_pat
= single_set (insn
)) != NULL
8696 && MEM_P (SET_SRC (i_pat
))
8697 && (d_pat
= single_set (dep
)) != NULL
8698 && MEM_P (SET_DEST (d_pat
)))
8700 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
8701 /* This is a load after a store, there is no conflict if the load reads
8702 from a cached area. Assume that loads from the stack, and from the
8703 constant pool are cached, and that others will miss. This is a
8706 if ((GET_CODE (src_mem
) == SYMBOL_REF
8707 && CONSTANT_POOL_ADDRESS_P (src_mem
))
8708 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
8709 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
8710 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
8718 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
8721 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
8723 return (optimize
> 0) ? 2 : 0;
8727 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
8729 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
8732 static bool fp_consts_inited
= false;
8734 static REAL_VALUE_TYPE value_fp0
;
8737 init_fp_table (void)
8741 r
= REAL_VALUE_ATOF ("0", DFmode
);
8743 fp_consts_inited
= true;
8746 /* Return TRUE if rtx X is a valid immediate FP constant. */
8748 arm_const_double_rtx (rtx x
)
8752 if (!fp_consts_inited
)
8755 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8756 if (REAL_VALUE_MINUS_ZERO (r
))
8759 if (REAL_VALUES_EQUAL (r
, value_fp0
))
8765 /* VFPv3 has a fairly wide range of representable immediates, formed from
8766 "quarter-precision" floating-point values. These can be evaluated using this
8767 formula (with ^ for exponentiation):
8771 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8772 16 <= n <= 31 and 0 <= r <= 7.
8774 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8776 - A (most-significant) is the sign bit.
8777 - BCD are the exponent (encoded as r XOR 3).
8778 - EFGH are the mantissa (encoded as n - 16).
8781 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8782 fconst[sd] instruction, or -1 if X isn't suitable. */
8784 vfp3_const_double_index (rtx x
)
8786 REAL_VALUE_TYPE r
, m
;
8788 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
8789 unsigned HOST_WIDE_INT mask
;
8790 HOST_WIDE_INT m1
, m2
;
8791 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
8793 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
8796 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8798 /* We can't represent these things, so detect them first. */
8799 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
8802 /* Extract sign, exponent and mantissa. */
8803 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
8804 r
= real_value_abs (&r
);
8805 exponent
= REAL_EXP (&r
);
8806 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8807 highest (sign) bit, with a fixed binary point at bit point_pos.
8808 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8809 bits for the mantissa, this may fail (low bits would be lost). */
8810 real_ldexp (&m
, &r
, point_pos
- exponent
);
8811 REAL_VALUE_TO_INT (&m1
, &m2
, m
);
8815 /* If there are bits set in the low part of the mantissa, we can't
8816 represent this value. */
8820 /* Now make it so that mantissa contains the most-significant bits, and move
8821 the point_pos to indicate that the least-significant bits have been
8823 point_pos
-= HOST_BITS_PER_WIDE_INT
;
8826 /* We can permit four significant bits of mantissa only, plus a high bit
8827 which is always 1. */
8828 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
8829 if ((mantissa
& mask
) != 0)
8832 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8833 mantissa
>>= point_pos
- 5;
8835 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8836 floating-point immediate zero with Neon using an integer-zero load, but
8837 that case is handled elsewhere.) */
8841 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
8843 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8844 normalized significands are in the range [1, 2). (Our mantissa is shifted
8845 left 4 places at this point relative to normalized IEEE754 values). GCC
8846 internally uses [0.5, 1) (see real.c), so the exponent returned from
8847 REAL_EXP must be altered. */
8848 exponent
= 5 - exponent
;
8850 if (exponent
< 0 || exponent
> 7)
8853 /* Sign, mantissa and exponent are now in the correct form to plug into the
8854 formula described in the comment above. */
8855 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
8858 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8860 vfp3_const_double_rtx (rtx x
)
8865 return vfp3_const_double_index (x
) != -1;
8868 /* Recognize immediates which can be used in various Neon instructions. Legal
8869 immediates are described by the following table (for VMVN variants, the
8870 bitwise inverse of the constant shown is recognized. In either case, VMOV
8871 is output and the correct instruction to use for a given constant is chosen
8872 by the assembler). The constant shown is replicated across all elements of
8873 the destination vector.
8875 insn elems variant constant (binary)
8876 ---- ----- ------- -----------------
8877 vmov i32 0 00000000 00000000 00000000 abcdefgh
8878 vmov i32 1 00000000 00000000 abcdefgh 00000000
8879 vmov i32 2 00000000 abcdefgh 00000000 00000000
8880 vmov i32 3 abcdefgh 00000000 00000000 00000000
8881 vmov i16 4 00000000 abcdefgh
8882 vmov i16 5 abcdefgh 00000000
8883 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8884 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8885 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8886 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8887 vmvn i16 10 00000000 abcdefgh
8888 vmvn i16 11 abcdefgh 00000000
8889 vmov i32 12 00000000 00000000 abcdefgh 11111111
8890 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8891 vmov i32 14 00000000 abcdefgh 11111111 11111111
8892 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8894 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8895 eeeeeeee ffffffff gggggggg hhhhhhhh
8896 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8897 vmov f32 19 00000000 00000000 00000000 00000000
8899 For case 18, B = !b. Representable values are exactly those accepted by
8900 vfp3_const_double_index, but are output as floating-point numbers rather
8903 For case 19, we will change it to vmov.i32 when assembling.
8905 Variants 0-5 (inclusive) may also be used as immediates for the second
8906 operand of VORR/VBIC instructions.
8908 The INVERSE argument causes the bitwise inverse of the given operand to be
8909 recognized instead (used for recognizing legal immediates for the VAND/VORN
8910 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8911 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8912 output, rather than the real insns vbic/vorr).
8914 INVERSE makes no difference to the recognition of float vectors.
8916 The return value is the variant of immediate as shown in the above table, or
8917 -1 if the given value doesn't match any of the listed patterns.
8920 neon_valid_immediate (rtx op
, enum machine_mode mode
, int inverse
,
8921 rtx
*modconst
, int *elementwidth
)
8923 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8925 for (i = 0; i < idx; i += (STRIDE)) \
8930 immtype = (CLASS); \
8931 elsize = (ELSIZE); \
8935 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
8936 unsigned int innersize
;
8937 unsigned char bytes
[16];
8938 int immtype
= -1, matches
;
8939 unsigned int invmask
= inverse
? 0xff : 0;
8940 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
8944 n_elts
= CONST_VECTOR_NUNITS (op
);
8945 innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
8950 if (mode
== VOIDmode
)
8952 innersize
= GET_MODE_SIZE (mode
);
8955 /* Vectors of float constants. */
8956 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
8958 rtx el0
= CONST_VECTOR_ELT (op
, 0);
8961 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
8964 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
8966 for (i
= 1; i
< n_elts
; i
++)
8968 rtx elt
= CONST_VECTOR_ELT (op
, i
);
8971 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
8973 if (!REAL_VALUES_EQUAL (r0
, re
))
8978 *modconst
= CONST_VECTOR_ELT (op
, 0);
8983 if (el0
== CONST0_RTX (GET_MODE (el0
)))
8989 /* Splat vector constant out into a byte vector. */
8990 for (i
= 0; i
< n_elts
; i
++)
8992 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
8993 unsigned HOST_WIDE_INT elpart
;
8994 unsigned int part
, parts
;
8996 if (CONST_INT_P (el
))
8998 elpart
= INTVAL (el
);
9001 else if (CONST_DOUBLE_P (el
))
9003 elpart
= CONST_DOUBLE_LOW (el
);
9009 for (part
= 0; part
< parts
; part
++)
9012 for (byte
= 0; byte
< innersize
; byte
++)
9014 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
9015 elpart
>>= BITS_PER_UNIT
;
9017 if (CONST_DOUBLE_P (el
))
9018 elpart
= CONST_DOUBLE_HIGH (el
);
9023 gcc_assert (idx
== GET_MODE_SIZE (mode
));
9027 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
9028 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
9030 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
9031 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
9033 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
9034 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
9036 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
9037 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
9039 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
9041 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
9043 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
9044 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
9046 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
9047 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
9049 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
9050 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
9052 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
9053 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
9055 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
9057 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
9059 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
9060 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
9062 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
9063 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
9065 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
9066 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
9068 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
9069 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
9071 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
9073 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
9074 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
9082 *elementwidth
= elsize
;
9086 unsigned HOST_WIDE_INT imm
= 0;
9088 /* Un-invert bytes of recognized vector, if necessary. */
9090 for (i
= 0; i
< idx
; i
++)
9091 bytes
[i
] ^= invmask
;
9095 /* FIXME: Broken on 32-bit H_W_I hosts. */
9096 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
9098 for (i
= 0; i
< 8; i
++)
9099 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
9100 << (i
* BITS_PER_UNIT
);
9102 *modconst
= GEN_INT (imm
);
9106 unsigned HOST_WIDE_INT imm
= 0;
9108 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
9109 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
9111 *modconst
= GEN_INT (imm
);
9119 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9120 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9121 float elements), and a modified constant (whatever should be output for a
9122 VMOV) in *MODCONST. */
9125 neon_immediate_valid_for_move (rtx op
, enum machine_mode mode
,
9126 rtx
*modconst
, int *elementwidth
)
9130 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
9136 *modconst
= tmpconst
;
9139 *elementwidth
= tmpwidth
;
9144 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9145 the immediate is valid, write a constant suitable for using as an operand
9146 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9147 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9150 neon_immediate_valid_for_logic (rtx op
, enum machine_mode mode
, int inverse
,
9151 rtx
*modconst
, int *elementwidth
)
9155 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
9157 if (retval
< 0 || retval
> 5)
9161 *modconst
= tmpconst
;
9164 *elementwidth
= tmpwidth
;
9169 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9170 the immediate is valid, write a constant suitable for using as an operand
9171 to VSHR/VSHL to *MODCONST and the corresponding element width to
9172 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
9173 because they have different limitations. */
9176 neon_immediate_valid_for_shift (rtx op
, enum machine_mode mode
,
9177 rtx
*modconst
, int *elementwidth
,
9180 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
9181 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
9182 unsigned HOST_WIDE_INT last_elt
= 0;
9183 unsigned HOST_WIDE_INT maxshift
;
9185 /* Split vector constant out into a byte vector. */
9186 for (i
= 0; i
< n_elts
; i
++)
9188 rtx el
= CONST_VECTOR_ELT (op
, i
);
9189 unsigned HOST_WIDE_INT elpart
;
9191 if (CONST_INT_P (el
))
9192 elpart
= INTVAL (el
);
9193 else if (CONST_DOUBLE_P (el
))
9198 if (i
!= 0 && elpart
!= last_elt
)
9204 /* Shift less than element size. */
9205 maxshift
= innersize
* 8;
9209 /* Left shift immediate value can be from 0 to <size>-1. */
9210 if (last_elt
>= maxshift
)
9215 /* Right shift immediate value can be from 1 to <size>. */
9216 if (last_elt
== 0 || last_elt
> maxshift
)
9221 *elementwidth
= innersize
* 8;
9224 *modconst
= CONST_VECTOR_ELT (op
, 0);
9229 /* Return a string suitable for output of Neon immediate logic operation
9233 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, enum machine_mode mode
,
9234 int inverse
, int quad
)
9236 int width
, is_valid
;
9237 static char templ
[40];
9239 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
9241 gcc_assert (is_valid
!= 0);
9244 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
9246 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
9251 /* Return a string suitable for output of Neon immediate shift operation
9252 (VSHR or VSHL) MNEM. */
9255 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
9256 enum machine_mode mode
, int quad
,
9259 int width
, is_valid
;
9260 static char templ
[40];
9262 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
9263 gcc_assert (is_valid
!= 0);
9266 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
9268 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
9273 /* Output a sequence of pairwise operations to implement a reduction.
9274 NOTE: We do "too much work" here, because pairwise operations work on two
9275 registers-worth of operands in one go. Unfortunately we can't exploit those
9276 extra calculations to do the full operation in fewer steps, I don't think.
9277 Although all vector elements of the result but the first are ignored, we
9278 actually calculate the same result in each of the elements. An alternative
9279 such as initially loading a vector with zero to use as each of the second
9280 operands would use up an additional register and take an extra instruction,
9281 for no particular gain. */
9284 neon_pairwise_reduce (rtx op0
, rtx op1
, enum machine_mode mode
,
9285 rtx (*reduc
) (rtx
, rtx
, rtx
))
9287 enum machine_mode inner
= GET_MODE_INNER (mode
);
9288 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
9291 for (i
= parts
/ 2; i
>= 1; i
/= 2)
9293 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
9294 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
9299 /* If VALS is a vector constant that can be loaded into a register
9300 using VDUP, generate instructions to do so and return an RTX to
9301 assign to the register. Otherwise return NULL_RTX. */
9304 neon_vdup_constant (rtx vals
)
9306 enum machine_mode mode
= GET_MODE (vals
);
9307 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
9308 int n_elts
= GET_MODE_NUNITS (mode
);
9309 bool all_same
= true;
9313 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
9316 for (i
= 0; i
< n_elts
; ++i
)
9318 x
= XVECEXP (vals
, 0, i
);
9319 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
9324 /* The elements are not all the same. We could handle repeating
9325 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9326 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9330 /* We can load this constant by using VDUP and a constant in a
9331 single ARM register. This will be cheaper than a vector
9334 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
9335 return gen_rtx_VEC_DUPLICATE (mode
, x
);
9338 /* Generate code to load VALS, which is a PARALLEL containing only
9339 constants (for vec_init) or CONST_VECTOR, efficiently into a
9340 register. Returns an RTX to copy into the register, or NULL_RTX
9341 for a PARALLEL that can not be converted into a CONST_VECTOR. */
9344 neon_make_constant (rtx vals
)
9346 enum machine_mode mode
= GET_MODE (vals
);
9348 rtx const_vec
= NULL_RTX
;
9349 int n_elts
= GET_MODE_NUNITS (mode
);
9353 if (GET_CODE (vals
) == CONST_VECTOR
)
9355 else if (GET_CODE (vals
) == PARALLEL
)
9357 /* A CONST_VECTOR must contain only CONST_INTs and
9358 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9359 Only store valid constants in a CONST_VECTOR. */
9360 for (i
= 0; i
< n_elts
; ++i
)
9362 rtx x
= XVECEXP (vals
, 0, i
);
9363 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
9366 if (n_const
== n_elts
)
9367 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
9372 if (const_vec
!= NULL
9373 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
9374 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9376 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
9377 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9378 pipeline cycle; creating the constant takes one or two ARM
9381 else if (const_vec
!= NULL_RTX
)
9382 /* Load from constant pool. On Cortex-A8 this takes two cycles
9383 (for either double or quad vectors). We can not take advantage
9384 of single-cycle VLD1 because we need a PC-relative addressing
9388 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9389 We can not construct an initializer. */
9393 /* Initialize vector TARGET to VALS. */
9396 neon_expand_vector_init (rtx target
, rtx vals
)
9398 enum machine_mode mode
= GET_MODE (target
);
9399 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
9400 int n_elts
= GET_MODE_NUNITS (mode
);
9401 int n_var
= 0, one_var
= -1;
9402 bool all_same
= true;
9406 for (i
= 0; i
< n_elts
; ++i
)
9408 x
= XVECEXP (vals
, 0, i
);
9409 if (!CONSTANT_P (x
))
9410 ++n_var
, one_var
= i
;
9412 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
9418 rtx constant
= neon_make_constant (vals
);
9419 if (constant
!= NULL_RTX
)
9421 emit_move_insn (target
, constant
);
9426 /* Splat a single non-constant element if we can. */
9427 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
9429 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
9430 emit_insn (gen_rtx_SET (VOIDmode
, target
,
9431 gen_rtx_VEC_DUPLICATE (mode
, x
)));
9435 /* One field is non-constant. Load constant then overwrite varying
9436 field. This is more efficient than using the stack. */
9439 rtx copy
= copy_rtx (vals
);
9440 rtx index
= GEN_INT (one_var
);
9442 /* Load constant part of vector, substitute neighboring value for
9444 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
9445 neon_expand_vector_init (target
, copy
);
9447 /* Insert variable. */
9448 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
9452 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
9455 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
9458 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
9461 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
9464 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
9467 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
9470 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
9473 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
9476 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
9484 /* Construct the vector in memory one field at a time
9485 and load the whole vector. */
9486 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
9487 for (i
= 0; i
< n_elts
; i
++)
9488 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
9489 i
* GET_MODE_SIZE (inner_mode
)),
9490 XVECEXP (vals
, 0, i
));
9491 emit_move_insn (target
, mem
);
9494 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9495 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9496 reported source locations are bogus. */
9499 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
9504 gcc_assert (CONST_INT_P (operand
));
9506 lane
= INTVAL (operand
);
9508 if (lane
< low
|| lane
>= high
)
9512 /* Bounds-check lanes. */
9515 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
9517 bounds_check (operand
, low
, high
, "lane out of range");
9520 /* Bounds-check constants. */
9523 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
9525 bounds_check (operand
, low
, high
, "constant out of range");
9529 neon_element_bits (enum machine_mode mode
)
9532 return GET_MODE_BITSIZE (mode
);
9534 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
9538 /* Predicates for `match_operand' and `match_operator'. */
9540 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9541 WB is true if full writeback address modes are allowed and is false
9542 if limited writeback address modes (POST_INC and PRE_DEC) are
9546 arm_coproc_mem_operand (rtx op
, bool wb
)
9550 /* Reject eliminable registers. */
9551 if (! (reload_in_progress
|| reload_completed
)
9552 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
9553 || reg_mentioned_p (arg_pointer_rtx
, op
)
9554 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
9555 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
9556 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
9557 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
9560 /* Constants are converted into offsets from labels. */
9566 if (reload_completed
9567 && (GET_CODE (ind
) == LABEL_REF
9568 || (GET_CODE (ind
) == CONST
9569 && GET_CODE (XEXP (ind
, 0)) == PLUS
9570 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
9571 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
9574 /* Match: (mem (reg)). */
9576 return arm_address_register_rtx_p (ind
, 0);
9578 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
9579 acceptable in any case (subject to verification by
9580 arm_address_register_rtx_p). We need WB to be true to accept
9581 PRE_INC and POST_DEC. */
9582 if (GET_CODE (ind
) == POST_INC
9583 || GET_CODE (ind
) == PRE_DEC
9585 && (GET_CODE (ind
) == PRE_INC
9586 || GET_CODE (ind
) == POST_DEC
)))
9587 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
9590 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
9591 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
9592 && GET_CODE (XEXP (ind
, 1)) == PLUS
9593 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
9594 ind
= XEXP (ind
, 1);
9599 if (GET_CODE (ind
) == PLUS
9600 && REG_P (XEXP (ind
, 0))
9601 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
9602 && CONST_INT_P (XEXP (ind
, 1))
9603 && INTVAL (XEXP (ind
, 1)) > -1024
9604 && INTVAL (XEXP (ind
, 1)) < 1024
9605 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
9611 /* Return TRUE if OP is a memory operand which we can load or store a vector
9612 to/from. TYPE is one of the following values:
9613 0 - Vector load/stor (vldr)
9614 1 - Core registers (ldm)
9615 2 - Element/structure loads (vld1)
9618 neon_vector_mem_operand (rtx op
, int type
)
9622 /* Reject eliminable registers. */
9623 if (! (reload_in_progress
|| reload_completed
)
9624 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
9625 || reg_mentioned_p (arg_pointer_rtx
, op
)
9626 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
9627 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
9628 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
9629 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
9632 /* Constants are converted into offsets from labels. */
9638 if (reload_completed
9639 && (GET_CODE (ind
) == LABEL_REF
9640 || (GET_CODE (ind
) == CONST
9641 && GET_CODE (XEXP (ind
, 0)) == PLUS
9642 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
9643 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
9646 /* Match: (mem (reg)). */
9648 return arm_address_register_rtx_p (ind
, 0);
9650 /* Allow post-increment with Neon registers. */
9651 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
9652 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
9653 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
9655 /* FIXME: vld1 allows register post-modify. */
9661 && GET_CODE (ind
) == PLUS
9662 && REG_P (XEXP (ind
, 0))
9663 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
9664 && CONST_INT_P (XEXP (ind
, 1))
9665 && INTVAL (XEXP (ind
, 1)) > -1024
9666 /* For quad modes, we restrict the constant offset to be slightly less
9667 than what the instruction format permits. We have no such constraint
9668 on double mode offsets. (This must match arm_legitimate_index_p.) */
9669 && (INTVAL (XEXP (ind
, 1))
9670 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
9671 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
9677 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9680 neon_struct_mem_operand (rtx op
)
9684 /* Reject eliminable registers. */
9685 if (! (reload_in_progress
|| reload_completed
)
9686 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
9687 || reg_mentioned_p (arg_pointer_rtx
, op
)
9688 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
9689 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
9690 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
9691 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
9694 /* Constants are converted into offsets from labels. */
9700 if (reload_completed
9701 && (GET_CODE (ind
) == LABEL_REF
9702 || (GET_CODE (ind
) == CONST
9703 && GET_CODE (XEXP (ind
, 0)) == PLUS
9704 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
9705 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
9708 /* Match: (mem (reg)). */
9710 return arm_address_register_rtx_p (ind
, 0);
9712 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9713 if (GET_CODE (ind
) == POST_INC
9714 || GET_CODE (ind
) == PRE_DEC
)
9715 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
9720 /* Return true if X is a register that will be eliminated later on. */
9722 arm_eliminable_register (rtx x
)
9724 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
9725 || REGNO (x
) == ARG_POINTER_REGNUM
9726 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
9727 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
9730 /* Return GENERAL_REGS if a scratch register required to reload x to/from
9731 coprocessor registers. Otherwise return NO_REGS. */
9734 coproc_secondary_reload_class (enum machine_mode mode
, rtx x
, bool wb
)
9738 if (!TARGET_NEON_FP16
)
9739 return GENERAL_REGS
;
9740 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2))
9742 return GENERAL_REGS
;
9745 /* The neon move patterns handle all legitimate vector and struct
9748 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
9749 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
9750 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
9751 || VALID_NEON_STRUCT_MODE (mode
)))
9754 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
9757 return GENERAL_REGS
;
9760 /* Values which must be returned in the most-significant end of the return
9764 arm_return_in_msb (const_tree valtype
)
9766 return (TARGET_AAPCS_BASED
9768 && (AGGREGATE_TYPE_P (valtype
)
9769 || TREE_CODE (valtype
) == COMPLEX_TYPE
9770 || FIXED_POINT_TYPE_P (valtype
)));
9773 /* Return TRUE if X references a SYMBOL_REF. */
9775 symbol_mentioned_p (rtx x
)
9780 if (GET_CODE (x
) == SYMBOL_REF
)
9783 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9784 are constant offsets, not symbols. */
9785 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
9788 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
9790 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
9796 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
9797 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
9800 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
9807 /* Return TRUE if X references a LABEL_REF. */
9809 label_mentioned_p (rtx x
)
9814 if (GET_CODE (x
) == LABEL_REF
)
9817 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9818 instruction, but they are constant offsets, not symbols. */
9819 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
9822 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
9823 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
9829 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
9830 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
9833 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
9841 tls_mentioned_p (rtx x
)
9843 switch (GET_CODE (x
))
9846 return tls_mentioned_p (XEXP (x
, 0));
9849 if (XINT (x
, 1) == UNSPEC_TLS
)
9857 /* Must not copy any rtx that uses a pc-relative address. */
9860 arm_note_pic_base (rtx
*x
, void *date ATTRIBUTE_UNUSED
)
9862 if (GET_CODE (*x
) == UNSPEC
9863 && (XINT (*x
, 1) == UNSPEC_PIC_BASE
9864 || XINT (*x
, 1) == UNSPEC_PIC_UNIFIED
))
9870 arm_cannot_copy_insn_p (rtx insn
)
9872 /* The tls call insn cannot be copied, as it is paired with a data
9874 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
9877 return for_each_rtx (&PATTERN (insn
), arm_note_pic_base
, NULL
);
9883 enum rtx_code code
= GET_CODE (x
);
9900 /* Match pair of min/max operators that can be implemented via usat/ssat. */
9903 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
9904 int *mask
, bool *signed_sat
)
9906 /* The high bound must be a power of two minus one. */
9907 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
9911 /* The low bound is either zero (for usat) or one less than the
9912 negation of the high bound (for ssat). */
9913 if (INTVAL (lo_bound
) == 0)
9918 *signed_sat
= false;
9923 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
9936 /* Return 1 if memory locations are adjacent. */
9938 adjacent_mem_locations (rtx a
, rtx b
)
9940 /* We don't guarantee to preserve the order of these memory refs. */
9941 if (volatile_refs_p (a
) || volatile_refs_p (b
))
9944 if ((REG_P (XEXP (a
, 0))
9945 || (GET_CODE (XEXP (a
, 0)) == PLUS
9946 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
9947 && (REG_P (XEXP (b
, 0))
9948 || (GET_CODE (XEXP (b
, 0)) == PLUS
9949 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
9951 HOST_WIDE_INT val0
= 0, val1
= 0;
9955 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
9957 reg0
= XEXP (XEXP (a
, 0), 0);
9958 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
9963 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
9965 reg1
= XEXP (XEXP (b
, 0), 0);
9966 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
9971 /* Don't accept any offset that will require multiple
9972 instructions to handle, since this would cause the
9973 arith_adjacentmem pattern to output an overlong sequence. */
9974 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
9977 /* Don't allow an eliminable register: register elimination can make
9978 the offset too large. */
9979 if (arm_eliminable_register (reg0
))
9982 val_diff
= val1
- val0
;
9986 /* If the target has load delay slots, then there's no benefit
9987 to using an ldm instruction unless the offset is zero and
9988 we are optimizing for size. */
9989 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
9990 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
9991 && (val_diff
== 4 || val_diff
== -4));
9994 return ((REGNO (reg0
) == REGNO (reg1
))
9995 && (val_diff
== 4 || val_diff
== -4));
10001 /* Return true if OP is a valid load or store multiple operation. LOAD is true
10002 for load operations, false for store operations. CONSECUTIVE is true
10003 if the register numbers in the operation must be consecutive in the register
10004 bank. RETURN_PC is true if value is to be loaded in PC.
10005 The pattern we are trying to match for load is:
10006 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
10007 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
10010 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
10013 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
10014 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
10015 3. If consecutive is TRUE, then for kth register being loaded,
10016 REGNO (R_dk) = REGNO (R_d0) + k.
10017 The pattern for store is similar. */
10019 ldm_stm_operation_p (rtx op
, bool load
, enum machine_mode mode
,
10020 bool consecutive
, bool return_pc
)
10022 HOST_WIDE_INT count
= XVECLEN (op
, 0);
10023 rtx reg
, mem
, addr
;
10025 unsigned first_regno
;
10026 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
10028 bool addr_reg_in_reglist
= false;
10029 bool update
= false;
10034 /* If not in SImode, then registers must be consecutive
10035 (e.g., VLDM instructions for DFmode). */
10036 gcc_assert ((mode
== SImode
) || consecutive
);
10037 /* Setting return_pc for stores is illegal. */
10038 gcc_assert (!return_pc
|| load
);
10040 /* Set up the increments and the regs per val based on the mode. */
10041 reg_increment
= GET_MODE_SIZE (mode
);
10042 regs_per_val
= reg_increment
/ 4;
10043 offset_adj
= return_pc
? 1 : 0;
10046 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
10047 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
10050 /* Check if this is a write-back. */
10051 elt
= XVECEXP (op
, 0, offset_adj
);
10052 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
10058 /* The offset adjustment must be the number of registers being
10059 popped times the size of a single register. */
10060 if (!REG_P (SET_DEST (elt
))
10061 || !REG_P (XEXP (SET_SRC (elt
), 0))
10062 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
10063 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
10064 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
10065 ((count
- 1 - offset_adj
) * reg_increment
))
10069 i
= i
+ offset_adj
;
10070 base
= base
+ offset_adj
;
10071 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
10072 success depends on the type: VLDM can do just one reg,
10073 LDM must do at least two. */
10074 if ((count
<= i
) && (mode
== SImode
))
10077 elt
= XVECEXP (op
, 0, i
- 1);
10078 if (GET_CODE (elt
) != SET
)
10083 reg
= SET_DEST (elt
);
10084 mem
= SET_SRC (elt
);
10088 reg
= SET_SRC (elt
);
10089 mem
= SET_DEST (elt
);
10092 if (!REG_P (reg
) || !MEM_P (mem
))
10095 regno
= REGNO (reg
);
10096 first_regno
= regno
;
10097 addr
= XEXP (mem
, 0);
10098 if (GET_CODE (addr
) == PLUS
)
10100 if (!CONST_INT_P (XEXP (addr
, 1)))
10103 offset
= INTVAL (XEXP (addr
, 1));
10104 addr
= XEXP (addr
, 0);
10110 /* Don't allow SP to be loaded unless it is also the base register. It
10111 guarantees that SP is reset correctly when an LDM instruction
10112 is interruptted. Otherwise, we might end up with a corrupt stack. */
10113 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
10116 for (; i
< count
; i
++)
10118 elt
= XVECEXP (op
, 0, i
);
10119 if (GET_CODE (elt
) != SET
)
10124 reg
= SET_DEST (elt
);
10125 mem
= SET_SRC (elt
);
10129 reg
= SET_SRC (elt
);
10130 mem
= SET_DEST (elt
);
10134 || GET_MODE (reg
) != mode
10135 || REGNO (reg
) <= regno
10138 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
10139 /* Don't allow SP to be loaded unless it is also the base register. It
10140 guarantees that SP is reset correctly when an LDM instruction
10141 is interrupted. Otherwise, we might end up with a corrupt stack. */
10142 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
10144 || GET_MODE (mem
) != mode
10145 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
10146 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
10147 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
10148 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
10149 offset
+ (i
- base
) * reg_increment
))
10150 && (!REG_P (XEXP (mem
, 0))
10151 || offset
+ (i
- base
) * reg_increment
!= 0)))
10154 regno
= REGNO (reg
);
10155 if (regno
== REGNO (addr
))
10156 addr_reg_in_reglist
= true;
10161 if (update
&& addr_reg_in_reglist
)
10164 /* For Thumb-1, address register is always modified - either by write-back
10165 or by explicit load. If the pattern does not describe an update,
10166 then the address register must be in the list of loaded registers. */
10168 return update
|| addr_reg_in_reglist
;
10174 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10175 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10176 instruction. ADD_OFFSET is nonzero if the base address register needs
10177 to be modified with an add instruction before we can use it. */
10180 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
10181 int nops
, HOST_WIDE_INT add_offset
)
10183 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10184 if the offset isn't small enough. The reason 2 ldrs are faster
10185 is because these ARMs are able to do more than one cache access
10186 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10187 whilst the ARM8 has a double bandwidth cache. This means that
10188 these cores can do both an instruction fetch and a data fetch in
10189 a single cycle, so the trick of calculating the address into a
10190 scratch register (one of the result regs) and then doing a load
10191 multiple actually becomes slower (and no smaller in code size).
10192 That is the transformation
10194 ldr rd1, [rbase + offset]
10195 ldr rd2, [rbase + offset + 4]
10199 add rd1, rbase, offset
10200 ldmia rd1, {rd1, rd2}
10202 produces worse code -- '3 cycles + any stalls on rd2' instead of
10203 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10204 access per cycle, the first sequence could never complete in less
10205 than 6 cycles, whereas the ldm sequence would only take 5 and
10206 would make better use of sequential accesses if not hitting the
10209 We cheat here and test 'arm_ld_sched' which we currently know to
10210 only be true for the ARM8, ARM9 and StrongARM. If this ever
10211 changes, then the test below needs to be reworked. */
10212 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
10215 /* XScale has load-store double instructions, but they have stricter
10216 alignment requirements than load-store multiple, so we cannot
10219 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10220 the pipeline until completion.
10228 An ldr instruction takes 1-3 cycles, but does not block the
10237 Best case ldr will always win. However, the more ldr instructions
10238 we issue, the less likely we are to be able to schedule them well.
10239 Using ldr instructions also increases code size.
10241 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10242 for counts of 3 or 4 regs. */
10243 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
10248 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10249 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10250 an array ORDER which describes the sequence to use when accessing the
10251 offsets that produces an ascending order. In this sequence, each
10252 offset must be larger by exactly 4 than the previous one. ORDER[0]
10253 must have been filled in with the lowest offset by the caller.
10254 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10255 we use to verify that ORDER produces an ascending order of registers.
10256 Return true if it was possible to construct such an order, false if
10260 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
10261 int *unsorted_regs
)
10264 for (i
= 1; i
< nops
; i
++)
10268 order
[i
] = order
[i
- 1];
10269 for (j
= 0; j
< nops
; j
++)
10270 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
10272 /* We must find exactly one offset that is higher than the
10273 previous one by 4. */
10274 if (order
[i
] != order
[i
- 1])
10278 if (order
[i
] == order
[i
- 1])
10280 /* The register numbers must be ascending. */
10281 if (unsorted_regs
!= NULL
10282 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
10288 /* Used to determine in a peephole whether a sequence of load
10289 instructions can be changed into a load-multiple instruction.
10290 NOPS is the number of separate load instructions we are examining. The
10291 first NOPS entries in OPERANDS are the destination registers, the
10292 next NOPS entries are memory operands. If this function is
10293 successful, *BASE is set to the common base register of the memory
10294 accesses; *LOAD_OFFSET is set to the first memory location's offset
10295 from that base register.
10296 REGS is an array filled in with the destination register numbers.
10297 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
10298 insn numbers to an ascending order of stores. If CHECK_REGS is true,
10299 the sequence of registers in REGS matches the loads from ascending memory
10300 locations, and the function verifies that the register numbers are
10301 themselves ascending. If CHECK_REGS is false, the register numbers
10302 are stored in the order they are found in the operands. */
10304 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
10305 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
10307 int unsorted_regs
[MAX_LDM_STM_OPS
];
10308 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
10309 int order
[MAX_LDM_STM_OPS
];
10310 rtx base_reg_rtx
= NULL
;
10314 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10315 easily extended if required. */
10316 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
10318 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
10320 /* Loop over the operands and check that the memory references are
10321 suitable (i.e. immediate offsets from the same base register). At
10322 the same time, extract the target register, and the memory
10324 for (i
= 0; i
< nops
; i
++)
10329 /* Convert a subreg of a mem into the mem itself. */
10330 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
10331 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
10333 gcc_assert (MEM_P (operands
[nops
+ i
]));
10335 /* Don't reorder volatile memory references; it doesn't seem worth
10336 looking for the case where the order is ok anyway. */
10337 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
10340 offset
= const0_rtx
;
10342 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
10343 || (GET_CODE (reg
) == SUBREG
10344 && REG_P (reg
= SUBREG_REG (reg
))))
10345 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
10346 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
10347 || (GET_CODE (reg
) == SUBREG
10348 && REG_P (reg
= SUBREG_REG (reg
))))
10349 && (CONST_INT_P (offset
10350 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
10354 base_reg
= REGNO (reg
);
10355 base_reg_rtx
= reg
;
10356 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
10359 else if (base_reg
!= (int) REGNO (reg
))
10360 /* Not addressed from the same base register. */
10363 unsorted_regs
[i
] = (REG_P (operands
[i
])
10364 ? REGNO (operands
[i
])
10365 : REGNO (SUBREG_REG (operands
[i
])));
10367 /* If it isn't an integer register, or if it overwrites the
10368 base register but isn't the last insn in the list, then
10369 we can't do this. */
10370 if (unsorted_regs
[i
] < 0
10371 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
10372 || unsorted_regs
[i
] > 14
10373 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
10376 unsorted_offsets
[i
] = INTVAL (offset
);
10377 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
10381 /* Not a suitable memory address. */
10385 /* All the useful information has now been extracted from the
10386 operands into unsorted_regs and unsorted_offsets; additionally,
10387 order[0] has been set to the lowest offset in the list. Sort
10388 the offsets into order, verifying that they are adjacent, and
10389 check that the register numbers are ascending. */
10390 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
10391 check_regs
? unsorted_regs
: NULL
))
10395 memcpy (saved_order
, order
, sizeof order
);
10401 for (i
= 0; i
< nops
; i
++)
10402 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
10404 *load_offset
= unsorted_offsets
[order
[0]];
10408 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
10411 if (unsorted_offsets
[order
[0]] == 0)
10412 ldm_case
= 1; /* ldmia */
10413 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
10414 ldm_case
= 2; /* ldmib */
10415 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
10416 ldm_case
= 3; /* ldmda */
10417 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
10418 ldm_case
= 4; /* ldmdb */
10419 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
10420 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
10425 if (!multiple_operation_profitable_p (false, nops
,
10427 ? unsorted_offsets
[order
[0]] : 0))
10433 /* Used to determine in a peephole whether a sequence of store instructions can
10434 be changed into a store-multiple instruction.
10435 NOPS is the number of separate store instructions we are examining.
10436 NOPS_TOTAL is the total number of instructions recognized by the peephole
10438 The first NOPS entries in OPERANDS are the source registers, the next
10439 NOPS entries are memory operands. If this function is successful, *BASE is
10440 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10441 to the first memory location's offset from that base register. REGS is an
10442 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10443 likewise filled with the corresponding rtx's.
10444 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
10445 numbers to an ascending order of stores.
10446 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10447 from ascending memory locations, and the function verifies that the register
10448 numbers are themselves ascending. If CHECK_REGS is false, the register
10449 numbers are stored in the order they are found in the operands. */
10451 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
10452 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
10453 HOST_WIDE_INT
*load_offset
, bool check_regs
)
10455 int unsorted_regs
[MAX_LDM_STM_OPS
];
10456 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
10457 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
10458 int order
[MAX_LDM_STM_OPS
];
10460 rtx base_reg_rtx
= NULL
;
10463 /* Write back of base register is currently only supported for Thumb 1. */
10464 int base_writeback
= TARGET_THUMB1
;
10466 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10467 easily extended if required. */
10468 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
10470 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
10472 /* Loop over the operands and check that the memory references are
10473 suitable (i.e. immediate offsets from the same base register). At
10474 the same time, extract the target register, and the memory
10476 for (i
= 0; i
< nops
; i
++)
10481 /* Convert a subreg of a mem into the mem itself. */
10482 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
10483 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
10485 gcc_assert (MEM_P (operands
[nops
+ i
]));
10487 /* Don't reorder volatile memory references; it doesn't seem worth
10488 looking for the case where the order is ok anyway. */
10489 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
10492 offset
= const0_rtx
;
10494 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
10495 || (GET_CODE (reg
) == SUBREG
10496 && REG_P (reg
= SUBREG_REG (reg
))))
10497 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
10498 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
10499 || (GET_CODE (reg
) == SUBREG
10500 && REG_P (reg
= SUBREG_REG (reg
))))
10501 && (CONST_INT_P (offset
10502 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
10504 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
10505 ? operands
[i
] : SUBREG_REG (operands
[i
]));
10506 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
10510 base_reg
= REGNO (reg
);
10511 base_reg_rtx
= reg
;
10512 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
10515 else if (base_reg
!= (int) REGNO (reg
))
10516 /* Not addressed from the same base register. */
10519 /* If it isn't an integer register, then we can't do this. */
10520 if (unsorted_regs
[i
] < 0
10521 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
10522 /* The effects are unpredictable if the base register is
10523 both updated and stored. */
10524 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
10525 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
10526 || unsorted_regs
[i
] > 14)
10529 unsorted_offsets
[i
] = INTVAL (offset
);
10530 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
10534 /* Not a suitable memory address. */
10538 /* All the useful information has now been extracted from the
10539 operands into unsorted_regs and unsorted_offsets; additionally,
10540 order[0] has been set to the lowest offset in the list. Sort
10541 the offsets into order, verifying that they are adjacent, and
10542 check that the register numbers are ascending. */
10543 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
10544 check_regs
? unsorted_regs
: NULL
))
10548 memcpy (saved_order
, order
, sizeof order
);
10554 for (i
= 0; i
< nops
; i
++)
10556 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
10558 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
10561 *load_offset
= unsorted_offsets
[order
[0]];
10565 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
10568 if (unsorted_offsets
[order
[0]] == 0)
10569 stm_case
= 1; /* stmia */
10570 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
10571 stm_case
= 2; /* stmib */
10572 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
10573 stm_case
= 3; /* stmda */
10574 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
10575 stm_case
= 4; /* stmdb */
10579 if (!multiple_operation_profitable_p (false, nops
, 0))
10585 /* Routines for use in generating RTL. */
10587 /* Generate a load-multiple instruction. COUNT is the number of loads in
10588 the instruction; REGS and MEMS are arrays containing the operands.
10589 BASEREG is the base register to be used in addressing the memory operands.
10590 WBACK_OFFSET is nonzero if the instruction should update the base
10594 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
10595 HOST_WIDE_INT wback_offset
)
10600 if (!multiple_operation_profitable_p (false, count
, 0))
10606 for (i
= 0; i
< count
; i
++)
10607 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
10609 if (wback_offset
!= 0)
10610 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
10612 seq
= get_insns ();
10618 result
= gen_rtx_PARALLEL (VOIDmode
,
10619 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
10620 if (wback_offset
!= 0)
10622 XVECEXP (result
, 0, 0)
10623 = gen_rtx_SET (VOIDmode
, basereg
,
10624 plus_constant (Pmode
, basereg
, wback_offset
));
10629 for (j
= 0; i
< count
; i
++, j
++)
10630 XVECEXP (result
, 0, i
)
10631 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
10636 /* Generate a store-multiple instruction. COUNT is the number of stores in
10637 the instruction; REGS and MEMS are arrays containing the operands.
10638 BASEREG is the base register to be used in addressing the memory operands.
10639 WBACK_OFFSET is nonzero if the instruction should update the base
10643 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
10644 HOST_WIDE_INT wback_offset
)
10649 if (GET_CODE (basereg
) == PLUS
)
10650 basereg
= XEXP (basereg
, 0);
10652 if (!multiple_operation_profitable_p (false, count
, 0))
10658 for (i
= 0; i
< count
; i
++)
10659 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
10661 if (wback_offset
!= 0)
10662 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
10664 seq
= get_insns ();
10670 result
= gen_rtx_PARALLEL (VOIDmode
,
10671 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
10672 if (wback_offset
!= 0)
10674 XVECEXP (result
, 0, 0)
10675 = gen_rtx_SET (VOIDmode
, basereg
,
10676 plus_constant (Pmode
, basereg
, wback_offset
));
10681 for (j
= 0; i
< count
; i
++, j
++)
10682 XVECEXP (result
, 0, i
)
10683 = gen_rtx_SET (VOIDmode
, mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
10688 /* Generate either a load-multiple or a store-multiple instruction. This
10689 function can be used in situations where we can start with a single MEM
10690 rtx and adjust its address upwards.
10691 COUNT is the number of operations in the instruction, not counting a
10692 possible update of the base register. REGS is an array containing the
10694 BASEREG is the base register to be used in addressing the memory operands,
10695 which are constructed from BASEMEM.
10696 WRITE_BACK specifies whether the generated instruction should include an
10697 update of the base register.
10698 OFFSETP is used to pass an offset to and from this function; this offset
10699 is not used when constructing the address (instead BASEMEM should have an
10700 appropriate offset in its address), it is used only for setting
10701 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
10704 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
10705 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
10707 rtx mems
[MAX_LDM_STM_OPS
];
10708 HOST_WIDE_INT offset
= *offsetp
;
10711 gcc_assert (count
<= MAX_LDM_STM_OPS
);
10713 if (GET_CODE (basereg
) == PLUS
)
10714 basereg
= XEXP (basereg
, 0);
10716 for (i
= 0; i
< count
; i
++)
10718 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
10719 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
10727 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
10728 write_back
? 4 * count
: 0);
10730 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
10731 write_back
? 4 * count
: 0);
10735 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
10736 rtx basemem
, HOST_WIDE_INT
*offsetp
)
10738 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
10743 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
10744 rtx basemem
, HOST_WIDE_INT
*offsetp
)
10746 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
10750 /* Called from a peephole2 expander to turn a sequence of loads into an
10751 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10752 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10753 is true if we can reorder the registers because they are used commutatively
10755 Returns true iff we could generate a new instruction. */
10758 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
10760 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10761 rtx mems
[MAX_LDM_STM_OPS
];
10762 int i
, j
, base_reg
;
10764 HOST_WIDE_INT offset
;
10765 int write_back
= FALSE
;
10769 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
10770 &base_reg
, &offset
, !sort_regs
);
10776 for (i
= 0; i
< nops
- 1; i
++)
10777 for (j
= i
+ 1; j
< nops
; j
++)
10778 if (regs
[i
] > regs
[j
])
10784 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10788 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
10789 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
10795 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
10796 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
10798 if (!TARGET_THUMB1
)
10800 base_reg
= regs
[0];
10801 base_reg_rtx
= newbase
;
10805 for (i
= 0; i
< nops
; i
++)
10807 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
10808 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10811 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
10812 write_back
? offset
+ i
* 4 : 0));
10816 /* Called from a peephole2 expander to turn a sequence of stores into an
10817 STM instruction. OPERANDS are the operands found by the peephole matcher;
10818 NOPS indicates how many separate stores we are trying to combine.
10819 Returns true iff we could generate a new instruction. */
10822 gen_stm_seq (rtx
*operands
, int nops
)
10825 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10826 rtx mems
[MAX_LDM_STM_OPS
];
10829 HOST_WIDE_INT offset
;
10830 int write_back
= FALSE
;
10833 bool base_reg_dies
;
10835 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
10836 mem_order
, &base_reg
, &offset
, true);
10841 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10843 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
10846 gcc_assert (base_reg_dies
);
10852 gcc_assert (base_reg_dies
);
10853 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
10857 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
10859 for (i
= 0; i
< nops
; i
++)
10861 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
10862 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10865 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
10866 write_back
? offset
+ i
* 4 : 0));
10870 /* Called from a peephole2 expander to turn a sequence of stores that are
10871 preceded by constant loads into an STM instruction. OPERANDS are the
10872 operands found by the peephole matcher; NOPS indicates how many
10873 separate stores we are trying to combine; there are 2 * NOPS
10874 instructions in the peephole.
10875 Returns true iff we could generate a new instruction. */
10878 gen_const_stm_seq (rtx
*operands
, int nops
)
10880 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
10881 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10882 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
10883 rtx mems
[MAX_LDM_STM_OPS
];
10886 HOST_WIDE_INT offset
;
10887 int write_back
= FALSE
;
10890 bool base_reg_dies
;
10892 HARD_REG_SET allocated
;
10894 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
10895 mem_order
, &base_reg
, &offset
, false);
10900 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
10902 /* If the same register is used more than once, try to find a free
10904 CLEAR_HARD_REG_SET (allocated
);
10905 for (i
= 0; i
< nops
; i
++)
10907 for (j
= i
+ 1; j
< nops
; j
++)
10908 if (regs
[i
] == regs
[j
])
10910 rtx t
= peep2_find_free_register (0, nops
* 2,
10911 TARGET_THUMB1
? "l" : "r",
10912 SImode
, &allocated
);
10916 regs
[i
] = REGNO (t
);
10920 /* Compute an ordering that maps the register numbers to an ascending
10923 for (i
= 0; i
< nops
; i
++)
10924 if (regs
[i
] < regs
[reg_order
[0]])
10927 for (i
= 1; i
< nops
; i
++)
10929 int this_order
= reg_order
[i
- 1];
10930 for (j
= 0; j
< nops
; j
++)
10931 if (regs
[j
] > regs
[reg_order
[i
- 1]]
10932 && (this_order
== reg_order
[i
- 1]
10933 || regs
[j
] < regs
[this_order
]))
10935 reg_order
[i
] = this_order
;
10938 /* Ensure that registers that must be live after the instruction end
10939 up with the correct value. */
10940 for (i
= 0; i
< nops
; i
++)
10942 int this_order
= reg_order
[i
];
10943 if ((this_order
!= mem_order
[i
]
10944 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
10945 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
10949 /* Load the constants. */
10950 for (i
= 0; i
< nops
; i
++)
10952 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
10953 sorted_regs
[i
] = regs
[reg_order
[i
]];
10954 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
10957 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10959 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
10962 gcc_assert (base_reg_dies
);
10968 gcc_assert (base_reg_dies
);
10969 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
10973 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
10975 for (i
= 0; i
< nops
; i
++)
10977 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
10978 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10981 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
10982 write_back
? offset
+ i
* 4 : 0));
10986 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10987 unaligned copies on processors which support unaligned semantics for those
10988 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10989 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10990 An interleave factor of 1 (the minimum) will perform no interleaving.
10991 Load/store multiple are used for aligned addresses where possible. */
10994 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
10995 HOST_WIDE_INT length
,
10996 unsigned int interleave_factor
)
10998 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
10999 int *regnos
= XALLOCAVEC (int, interleave_factor
);
11000 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
11001 HOST_WIDE_INT i
, j
;
11002 HOST_WIDE_INT remaining
= length
, words
;
11003 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
11005 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
11006 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
11007 HOST_WIDE_INT srcoffset
, dstoffset
;
11008 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
11011 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
11013 /* Use hard registers if we have aligned source or destination so we can use
11014 load/store multiple with contiguous registers. */
11015 if (dst_aligned
|| src_aligned
)
11016 for (i
= 0; i
< interleave_factor
; i
++)
11017 regs
[i
] = gen_rtx_REG (SImode
, i
);
11019 for (i
= 0; i
< interleave_factor
; i
++)
11020 regs
[i
] = gen_reg_rtx (SImode
);
11022 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
11023 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
11025 srcoffset
= dstoffset
= 0;
11027 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
11028 For copying the last bytes we want to subtract this offset again. */
11029 src_autoinc
= dst_autoinc
= 0;
11031 for (i
= 0; i
< interleave_factor
; i
++)
11034 /* Copy BLOCK_SIZE_BYTES chunks. */
11036 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
11039 if (src_aligned
&& interleave_factor
> 1)
11041 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
11042 TRUE
, srcbase
, &srcoffset
));
11043 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
11047 for (j
= 0; j
< interleave_factor
; j
++)
11049 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
11051 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
11052 srcoffset
+ j
* UNITS_PER_WORD
);
11053 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
11055 srcoffset
+= block_size_bytes
;
11059 if (dst_aligned
&& interleave_factor
> 1)
11061 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
11062 TRUE
, dstbase
, &dstoffset
));
11063 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
11067 for (j
= 0; j
< interleave_factor
; j
++)
11069 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
11071 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
11072 dstoffset
+ j
* UNITS_PER_WORD
);
11073 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
11075 dstoffset
+= block_size_bytes
;
11078 remaining
-= block_size_bytes
;
11081 /* Copy any whole words left (note these aren't interleaved with any
11082 subsequent halfword/byte load/stores in the interests of simplicity). */
11084 words
= remaining
/ UNITS_PER_WORD
;
11086 gcc_assert (words
< interleave_factor
);
11088 if (src_aligned
&& words
> 1)
11090 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
11092 src_autoinc
+= UNITS_PER_WORD
* words
;
11096 for (j
= 0; j
< words
; j
++)
11098 addr
= plus_constant (Pmode
, src
,
11099 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
11100 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
11101 srcoffset
+ j
* UNITS_PER_WORD
);
11102 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
11104 srcoffset
+= words
* UNITS_PER_WORD
;
11107 if (dst_aligned
&& words
> 1)
11109 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
11111 dst_autoinc
+= words
* UNITS_PER_WORD
;
11115 for (j
= 0; j
< words
; j
++)
11117 addr
= plus_constant (Pmode
, dst
,
11118 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
11119 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
11120 dstoffset
+ j
* UNITS_PER_WORD
);
11121 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
11123 dstoffset
+= words
* UNITS_PER_WORD
;
11126 remaining
-= words
* UNITS_PER_WORD
;
11128 gcc_assert (remaining
< 4);
11130 /* Copy a halfword if necessary. */
11132 if (remaining
>= 2)
11134 halfword_tmp
= gen_reg_rtx (SImode
);
11136 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
11137 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
11138 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
11140 /* Either write out immediately, or delay until we've loaded the last
11141 byte, depending on interleave factor. */
11142 if (interleave_factor
== 1)
11144 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
11145 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
11146 emit_insn (gen_unaligned_storehi (mem
,
11147 gen_lowpart (HImode
, halfword_tmp
)));
11148 halfword_tmp
= NULL
;
11156 gcc_assert (remaining
< 2);
11158 /* Copy last byte. */
11160 if ((remaining
& 1) != 0)
11162 byte_tmp
= gen_reg_rtx (SImode
);
11164 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
11165 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
11166 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
11168 if (interleave_factor
== 1)
11170 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
11171 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
11172 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
11181 /* Store last halfword if we haven't done so already. */
11185 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
11186 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
11187 emit_insn (gen_unaligned_storehi (mem
,
11188 gen_lowpart (HImode
, halfword_tmp
)));
11192 /* Likewise for last byte. */
11196 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
11197 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
11198 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
11202 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
11205 /* From mips_adjust_block_mem:
11207 Helper function for doing a loop-based block operation on memory
11208 reference MEM. Each iteration of the loop will operate on LENGTH
11211 Create a new base register for use within the loop and point it to
11212 the start of MEM. Create a new memory reference that uses this
11213 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11216 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
11219 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
11221 /* Although the new mem does not refer to a known location,
11222 it does keep up to LENGTH bytes of alignment. */
11223 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
11224 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
11227 /* From mips_block_move_loop:
11229 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11230 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11231 the memory regions do not overlap. */
11234 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
11235 unsigned int interleave_factor
,
11236 HOST_WIDE_INT bytes_per_iter
)
11238 rtx label
, src_reg
, dest_reg
, final_src
, test
;
11239 HOST_WIDE_INT leftover
;
11241 leftover
= length
% bytes_per_iter
;
11242 length
-= leftover
;
11244 /* Create registers and memory references for use within the loop. */
11245 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
11246 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
11248 /* Calculate the value that SRC_REG should have after the last iteration of
11250 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
11251 0, 0, OPTAB_WIDEN
);
11253 /* Emit the start of the loop. */
11254 label
= gen_label_rtx ();
11255 emit_label (label
);
11257 /* Emit the loop body. */
11258 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
11259 interleave_factor
);
11261 /* Move on to the next block. */
11262 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
11263 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
11265 /* Emit the loop condition. */
11266 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
11267 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
11269 /* Mop up any left-over bytes. */
11271 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
11274 /* Emit a block move when either the source or destination is unaligned (not
11275 aligned to a four-byte boundary). This may need further tuning depending on
11276 core type, optimize_size setting, etc. */
11279 arm_movmemqi_unaligned (rtx
*operands
)
11281 HOST_WIDE_INT length
= INTVAL (operands
[2]);
11285 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
11286 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
11287 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11288 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11289 or dst_aligned though: allow more interleaving in those cases since the
11290 resulting code can be smaller. */
11291 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
11292 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
11295 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
11296 interleave_factor
, bytes_per_iter
);
11298 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
11299 interleave_factor
);
11303 /* Note that the loop created by arm_block_move_unaligned_loop may be
11304 subject to loop unrolling, which makes tuning this condition a little
11307 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
11309 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
11316 arm_gen_movmemqi (rtx
*operands
)
11318 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
11319 HOST_WIDE_INT srcoffset
, dstoffset
;
11321 rtx src
, dst
, srcbase
, dstbase
;
11322 rtx part_bytes_reg
= NULL
;
11325 if (!CONST_INT_P (operands
[2])
11326 || !CONST_INT_P (operands
[3])
11327 || INTVAL (operands
[2]) > 64)
11330 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
11331 return arm_movmemqi_unaligned (operands
);
11333 if (INTVAL (operands
[3]) & 3)
11336 dstbase
= operands
[0];
11337 srcbase
= operands
[1];
11339 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
11340 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
11342 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
11343 out_words_to_go
= INTVAL (operands
[2]) / 4;
11344 last_bytes
= INTVAL (operands
[2]) & 3;
11345 dstoffset
= srcoffset
= 0;
11347 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
11348 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
11350 for (i
= 0; in_words_to_go
>= 2; i
+=4)
11352 if (in_words_to_go
> 4)
11353 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
11354 TRUE
, srcbase
, &srcoffset
));
11356 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
11357 src
, FALSE
, srcbase
,
11360 if (out_words_to_go
)
11362 if (out_words_to_go
> 4)
11363 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
11364 TRUE
, dstbase
, &dstoffset
));
11365 else if (out_words_to_go
!= 1)
11366 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
11367 out_words_to_go
, dst
,
11370 dstbase
, &dstoffset
));
11373 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
11374 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
11375 if (last_bytes
!= 0)
11377 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
11383 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
11384 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
11387 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11388 if (out_words_to_go
)
11392 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
11393 sreg
= copy_to_reg (mem
);
11395 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
11396 emit_move_insn (mem
, sreg
);
11399 gcc_assert (!in_words_to_go
); /* Sanity check */
11402 if (in_words_to_go
)
11404 gcc_assert (in_words_to_go
> 0);
11406 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
11407 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
11410 gcc_assert (!last_bytes
|| part_bytes_reg
);
11412 if (BYTES_BIG_ENDIAN
&& last_bytes
)
11414 rtx tmp
= gen_reg_rtx (SImode
);
11416 /* The bytes we want are in the top end of the word. */
11417 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
11418 GEN_INT (8 * (4 - last_bytes
))));
11419 part_bytes_reg
= tmp
;
11423 mem
= adjust_automodify_address (dstbase
, QImode
,
11424 plus_constant (Pmode
, dst
,
11426 dstoffset
+ last_bytes
- 1);
11427 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
11431 tmp
= gen_reg_rtx (SImode
);
11432 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
11433 part_bytes_reg
= tmp
;
11440 if (last_bytes
> 1)
11442 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
11443 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
11447 rtx tmp
= gen_reg_rtx (SImode
);
11448 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
11449 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
11450 part_bytes_reg
= tmp
;
11457 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
11458 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
11465 /* Select a dominance comparison mode if possible for a test of the general
11466 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11467 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11468 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11469 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11470 In all cases OP will be either EQ or NE, but we don't need to know which
11471 here. If we are unable to support a dominance comparison we return
11472 CC mode. This will then fail to match for the RTL expressions that
11473 generate this call. */
11475 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
11477 enum rtx_code cond1
, cond2
;
11480 /* Currently we will probably get the wrong result if the individual
11481 comparisons are not simple. This also ensures that it is safe to
11482 reverse a comparison if necessary. */
11483 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
11485 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
11489 /* The if_then_else variant of this tests the second condition if the
11490 first passes, but is true if the first fails. Reverse the first
11491 condition to get a true "inclusive-or" expression. */
11492 if (cond_or
== DOM_CC_NX_OR_Y
)
11493 cond1
= reverse_condition (cond1
);
11495 /* If the comparisons are not equal, and one doesn't dominate the other,
11496 then we can't do this. */
11498 && !comparison_dominates_p (cond1
, cond2
)
11499 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
11504 enum rtx_code temp
= cond1
;
11512 if (cond_or
== DOM_CC_X_AND_Y
)
11517 case EQ
: return CC_DEQmode
;
11518 case LE
: return CC_DLEmode
;
11519 case LEU
: return CC_DLEUmode
;
11520 case GE
: return CC_DGEmode
;
11521 case GEU
: return CC_DGEUmode
;
11522 default: gcc_unreachable ();
11526 if (cond_or
== DOM_CC_X_AND_Y
)
11538 gcc_unreachable ();
11542 if (cond_or
== DOM_CC_X_AND_Y
)
11554 gcc_unreachable ();
11558 if (cond_or
== DOM_CC_X_AND_Y
)
11559 return CC_DLTUmode
;
11564 return CC_DLTUmode
;
11566 return CC_DLEUmode
;
11570 gcc_unreachable ();
11574 if (cond_or
== DOM_CC_X_AND_Y
)
11575 return CC_DGTUmode
;
11580 return CC_DGTUmode
;
11582 return CC_DGEUmode
;
11586 gcc_unreachable ();
11589 /* The remaining cases only occur when both comparisons are the
11592 gcc_assert (cond1
== cond2
);
11596 gcc_assert (cond1
== cond2
);
11600 gcc_assert (cond1
== cond2
);
11604 gcc_assert (cond1
== cond2
);
11605 return CC_DLEUmode
;
11608 gcc_assert (cond1
== cond2
);
11609 return CC_DGEUmode
;
11612 gcc_unreachable ();
11617 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
11619 /* All floating point compares return CCFP if it is an equality
11620 comparison, and CCFPE otherwise. */
11621 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
11644 gcc_unreachable ();
11648 /* A compare with a shifted operand. Because of canonicalization, the
11649 comparison will have to be swapped when we emit the assembler. */
11650 if (GET_MODE (y
) == SImode
11651 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
11652 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
11653 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
11654 || GET_CODE (x
) == ROTATERT
))
11657 /* This operation is performed swapped, but since we only rely on the Z
11658 flag we don't need an additional mode. */
11659 if (GET_MODE (y
) == SImode
11660 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
11661 && GET_CODE (x
) == NEG
11662 && (op
== EQ
|| op
== NE
))
11665 /* This is a special case that is used by combine to allow a
11666 comparison of a shifted byte load to be split into a zero-extend
11667 followed by a comparison of the shifted integer (only valid for
11668 equalities and unsigned inequalities). */
11669 if (GET_MODE (x
) == SImode
11670 && GET_CODE (x
) == ASHIFT
11671 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
11672 && GET_CODE (XEXP (x
, 0)) == SUBREG
11673 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
11674 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
11675 && (op
== EQ
|| op
== NE
11676 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
11677 && CONST_INT_P (y
))
11680 /* A construct for a conditional compare, if the false arm contains
11681 0, then both conditions must be true, otherwise either condition
11682 must be true. Not all conditions are possible, so CCmode is
11683 returned if it can't be done. */
11684 if (GET_CODE (x
) == IF_THEN_ELSE
11685 && (XEXP (x
, 2) == const0_rtx
11686 || XEXP (x
, 2) == const1_rtx
)
11687 && COMPARISON_P (XEXP (x
, 0))
11688 && COMPARISON_P (XEXP (x
, 1)))
11689 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
11690 INTVAL (XEXP (x
, 2)));
11692 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11693 if (GET_CODE (x
) == AND
11694 && (op
== EQ
|| op
== NE
)
11695 && COMPARISON_P (XEXP (x
, 0))
11696 && COMPARISON_P (XEXP (x
, 1)))
11697 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
11700 if (GET_CODE (x
) == IOR
11701 && (op
== EQ
|| op
== NE
)
11702 && COMPARISON_P (XEXP (x
, 0))
11703 && COMPARISON_P (XEXP (x
, 1)))
11704 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
11707 /* An operation (on Thumb) where we want to test for a single bit.
11708 This is done by shifting that bit up into the top bit of a
11709 scratch register; we can then branch on the sign bit. */
11711 && GET_MODE (x
) == SImode
11712 && (op
== EQ
|| op
== NE
)
11713 && GET_CODE (x
) == ZERO_EXTRACT
11714 && XEXP (x
, 1) == const1_rtx
)
11717 /* An operation that sets the condition codes as a side-effect, the
11718 V flag is not set correctly, so we can only use comparisons where
11719 this doesn't matter. (For LT and GE we can use "mi" and "pl"
11721 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11722 if (GET_MODE (x
) == SImode
11724 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
11725 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
11726 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
11727 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
11728 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
11729 || GET_CODE (x
) == LSHIFTRT
11730 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
11731 || GET_CODE (x
) == ROTATERT
11732 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
11733 return CC_NOOVmode
;
11735 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
11738 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
11739 && GET_CODE (x
) == PLUS
11740 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
11743 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
11749 /* A DImode comparison against zero can be implemented by
11750 or'ing the two halves together. */
11751 if (y
== const0_rtx
)
11754 /* We can do an equality test in three Thumb instructions. */
11764 /* DImode unsigned comparisons can be implemented by cmp +
11765 cmpeq without a scratch register. Not worth doing in
11776 /* DImode signed and unsigned comparisons can be implemented
11777 by cmp + sbcs with a scratch register, but that does not
11778 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11779 gcc_assert (op
!= EQ
&& op
!= NE
);
11783 gcc_unreachable ();
11787 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
11788 return GET_MODE (x
);
11793 /* X and Y are two things to compare using CODE. Emit the compare insn and
11794 return the rtx for register 0 in the proper mode. FP means this is a
11795 floating point compare: I don't think that it is needed on the arm. */
11797 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
11799 enum machine_mode mode
;
11801 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
11803 /* We might have X as a constant, Y as a register because of the predicates
11804 used for cmpdi. If so, force X to a register here. */
11805 if (dimode_comparison
&& !REG_P (x
))
11806 x
= force_reg (DImode
, x
);
11808 mode
= SELECT_CC_MODE (code
, x
, y
);
11809 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
11811 if (dimode_comparison
11812 && mode
!= CC_CZmode
)
11816 /* To compare two non-zero values for equality, XOR them and
11817 then compare against zero. Not used for ARM mode; there
11818 CC_CZmode is cheaper. */
11819 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
11821 gcc_assert (!reload_completed
);
11822 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
11826 /* A scratch register is required. */
11827 if (reload_completed
)
11828 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
11830 scratch
= gen_rtx_SCRATCH (SImode
);
11832 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
11833 set
= gen_rtx_SET (VOIDmode
, cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
11834 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
11837 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
11842 /* Generate a sequence of insns that will generate the correct return
11843 address mask depending on the physical architecture that the program
11846 arm_gen_return_addr_mask (void)
11848 rtx reg
= gen_reg_rtx (Pmode
);
11850 emit_insn (gen_return_addr_mask (reg
));
11855 arm_reload_in_hi (rtx
*operands
)
11857 rtx ref
= operands
[1];
11859 HOST_WIDE_INT offset
= 0;
11861 if (GET_CODE (ref
) == SUBREG
)
11863 offset
= SUBREG_BYTE (ref
);
11864 ref
= SUBREG_REG (ref
);
11869 /* We have a pseudo which has been spilt onto the stack; there
11870 are two cases here: the first where there is a simple
11871 stack-slot replacement and a second where the stack-slot is
11872 out of range, or is used as a subreg. */
11873 if (reg_equiv_mem (REGNO (ref
)))
11875 ref
= reg_equiv_mem (REGNO (ref
));
11876 base
= find_replacement (&XEXP (ref
, 0));
11879 /* The slot is out of range, or was dressed up in a SUBREG. */
11880 base
= reg_equiv_address (REGNO (ref
));
11883 base
= find_replacement (&XEXP (ref
, 0));
11885 /* Handle the case where the address is too complex to be offset by 1. */
11886 if (GET_CODE (base
) == MINUS
11887 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
11889 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
11891 emit_set_insn (base_plus
, base
);
11894 else if (GET_CODE (base
) == PLUS
)
11896 /* The addend must be CONST_INT, or we would have dealt with it above. */
11897 HOST_WIDE_INT hi
, lo
;
11899 offset
+= INTVAL (XEXP (base
, 1));
11900 base
= XEXP (base
, 0);
11902 /* Rework the address into a legal sequence of insns. */
11903 /* Valid range for lo is -4095 -> 4095 */
11906 : -((-offset
) & 0xfff));
11908 /* Corner case, if lo is the max offset then we would be out of range
11909 once we have added the additional 1 below, so bump the msb into the
11910 pre-loading insn(s). */
11914 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
11915 ^ (HOST_WIDE_INT
) 0x80000000)
11916 - (HOST_WIDE_INT
) 0x80000000);
11918 gcc_assert (hi
+ lo
== offset
);
11922 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
11924 /* Get the base address; addsi3 knows how to handle constants
11925 that require more than one insn. */
11926 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
11932 /* Operands[2] may overlap operands[0] (though it won't overlap
11933 operands[1]), that's why we asked for a DImode reg -- so we can
11934 use the bit that does not overlap. */
11935 if (REGNO (operands
[2]) == REGNO (operands
[0]))
11936 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
11938 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
11940 emit_insn (gen_zero_extendqisi2 (scratch
,
11941 gen_rtx_MEM (QImode
,
11942 plus_constant (Pmode
, base
,
11944 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
11945 gen_rtx_MEM (QImode
,
11946 plus_constant (Pmode
, base
,
11948 if (!BYTES_BIG_ENDIAN
)
11949 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
11950 gen_rtx_IOR (SImode
,
11953 gen_rtx_SUBREG (SImode
, operands
[0], 0),
11957 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
11958 gen_rtx_IOR (SImode
,
11959 gen_rtx_ASHIFT (SImode
, scratch
,
11961 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
11964 /* Handle storing a half-word to memory during reload by synthesizing as two
11965 byte stores. Take care not to clobber the input values until after we
11966 have moved them somewhere safe. This code assumes that if the DImode
11967 scratch in operands[2] overlaps either the input value or output address
11968 in some way, then that value must die in this insn (we absolutely need
11969 two scratch registers for some corner cases). */
11971 arm_reload_out_hi (rtx
*operands
)
11973 rtx ref
= operands
[0];
11974 rtx outval
= operands
[1];
11976 HOST_WIDE_INT offset
= 0;
11978 if (GET_CODE (ref
) == SUBREG
)
11980 offset
= SUBREG_BYTE (ref
);
11981 ref
= SUBREG_REG (ref
);
11986 /* We have a pseudo which has been spilt onto the stack; there
11987 are two cases here: the first where there is a simple
11988 stack-slot replacement and a second where the stack-slot is
11989 out of range, or is used as a subreg. */
11990 if (reg_equiv_mem (REGNO (ref
)))
11992 ref
= reg_equiv_mem (REGNO (ref
));
11993 base
= find_replacement (&XEXP (ref
, 0));
11996 /* The slot is out of range, or was dressed up in a SUBREG. */
11997 base
= reg_equiv_address (REGNO (ref
));
12000 base
= find_replacement (&XEXP (ref
, 0));
12002 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
12004 /* Handle the case where the address is too complex to be offset by 1. */
12005 if (GET_CODE (base
) == MINUS
12006 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
12008 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
12010 /* Be careful not to destroy OUTVAL. */
12011 if (reg_overlap_mentioned_p (base_plus
, outval
))
12013 /* Updating base_plus might destroy outval, see if we can
12014 swap the scratch and base_plus. */
12015 if (!reg_overlap_mentioned_p (scratch
, outval
))
12018 scratch
= base_plus
;
12023 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
12025 /* Be conservative and copy OUTVAL into the scratch now,
12026 this should only be necessary if outval is a subreg
12027 of something larger than a word. */
12028 /* XXX Might this clobber base? I can't see how it can,
12029 since scratch is known to overlap with OUTVAL, and
12030 must be wider than a word. */
12031 emit_insn (gen_movhi (scratch_hi
, outval
));
12032 outval
= scratch_hi
;
12036 emit_set_insn (base_plus
, base
);
12039 else if (GET_CODE (base
) == PLUS
)
12041 /* The addend must be CONST_INT, or we would have dealt with it above. */
12042 HOST_WIDE_INT hi
, lo
;
12044 offset
+= INTVAL (XEXP (base
, 1));
12045 base
= XEXP (base
, 0);
12047 /* Rework the address into a legal sequence of insns. */
12048 /* Valid range for lo is -4095 -> 4095 */
12051 : -((-offset
) & 0xfff));
12053 /* Corner case, if lo is the max offset then we would be out of range
12054 once we have added the additional 1 below, so bump the msb into the
12055 pre-loading insn(s). */
12059 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
12060 ^ (HOST_WIDE_INT
) 0x80000000)
12061 - (HOST_WIDE_INT
) 0x80000000);
12063 gcc_assert (hi
+ lo
== offset
);
12067 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
12069 /* Be careful not to destroy OUTVAL. */
12070 if (reg_overlap_mentioned_p (base_plus
, outval
))
12072 /* Updating base_plus might destroy outval, see if we
12073 can swap the scratch and base_plus. */
12074 if (!reg_overlap_mentioned_p (scratch
, outval
))
12077 scratch
= base_plus
;
12082 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
12084 /* Be conservative and copy outval into scratch now,
12085 this should only be necessary if outval is a
12086 subreg of something larger than a word. */
12087 /* XXX Might this clobber base? I can't see how it
12088 can, since scratch is known to overlap with
12090 emit_insn (gen_movhi (scratch_hi
, outval
));
12091 outval
= scratch_hi
;
12095 /* Get the base address; addsi3 knows how to handle constants
12096 that require more than one insn. */
12097 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
12103 if (BYTES_BIG_ENDIAN
)
12105 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
12106 plus_constant (Pmode
, base
,
12108 gen_lowpart (QImode
, outval
)));
12109 emit_insn (gen_lshrsi3 (scratch
,
12110 gen_rtx_SUBREG (SImode
, outval
, 0),
12112 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
12114 gen_lowpart (QImode
, scratch
)));
12118 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
12120 gen_lowpart (QImode
, outval
)));
12121 emit_insn (gen_lshrsi3 (scratch
,
12122 gen_rtx_SUBREG (SImode
, outval
, 0),
12124 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
12125 plus_constant (Pmode
, base
,
12127 gen_lowpart (QImode
, scratch
)));
12131 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12132 (padded to the size of a word) should be passed in a register. */
12135 arm_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
12137 if (TARGET_AAPCS_BASED
)
12138 return must_pass_in_stack_var_size (mode
, type
);
12140 return must_pass_in_stack_var_size_or_pad (mode
, type
);
12144 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12145 Return true if an argument passed on the stack should be padded upwards,
12146 i.e. if the least-significant byte has useful data.
12147 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12148 aggregate types are placed in the lowest memory address. */
12151 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
12153 if (!TARGET_AAPCS_BASED
)
12154 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
12156 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
12163 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12164 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12165 register has useful data, and return the opposite if the most
12166 significant byte does. */
12169 arm_pad_reg_upward (enum machine_mode mode
,
12170 tree type
, int first ATTRIBUTE_UNUSED
)
12172 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
12174 /* For AAPCS, small aggregates, small fixed-point types,
12175 and small complex types are always padded upwards. */
12178 if ((AGGREGATE_TYPE_P (type
)
12179 || TREE_CODE (type
) == COMPLEX_TYPE
12180 || FIXED_POINT_TYPE_P (type
))
12181 && int_size_in_bytes (type
) <= 4)
12186 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
12187 && GET_MODE_SIZE (mode
) <= 4)
12192 /* Otherwise, use default padding. */
12193 return !BYTES_BIG_ENDIAN
;
12196 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
12197 assuming that the address in the base register is word aligned. */
12199 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
12201 HOST_WIDE_INT max_offset
;
12203 /* Offset must be a multiple of 4 in Thumb mode. */
12204 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
12209 else if (TARGET_ARM
)
12212 gcc_unreachable ();
12214 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
12217 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
12218 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
12219 Assumes that the address in the base register RN is word aligned. Pattern
12220 guarantees that both memory accesses use the same base register,
12221 the offsets are constants within the range, and the gap between the offsets is 4.
12222 If preload complete then check that registers are legal. WBACK indicates whether
12223 address is updated. LOAD indicates whether memory access is load or store. */
12225 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
12226 bool wback
, bool load
)
12228 unsigned int t
, t2
, n
;
12230 if (!reload_completed
)
12233 if (!offset_ok_for_ldrd_strd (offset
))
12240 if ((TARGET_THUMB2
)
12241 && ((wback
&& (n
== t
|| n
== t2
))
12242 || (t
== SP_REGNUM
)
12243 || (t
== PC_REGNUM
)
12244 || (t2
== SP_REGNUM
)
12245 || (t2
== PC_REGNUM
)
12246 || (!load
&& (n
== PC_REGNUM
))
12247 || (load
&& (t
== t2
))
12248 /* Triggers Cortex-M3 LDRD errata. */
12249 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
12253 && ((wback
&& (n
== t
|| n
== t2
))
12254 || (t2
== PC_REGNUM
)
12255 || (t
% 2 != 0) /* First destination register is not even. */
12257 /* PC can be used as base register (for offset addressing only),
12258 but it is depricated. */
12259 || (n
== PC_REGNUM
)))
12266 /* Print a symbolic form of X to the debug file, F. */
12268 arm_print_value (FILE *f
, rtx x
)
12270 switch (GET_CODE (x
))
12273 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
12277 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
12285 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
12287 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
12288 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
12296 fprintf (f
, "\"%s\"", XSTR (x
, 0));
12300 fprintf (f
, "`%s'", XSTR (x
, 0));
12304 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
12308 arm_print_value (f
, XEXP (x
, 0));
12312 arm_print_value (f
, XEXP (x
, 0));
12314 arm_print_value (f
, XEXP (x
, 1));
12322 fprintf (f
, "????");
12327 /* Routines for manipulation of the constant pool. */
12329 /* Arm instructions cannot load a large constant directly into a
12330 register; they have to come from a pc relative load. The constant
12331 must therefore be placed in the addressable range of the pc
12332 relative load. Depending on the precise pc relative load
12333 instruction the range is somewhere between 256 bytes and 4k. This
12334 means that we often have to dump a constant inside a function, and
12335 generate code to branch around it.
12337 It is important to minimize this, since the branches will slow
12338 things down and make the code larger.
12340 Normally we can hide the table after an existing unconditional
12341 branch so that there is no interruption of the flow, but in the
12342 worst case the code looks like this:
12360 We fix this by performing a scan after scheduling, which notices
12361 which instructions need to have their operands fetched from the
12362 constant table and builds the table.
12364 The algorithm starts by building a table of all the constants that
12365 need fixing up and all the natural barriers in the function (places
12366 where a constant table can be dropped without breaking the flow).
12367 For each fixup we note how far the pc-relative replacement will be
12368 able to reach and the offset of the instruction into the function.
12370 Having built the table we then group the fixes together to form
12371 tables that are as large as possible (subject to addressing
12372 constraints) and emit each table of constants after the last
12373 barrier that is within range of all the instructions in the group.
12374 If a group does not contain a barrier, then we forcibly create one
12375 by inserting a jump instruction into the flow. Once the table has
12376 been inserted, the insns are then modified to reference the
12377 relevant entry in the pool.
12379 Possible enhancements to the algorithm (not implemented) are:
12381 1) For some processors and object formats, there may be benefit in
12382 aligning the pools to the start of cache lines; this alignment
12383 would need to be taken into account when calculating addressability
12386 /* These typedefs are located at the start of this file, so that
12387 they can be used in the prototypes there. This comment is to
12388 remind readers of that fact so that the following structures
12389 can be understood more easily.
12391 typedef struct minipool_node Mnode;
12392 typedef struct minipool_fixup Mfix; */
12394 struct minipool_node
12396 /* Doubly linked chain of entries. */
12399 /* The maximum offset into the code that this entry can be placed. While
12400 pushing fixes for forward references, all entries are sorted in order
12401 of increasing max_address. */
12402 HOST_WIDE_INT max_address
;
12403 /* Similarly for an entry inserted for a backwards ref. */
12404 HOST_WIDE_INT min_address
;
12405 /* The number of fixes referencing this entry. This can become zero
12406 if we "unpush" an entry. In this case we ignore the entry when we
12407 come to emit the code. */
12409 /* The offset from the start of the minipool. */
12410 HOST_WIDE_INT offset
;
12411 /* The value in table. */
12413 /* The mode of value. */
12414 enum machine_mode mode
;
12415 /* The size of the value. With iWMMXt enabled
12416 sizes > 4 also imply an alignment of 8-bytes. */
12420 struct minipool_fixup
12424 HOST_WIDE_INT address
;
12426 enum machine_mode mode
;
12430 HOST_WIDE_INT forwards
;
12431 HOST_WIDE_INT backwards
;
12434 /* Fixes less than a word need padding out to a word boundary. */
12435 #define MINIPOOL_FIX_SIZE(mode) \
12436 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
12438 static Mnode
* minipool_vector_head
;
12439 static Mnode
* minipool_vector_tail
;
12440 static rtx minipool_vector_label
;
12441 static int minipool_pad
;
12443 /* The linked list of all minipool fixes required for this function. */
12444 Mfix
* minipool_fix_head
;
12445 Mfix
* minipool_fix_tail
;
12446 /* The fix entry for the current minipool, once it has been placed. */
12447 Mfix
* minipool_barrier
;
12449 /* Determines if INSN is the start of a jump table. Returns the end
12450 of the TABLE or NULL_RTX. */
12452 is_jump_table (rtx insn
)
12456 if (jump_to_label_p (insn
)
12457 && ((table
= next_real_insn (JUMP_LABEL (insn
)))
12458 == next_real_insn (insn
))
12461 && (GET_CODE (PATTERN (table
)) == ADDR_VEC
12462 || GET_CODE (PATTERN (table
)) == ADDR_DIFF_VEC
))
12468 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12469 #define JUMP_TABLES_IN_TEXT_SECTION 0
12472 static HOST_WIDE_INT
12473 get_jump_table_size (rtx insn
)
12475 /* ADDR_VECs only take room if read-only data does into the text
12477 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
12479 rtx body
= PATTERN (insn
);
12480 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
12481 HOST_WIDE_INT size
;
12482 HOST_WIDE_INT modesize
;
12484 modesize
= GET_MODE_SIZE (GET_MODE (body
));
12485 size
= modesize
* XVECLEN (body
, elt
);
12489 /* Round up size of TBB table to a halfword boundary. */
12490 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
12493 /* No padding necessary for TBH. */
12496 /* Add two bytes for alignment on Thumb. */
12501 gcc_unreachable ();
12509 /* Return the maximum amount of padding that will be inserted before
12512 static HOST_WIDE_INT
12513 get_label_padding (rtx label
)
12515 HOST_WIDE_INT align
, min_insn_size
;
12517 align
= 1 << label_to_alignment (label
);
12518 min_insn_size
= TARGET_THUMB
? 2 : 4;
12519 return align
> min_insn_size
? align
- min_insn_size
: 0;
12522 /* Move a minipool fix MP from its current location to before MAX_MP.
12523 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12524 constraints may need updating. */
12526 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
12527 HOST_WIDE_INT max_address
)
12529 /* The code below assumes these are different. */
12530 gcc_assert (mp
!= max_mp
);
12532 if (max_mp
== NULL
)
12534 if (max_address
< mp
->max_address
)
12535 mp
->max_address
= max_address
;
12539 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
12540 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
12542 mp
->max_address
= max_address
;
12544 /* Unlink MP from its current position. Since max_mp is non-null,
12545 mp->prev must be non-null. */
12546 mp
->prev
->next
= mp
->next
;
12547 if (mp
->next
!= NULL
)
12548 mp
->next
->prev
= mp
->prev
;
12550 minipool_vector_tail
= mp
->prev
;
12552 /* Re-insert it before MAX_MP. */
12554 mp
->prev
= max_mp
->prev
;
12557 if (mp
->prev
!= NULL
)
12558 mp
->prev
->next
= mp
;
12560 minipool_vector_head
= mp
;
12563 /* Save the new entry. */
12566 /* Scan over the preceding entries and adjust their addresses as
12568 while (mp
->prev
!= NULL
12569 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
12571 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
12578 /* Add a constant to the minipool for a forward reference. Returns the
12579 node added or NULL if the constant will not fit in this pool. */
12581 add_minipool_forward_ref (Mfix
*fix
)
12583 /* If set, max_mp is the first pool_entry that has a lower
12584 constraint than the one we are trying to add. */
12585 Mnode
* max_mp
= NULL
;
12586 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
12589 /* If the minipool starts before the end of FIX->INSN then this FIX
12590 can not be placed into the current pool. Furthermore, adding the
12591 new constant pool entry may cause the pool to start FIX_SIZE bytes
12593 if (minipool_vector_head
&&
12594 (fix
->address
+ get_attr_length (fix
->insn
)
12595 >= minipool_vector_head
->max_address
- fix
->fix_size
))
12598 /* Scan the pool to see if a constant with the same value has
12599 already been added. While we are doing this, also note the
12600 location where we must insert the constant if it doesn't already
12602 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
12604 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
12605 && fix
->mode
== mp
->mode
12606 && (!LABEL_P (fix
->value
)
12607 || (CODE_LABEL_NUMBER (fix
->value
)
12608 == CODE_LABEL_NUMBER (mp
->value
)))
12609 && rtx_equal_p (fix
->value
, mp
->value
))
12611 /* More than one fix references this entry. */
12613 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
12616 /* Note the insertion point if necessary. */
12618 && mp
->max_address
> max_address
)
12621 /* If we are inserting an 8-bytes aligned quantity and
12622 we have not already found an insertion point, then
12623 make sure that all such 8-byte aligned quantities are
12624 placed at the start of the pool. */
12625 if (ARM_DOUBLEWORD_ALIGN
12627 && fix
->fix_size
>= 8
12628 && mp
->fix_size
< 8)
12631 max_address
= mp
->max_address
;
12635 /* The value is not currently in the minipool, so we need to create
12636 a new entry for it. If MAX_MP is NULL, the entry will be put on
12637 the end of the list since the placement is less constrained than
12638 any existing entry. Otherwise, we insert the new fix before
12639 MAX_MP and, if necessary, adjust the constraints on the other
12642 mp
->fix_size
= fix
->fix_size
;
12643 mp
->mode
= fix
->mode
;
12644 mp
->value
= fix
->value
;
12646 /* Not yet required for a backwards ref. */
12647 mp
->min_address
= -65536;
12649 if (max_mp
== NULL
)
12651 mp
->max_address
= max_address
;
12653 mp
->prev
= minipool_vector_tail
;
12655 if (mp
->prev
== NULL
)
12657 minipool_vector_head
= mp
;
12658 minipool_vector_label
= gen_label_rtx ();
12661 mp
->prev
->next
= mp
;
12663 minipool_vector_tail
= mp
;
12667 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
12668 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
12670 mp
->max_address
= max_address
;
12673 mp
->prev
= max_mp
->prev
;
12675 if (mp
->prev
!= NULL
)
12676 mp
->prev
->next
= mp
;
12678 minipool_vector_head
= mp
;
12681 /* Save the new entry. */
12684 /* Scan over the preceding entries and adjust their addresses as
12686 while (mp
->prev
!= NULL
12687 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
12689 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
12697 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
12698 HOST_WIDE_INT min_address
)
12700 HOST_WIDE_INT offset
;
12702 /* The code below assumes these are different. */
12703 gcc_assert (mp
!= min_mp
);
12705 if (min_mp
== NULL
)
12707 if (min_address
> mp
->min_address
)
12708 mp
->min_address
= min_address
;
12712 /* We will adjust this below if it is too loose. */
12713 mp
->min_address
= min_address
;
12715 /* Unlink MP from its current position. Since min_mp is non-null,
12716 mp->next must be non-null. */
12717 mp
->next
->prev
= mp
->prev
;
12718 if (mp
->prev
!= NULL
)
12719 mp
->prev
->next
= mp
->next
;
12721 minipool_vector_head
= mp
->next
;
12723 /* Reinsert it after MIN_MP. */
12725 mp
->next
= min_mp
->next
;
12727 if (mp
->next
!= NULL
)
12728 mp
->next
->prev
= mp
;
12730 minipool_vector_tail
= mp
;
12736 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
12738 mp
->offset
= offset
;
12739 if (mp
->refcount
> 0)
12740 offset
+= mp
->fix_size
;
12742 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
12743 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
12749 /* Add a constant to the minipool for a backward reference. Returns the
12750 node added or NULL if the constant will not fit in this pool.
12752 Note that the code for insertion for a backwards reference can be
12753 somewhat confusing because the calculated offsets for each fix do
12754 not take into account the size of the pool (which is still under
12757 add_minipool_backward_ref (Mfix
*fix
)
12759 /* If set, min_mp is the last pool_entry that has a lower constraint
12760 than the one we are trying to add. */
12761 Mnode
*min_mp
= NULL
;
12762 /* This can be negative, since it is only a constraint. */
12763 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
12766 /* If we can't reach the current pool from this insn, or if we can't
12767 insert this entry at the end of the pool without pushing other
12768 fixes out of range, then we don't try. This ensures that we
12769 can't fail later on. */
12770 if (min_address
>= minipool_barrier
->address
12771 || (minipool_vector_tail
->min_address
+ fix
->fix_size
12772 >= minipool_barrier
->address
))
12775 /* Scan the pool to see if a constant with the same value has
12776 already been added. While we are doing this, also note the
12777 location where we must insert the constant if it doesn't already
12779 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
12781 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
12782 && fix
->mode
== mp
->mode
12783 && (!LABEL_P (fix
->value
)
12784 || (CODE_LABEL_NUMBER (fix
->value
)
12785 == CODE_LABEL_NUMBER (mp
->value
)))
12786 && rtx_equal_p (fix
->value
, mp
->value
)
12787 /* Check that there is enough slack to move this entry to the
12788 end of the table (this is conservative). */
12789 && (mp
->max_address
12790 > (minipool_barrier
->address
12791 + minipool_vector_tail
->offset
12792 + minipool_vector_tail
->fix_size
)))
12795 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
12798 if (min_mp
!= NULL
)
12799 mp
->min_address
+= fix
->fix_size
;
12802 /* Note the insertion point if necessary. */
12803 if (mp
->min_address
< min_address
)
12805 /* For now, we do not allow the insertion of 8-byte alignment
12806 requiring nodes anywhere but at the start of the pool. */
12807 if (ARM_DOUBLEWORD_ALIGN
12808 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
12813 else if (mp
->max_address
12814 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
12816 /* Inserting before this entry would push the fix beyond
12817 its maximum address (which can happen if we have
12818 re-located a forwards fix); force the new fix to come
12820 if (ARM_DOUBLEWORD_ALIGN
12821 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
12826 min_address
= mp
->min_address
+ fix
->fix_size
;
12829 /* Do not insert a non-8-byte aligned quantity before 8-byte
12830 aligned quantities. */
12831 else if (ARM_DOUBLEWORD_ALIGN
12832 && fix
->fix_size
< 8
12833 && mp
->fix_size
>= 8)
12836 min_address
= mp
->min_address
+ fix
->fix_size
;
12841 /* We need to create a new entry. */
12843 mp
->fix_size
= fix
->fix_size
;
12844 mp
->mode
= fix
->mode
;
12845 mp
->value
= fix
->value
;
12847 mp
->max_address
= minipool_barrier
->address
+ 65536;
12849 mp
->min_address
= min_address
;
12851 if (min_mp
== NULL
)
12854 mp
->next
= minipool_vector_head
;
12856 if (mp
->next
== NULL
)
12858 minipool_vector_tail
= mp
;
12859 minipool_vector_label
= gen_label_rtx ();
12862 mp
->next
->prev
= mp
;
12864 minipool_vector_head
= mp
;
12868 mp
->next
= min_mp
->next
;
12872 if (mp
->next
!= NULL
)
12873 mp
->next
->prev
= mp
;
12875 minipool_vector_tail
= mp
;
12878 /* Save the new entry. */
12886 /* Scan over the following entries and adjust their offsets. */
12887 while (mp
->next
!= NULL
)
12889 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
12890 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
12893 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
12895 mp
->next
->offset
= mp
->offset
;
12904 assign_minipool_offsets (Mfix
*barrier
)
12906 HOST_WIDE_INT offset
= 0;
12909 minipool_barrier
= barrier
;
12911 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
12913 mp
->offset
= offset
;
12915 if (mp
->refcount
> 0)
12916 offset
+= mp
->fix_size
;
12920 /* Output the literal table */
12922 dump_minipool (rtx scan
)
12928 if (ARM_DOUBLEWORD_ALIGN
)
12929 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
12930 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
12937 fprintf (dump_file
,
12938 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12939 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
12941 scan
= emit_label_after (gen_label_rtx (), scan
);
12942 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
12943 scan
= emit_label_after (minipool_vector_label
, scan
);
12945 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
12947 if (mp
->refcount
> 0)
12951 fprintf (dump_file
,
12952 ";; Offset %u, min %ld, max %ld ",
12953 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
12954 (unsigned long) mp
->max_address
);
12955 arm_print_value (dump_file
, mp
->value
);
12956 fputc ('\n', dump_file
);
12959 switch (mp
->fix_size
)
12961 #ifdef HAVE_consttable_1
12963 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
12967 #ifdef HAVE_consttable_2
12969 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
12973 #ifdef HAVE_consttable_4
12975 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
12979 #ifdef HAVE_consttable_8
12981 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
12985 #ifdef HAVE_consttable_16
12987 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
12992 gcc_unreachable ();
13000 minipool_vector_head
= minipool_vector_tail
= NULL
;
13001 scan
= emit_insn_after (gen_consttable_end (), scan
);
13002 scan
= emit_barrier_after (scan
);
13005 /* Return the cost of forcibly inserting a barrier after INSN. */
13007 arm_barrier_cost (rtx insn
)
13009 /* Basing the location of the pool on the loop depth is preferable,
13010 but at the moment, the basic block information seems to be
13011 corrupt by this stage of the compilation. */
13012 int base_cost
= 50;
13013 rtx next
= next_nonnote_insn (insn
);
13015 if (next
!= NULL
&& LABEL_P (next
))
13018 switch (GET_CODE (insn
))
13021 /* It will always be better to place the table before the label, rather
13030 return base_cost
- 10;
13033 return base_cost
+ 10;
13037 /* Find the best place in the insn stream in the range
13038 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
13039 Create the barrier by inserting a jump and add a new fix entry for
13042 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
13044 HOST_WIDE_INT count
= 0;
13046 rtx from
= fix
->insn
;
13047 /* The instruction after which we will insert the jump. */
13048 rtx selected
= NULL
;
13050 /* The address at which the jump instruction will be placed. */
13051 HOST_WIDE_INT selected_address
;
13053 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
13054 rtx label
= gen_label_rtx ();
13056 selected_cost
= arm_barrier_cost (from
);
13057 selected_address
= fix
->address
;
13059 while (from
&& count
< max_count
)
13064 /* This code shouldn't have been called if there was a natural barrier
13066 gcc_assert (!BARRIER_P (from
));
13068 /* Count the length of this insn. This must stay in sync with the
13069 code that pushes minipool fixes. */
13070 if (LABEL_P (from
))
13071 count
+= get_label_padding (from
);
13073 count
+= get_attr_length (from
);
13075 /* If there is a jump table, add its length. */
13076 tmp
= is_jump_table (from
);
13079 count
+= get_jump_table_size (tmp
);
13081 /* Jump tables aren't in a basic block, so base the cost on
13082 the dispatch insn. If we select this location, we will
13083 still put the pool after the table. */
13084 new_cost
= arm_barrier_cost (from
);
13086 if (count
< max_count
13087 && (!selected
|| new_cost
<= selected_cost
))
13090 selected_cost
= new_cost
;
13091 selected_address
= fix
->address
+ count
;
13094 /* Continue after the dispatch table. */
13095 from
= NEXT_INSN (tmp
);
13099 new_cost
= arm_barrier_cost (from
);
13101 if (count
< max_count
13102 && (!selected
|| new_cost
<= selected_cost
))
13105 selected_cost
= new_cost
;
13106 selected_address
= fix
->address
+ count
;
13109 from
= NEXT_INSN (from
);
13112 /* Make sure that we found a place to insert the jump. */
13113 gcc_assert (selected
);
13115 /* Make sure we do not split a call and its corresponding
13116 CALL_ARG_LOCATION note. */
13117 if (CALL_P (selected
))
13119 rtx next
= NEXT_INSN (selected
);
13120 if (next
&& NOTE_P (next
)
13121 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
13125 /* Create a new JUMP_INSN that branches around a barrier. */
13126 from
= emit_jump_insn_after (gen_jump (label
), selected
);
13127 JUMP_LABEL (from
) = label
;
13128 barrier
= emit_barrier_after (from
);
13129 emit_label_after (label
, barrier
);
13131 /* Create a minipool barrier entry for the new barrier. */
13132 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
13133 new_fix
->insn
= barrier
;
13134 new_fix
->address
= selected_address
;
13135 new_fix
->next
= fix
->next
;
13136 fix
->next
= new_fix
;
13141 /* Record that there is a natural barrier in the insn stream at
13144 push_minipool_barrier (rtx insn
, HOST_WIDE_INT address
)
13146 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
13149 fix
->address
= address
;
13152 if (minipool_fix_head
!= NULL
)
13153 minipool_fix_tail
->next
= fix
;
13155 minipool_fix_head
= fix
;
13157 minipool_fix_tail
= fix
;
13160 /* Record INSN, which will need fixing up to load a value from the
13161 minipool. ADDRESS is the offset of the insn since the start of the
13162 function; LOC is a pointer to the part of the insn which requires
13163 fixing; VALUE is the constant that must be loaded, which is of type
13166 push_minipool_fix (rtx insn
, HOST_WIDE_INT address
, rtx
*loc
,
13167 enum machine_mode mode
, rtx value
)
13169 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
13172 fix
->address
= address
;
13175 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
13176 fix
->value
= value
;
13177 fix
->forwards
= get_attr_pool_range (insn
);
13178 fix
->backwards
= get_attr_neg_pool_range (insn
);
13179 fix
->minipool
= NULL
;
13181 /* If an insn doesn't have a range defined for it, then it isn't
13182 expecting to be reworked by this code. Better to stop now than
13183 to generate duff assembly code. */
13184 gcc_assert (fix
->forwards
|| fix
->backwards
);
13186 /* If an entry requires 8-byte alignment then assume all constant pools
13187 require 4 bytes of padding. Trying to do this later on a per-pool
13188 basis is awkward because existing pool entries have to be modified. */
13189 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
13194 fprintf (dump_file
,
13195 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13196 GET_MODE_NAME (mode
),
13197 INSN_UID (insn
), (unsigned long) address
,
13198 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
13199 arm_print_value (dump_file
, fix
->value
);
13200 fprintf (dump_file
, "\n");
13203 /* Add it to the chain of fixes. */
13206 if (minipool_fix_head
!= NULL
)
13207 minipool_fix_tail
->next
= fix
;
13209 minipool_fix_head
= fix
;
13211 minipool_fix_tail
= fix
;
13214 /* Return the cost of synthesizing a 64-bit constant VAL inline.
13215 Returns the number of insns needed, or 99 if we don't know how to
13218 arm_const_double_inline_cost (rtx val
)
13220 rtx lowpart
, highpart
;
13221 enum machine_mode mode
;
13223 mode
= GET_MODE (val
);
13225 if (mode
== VOIDmode
)
13228 gcc_assert (GET_MODE_SIZE (mode
) == 8);
13230 lowpart
= gen_lowpart (SImode
, val
);
13231 highpart
= gen_highpart_mode (SImode
, mode
, val
);
13233 gcc_assert (CONST_INT_P (lowpart
));
13234 gcc_assert (CONST_INT_P (highpart
));
13236 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
13237 NULL_RTX
, NULL_RTX
, 0, 0)
13238 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
13239 NULL_RTX
, NULL_RTX
, 0, 0));
13242 /* Return true if it is worthwhile to split a 64-bit constant into two
13243 32-bit operations. This is the case if optimizing for size, or
13244 if we have load delay slots, or if one 32-bit part can be done with
13245 a single data operation. */
13247 arm_const_double_by_parts (rtx val
)
13249 enum machine_mode mode
= GET_MODE (val
);
13252 if (optimize_size
|| arm_ld_sched
)
13255 if (mode
== VOIDmode
)
13258 part
= gen_highpart_mode (SImode
, mode
, val
);
13260 gcc_assert (CONST_INT_P (part
));
13262 if (const_ok_for_arm (INTVAL (part
))
13263 || const_ok_for_arm (~INTVAL (part
)))
13266 part
= gen_lowpart (SImode
, val
);
13268 gcc_assert (CONST_INT_P (part
));
13270 if (const_ok_for_arm (INTVAL (part
))
13271 || const_ok_for_arm (~INTVAL (part
)))
13277 /* Return true if it is possible to inline both the high and low parts
13278 of a 64-bit constant into 32-bit data processing instructions. */
13280 arm_const_double_by_immediates (rtx val
)
13282 enum machine_mode mode
= GET_MODE (val
);
13285 if (mode
== VOIDmode
)
13288 part
= gen_highpart_mode (SImode
, mode
, val
);
13290 gcc_assert (CONST_INT_P (part
));
13292 if (!const_ok_for_arm (INTVAL (part
)))
13295 part
= gen_lowpart (SImode
, val
);
13297 gcc_assert (CONST_INT_P (part
));
13299 if (!const_ok_for_arm (INTVAL (part
)))
13305 /* Scan INSN and note any of its operands that need fixing.
13306 If DO_PUSHES is false we do not actually push any of the fixups
/* (comment truncated by extraction; presumably: "...needed, we just
   record the presence of invalid constants" -- TODO confirm.)
   ADDRESS is the insn's byte offset used for minipool range checks.
   NOTE(review): lossy extraction -- braces/returns missing.  */
13309 note_invalid_constants (rtx insn
, HOST_WIDE_INT address
, int do_pushes
)
13313 extract_insn (insn
);
13315 if (!constrain_operands (1))
13316 fatal_insn_not_found (insn
);
13318 if (recog_data
.n_alternatives
== 0)
13321 /* Fill in recog_op_alt with information about the constraints of
13323 preprocess_constraints ();
13325 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
13327 /* Things we need to fix can only occur in inputs. */
13328 if (recog_data
.operand_type
[opno
] != OP_IN
)
13331 /* If this alternative is a memory reference, then any mention
13332 of constants in this alternative is really to fool reload
13333 into allowing us to accept one there. We need to fix them up
13334 now so that we output the right code. */
13335 if (recog_op_alt
[opno
][which_alternative
].memory_ok
)
13337 rtx op
= recog_data
.operand
[opno
];
/* Bare constant operand: queue it for the minipool.  */
13339 if (CONSTANT_P (op
))
13342 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
13343 recog_data
.operand_mode
[opno
], op
);
/* Reference into an existing constant pool entry.  */
13345 else if (MEM_P (op
)
13346 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
13347 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
13351 rtx cop
= avoid_constant_pool_reference (op
);
13353 /* Casting the address of something to a mode narrower
13354 than a word can cause avoid_constant_pool_reference()
13355 to return the pool reference itself. That's no good to
13356 us here. Lets just hope that we can use the
13357 constant pool value directly. */
13359 cop
= get_pool_constant (XEXP (op
, 0));
13361 push_minipool_fix (insn
, address
,
13362 recog_data
.operand_loc
[opno
],
13363 recog_data
.operand_mode
[opno
], cop
);
13373 /* Convert instructions to their cc-clobbering variant if possible, since
13374 that allows us to use smaller encodings. */
/* Walks each basic block backwards, simulating register liveness; when
   the condition-code register is dead after a simple SET, the insn is
   rewritten as a PARALLEL of the SET plus a CC clobber so the assembler
   can pick a 16-bit flag-setting Thumb-2 encoding.
   NOTE(review): lossy extraction -- the switch's case labels, breaks,
   and several assignments to `action' are missing.  */
13377 thumb2_reorg (void)
13382 INIT_REG_SET (&live
);
13384 /* We are freeing block_for_insn in the toplev to keep compatibility
13385 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13386 compute_bb_for_insn ();
/* (a FOR_EACH_BB-style loop header is missing here in the extraction.)  */
13393 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
13394 df_simulate_initialize_backwards (bb
, &live
);
13395 FOR_BB_INSNS_REVERSE (bb
, insn
)
/* Only consider plain SETs while CC_REGNUM is dead.  */
13397 if (NONJUMP_INSN_P (insn
)
13398 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
13399 && GET_CODE (PATTERN (insn
)) == SET
)
13401 enum {SKIP
, CONV
, SWAP_CONV
} action
= SKIP
;
13402 rtx pat
= PATTERN (insn
);
13403 rtx dst
= XEXP (pat
, 0);
13404 rtx src
= XEXP (pat
, 1);
13405 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
13407 if (!OBJECT_P (src
))
13408 op0
= XEXP (src
, 0);
13410 if (BINARY_P (src
))
13411 op1
= XEXP (src
, 1);
/* 16-bit encodings only exist for low-register destinations.  */
13413 if (low_register_operand (dst
, SImode
))
13415 switch (GET_CODE (src
))
13418 /* Adding two registers and storing the result
13419 in the first source is already a 16-bit
13421 if (rtx_equal_p (dst
, op0
)
13422 && register_operand (op1
, SImode
))
13425 if (low_register_operand (op0
, SImode
))
13427 /* ADDS <Rd>,<Rn>,<Rm> */
13428 if (low_register_operand (op1
, SImode
))
13430 /* ADDS <Rdn>,#<imm8> */
13431 /* SUBS <Rdn>,#<imm8> */
13432 else if (rtx_equal_p (dst
, op0
)
13433 && CONST_INT_P (op1
)
13434 && IN_RANGE (INTVAL (op1
), -255, 255))
13436 /* ADDS <Rd>,<Rn>,#<imm3> */
13437 /* SUBS <Rd>,<Rn>,#<imm3> */
13438 else if (CONST_INT_P (op1
)
13439 && IN_RANGE (INTVAL (op1
), -7, 7))
13445 /* RSBS <Rd>,<Rn>,#0
13446 Not handled here: see NEG below. */
13447 /* SUBS <Rd>,<Rn>,#<imm3>
13449 Not handled here: see PLUS above. */
13450 /* SUBS <Rd>,<Rn>,<Rm> */
13451 if (low_register_operand (op0
, SImode
)
13452 && low_register_operand (op1
, SImode
))
13457 /* MULS <Rdm>,<Rn>,<Rdm>
13458 As an exception to the rule, this is only used
13459 when optimizing for size since MULS is slow on all
13460 known implementations. We do not even want to use
13461 MULS in cold code, if optimizing for speed, so we
13462 test the global flag here. */
13463 if (!optimize_size
)
13465 /* else fall through. */
13469 /* ANDS <Rdn>,<Rm> */
13470 if (rtx_equal_p (dst
, op0
)
13471 && low_register_operand (op1
, SImode
))
/* Commutative op: can also swap the operands to match Rdn form.  */
13473 else if (rtx_equal_p (dst
, op1
)
13474 && low_register_operand (op0
, SImode
))
13475 action
= SWAP_CONV
;
13481 /* ASRS <Rdn>,<Rm> */
13482 /* LSRS <Rdn>,<Rm> */
13483 /* LSLS <Rdn>,<Rm> */
13484 if (rtx_equal_p (dst
, op0
)
13485 && low_register_operand (op1
, SImode
))
13487 /* ASRS <Rd>,<Rm>,#<imm5> */
13488 /* LSRS <Rd>,<Rm>,#<imm5> */
13489 /* LSLS <Rd>,<Rm>,#<imm5> */
13490 else if (low_register_operand (op0
, SImode
)
13491 && CONST_INT_P (op1
)
13492 && IN_RANGE (INTVAL (op1
), 0, 31))
13497 /* RORS <Rdn>,<Rm> */
13498 if (rtx_equal_p (dst
, op0
)
13499 && low_register_operand (op1
, SImode
))
13505 /* MVNS <Rd>,<Rm> */
13506 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
13507 if (low_register_operand (op0
, SImode
))
13512 /* MOVS <Rd>,#<imm8> */
13513 if (CONST_INT_P (src
)
13514 && IN_RANGE (INTVAL (src
), 0, 255))
13519 /* MOVS and MOV<c> with registers have different
13520 encodings, so are not relevant here. */
/* Rewrite the pattern as (parallel [SET; clobber CC]).  */
13528 if (action
!= SKIP
)
13530 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
13531 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
13534 if (action
== SWAP_CONV
)
13536 src
= copy_rtx (src
);
13537 XEXP (src
, 0) = op1
;
13538 XEXP (src
, 1) = op0
;
13539 pat
= gen_rtx_SET (VOIDmode
, dst
, src
);
13540 vec
= gen_rtvec (2, pat
, clobber
);
13542 else /* action == CONV */
13543 vec
= gen_rtvec (2, pat
, clobber
);
13545 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
/* Force re-recognition of the modified insn.  */
13546 INSN_CODE (insn
) = -1;
13550 if (NONDEBUG_INSN_P (insn
))
13551 df_simulate_one_insn_backwards (bb
, insn
, &live
);
13555 CLEAR_REG_SET (&live
);
13558 /* Gcc puts the pool in the wrong place for ARM, since we can only
13559 load addresses a limited distance around the pc. We do some
13560 special munging to move the constant pool values to the correct
13561 point in the code. */
/* NOTE(review): the function header line itself ("arm_reorg") is
   missing from this lossy extraction, along with many statements;
   this body is the machine-dependent reorg pass that scans all insns,
   collects constant-pool fixups, and emits minipools within
   pc-relative addressing range.  */
13566 HOST_WIDE_INT address
= 0;
13572 /* Ensure all insns that must be split have been split at this point.
13573 Otherwise, the pool placement code below may compute incorrect
13574 insn lengths. Note that when optimizing, all insns have already
13575 been split at this point. */
13577 split_all_insns_noflow ();
13579 minipool_fix_head
= minipool_fix_tail
= NULL
;
13581 /* The first insn must always be a note, or the code below won't
13582 scan it properly. */
13583 insn
= get_insns ();
13584 gcc_assert (NOTE_P (insn
));
13587 /* Scan all the insns and record the operands that will need fixing. */
13588 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
13590 if (BARRIER_P (insn
))
13591 push_minipool_barrier (insn
, address
);
13592 else if (INSN_P (insn
))
13596 note_invalid_constants (insn
, address
, true);
13597 address
+= get_attr_length (insn
);
13599 /* If the insn is a vector jump, add the size of the table
13600 and skip the table. */
13601 if ((table
= is_jump_table (insn
)) != NULL
)
13603 address
+= get_jump_table_size (table
);
13607 else if (LABEL_P (insn
))
13608 /* Add the worst-case padding due to alignment. We don't add
13609 the _current_ padding because the minipool insertions
13610 themselves might change it. */
13611 address
+= get_label_padding (insn
);
13614 fix
= minipool_fix_head
;
13616 /* Now scan the fixups and perform the required changes. */
13621 Mfix
* last_added_fix
;
13622 Mfix
* last_barrier
= NULL
;
13625 /* Skip any further barriers before the next fix. */
13626 while (fix
&& BARRIER_P (fix
->insn
))
13629 /* No more fixes. */
13633 last_added_fix
= NULL
;
/* Greedily add forward references until one no longer fits.  */
13635 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
13637 if (BARRIER_P (ftmp
->insn
))
13639 if (ftmp
->address
>= minipool_vector_head
->max_address
)
13642 last_barrier
= ftmp
;
13644 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
13647 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
13650 /* If we found a barrier, drop back to that; any fixes that we
13651 could have reached but come after the barrier will now go in
13652 the next mini-pool. */
13653 if (last_barrier
!= NULL
)
13655 /* Reduce the refcount for those fixes that won't go into this
13657 for (fdel
= last_barrier
->next
;
13658 fdel
&& fdel
!= ftmp
;
13661 fdel
->minipool
->refcount
--;
13662 fdel
->minipool
= NULL
;
13665 ftmp
= last_barrier
;
13669 /* ftmp is first fix that we can't fit into this pool and
13670 there no natural barriers that we could use. Insert a
13671 new barrier in the code somewhere between the previous
13672 fix and this one, and arrange to jump around it. */
13673 HOST_WIDE_INT max_address
;
13675 /* The last item on the list of fixes must be a barrier, so
13676 we can never run off the end of the list of fixes without
13677 last_barrier being set. */
13680 max_address
= minipool_vector_head
->max_address
;
13681 /* Check that there isn't another fix that is in range that
13682 we couldn't fit into this pool because the pool was
13683 already too large: we need to put the pool before such an
13684 instruction. The pool itself may come just after the
13685 fix because create_fix_barrier also allows space for a
13686 jump instruction. */
13687 if (ftmp
->address
< max_address
)
13688 max_address
= ftmp
->address
+ 1;
13690 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
13693 assign_minipool_offsets (last_barrier
);
/* (backward-reference scan; surrounding loop/braces lost in extraction)  */
13697 if (!BARRIER_P (ftmp
->insn
)
13698 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
13705 /* Scan over the fixes we have identified for this pool, fixing them
13706 up and adding the constants to the pool itself. */
13707 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
13708 this_fix
= this_fix
->next
)
13709 if (!BARRIER_P (this_fix
->insn
))
/* Replace the offending operand with a pc-relative pool reference.  */
13712 = plus_constant (Pmode
,
13713 gen_rtx_LABEL_REF (VOIDmode
,
13714 minipool_vector_label
),
13715 this_fix
->minipool
->offset
);
13716 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
13719 dump_minipool (last_barrier
->insn
);
13723 /* From now on we must synthesize any constants that we can't handle
13724 directly. This can happen if the RTL gets split during final
13725 instruction generation. */
13726 after_arm_reorg
= 1;
13728 /* Free the minipool memory. */
13729 obstack_free (&minipool_obstack
, minipool_startobj
);
13732 /* Routines to output assembly language. */
13734 /* If the rtx is the correct value then return the string of the number.
13735 In this way we can ensure that valid double constants are generated even
13736 when cross compiling. */
/* NOTE(review): lossy extraction -- the return type, initialization call
   and return statement are missing.  The surviving code only asserts X
   equals value_fp0 after extracting its REAL_VALUE.  */
13738 fp_immediate_constant (rtx x
)
13742 if (!fp_consts_inited
)
13745 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
13747 gcc_assert (REAL_VALUES_EQUAL (r
, value_fp0
));
13751 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
/* NOTE(review): lossy extraction -- lazy-init call and return statement
   are missing.  Only the fp0 assertion survives.  */
13752 static const char *
13753 fp_const_from_val (REAL_VALUE_TYPE
*r
)
13755 if (!fp_consts_inited
)
13758 gcc_assert (REAL_VALUES_EQUAL (*r
, value_fp0
));
13762 /* OPERANDS[0] is the entire list of insns that constitute pop,
13763 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
13764 is in the list, UPDATE is true iff the list contains explicit
13765 update of base register. */
/* Builds a pop/ldm assembler string in `pattern' and emits it via
   output_asm_insn.  NOTE(review): lossy extraction -- the trailing
   parameter(s) of the signature (an `update' flag is clearly used) and
   several statements are missing.  */
13767 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
13773 const char *conditional
;
13774 int num_saves
= XVECLEN (operands
[0], 0);
13775 unsigned int regno
;
13776 unsigned int regno_base
= REGNO (operands
[1]);
/* Skip the SP-update and return elements at the front of the vector.  */
13779 offset
+= update
? 1 : 0;
13780 offset
+= return_pc
? 1 : 0;
13782 /* Is the base register in the list? */
13783 for (i
= offset
; i
< num_saves
; i
++)
13785 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
13786 /* If SP is in the list, then the base register must be SP. */
13787 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
13788 /* If base register is in the list, there must be no explicit update. */
13789 if (regno
== regno_base
)
13790 gcc_assert (!update
);
/* %D0 = inverted condition, %d0 = normal condition suffix.  */
13793 conditional
= reverse
? "%?%D0" : "%?%d0";
13794 if ((regno_base
== SP_REGNUM
) && TARGET_UNIFIED_ASM
)
13796 /* Output pop (not stmfd) because it has a shorter encoding. */
13797 gcc_assert (update
);
13798 sprintf (pattern
, "pop%s\t{", conditional
);
13802 /* Output ldmfd when the base register is SP, otherwise output ldmia.
13803 It's just a convention, their semantics are identical. */
13804 if (regno_base
== SP_REGNUM
)
13805 sprintf (pattern
, "ldm%sfd\t", conditional
);
13806 else if (TARGET_UNIFIED_ASM
)
13807 sprintf (pattern
, "ldmia%s\t", conditional
);
13809 sprintf (pattern
, "ldm%sia\t", conditional
);
13811 strcat (pattern
, reg_names
[regno_base
]);
/* "!" requests base-register writeback.  */
13813 strcat (pattern
, "!, {");
13815 strcat (pattern
, ", {");
13818 /* Output the first destination register. */
13820 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
13822 /* Output the rest of the destination registers. */
13823 for (i
= offset
+ 1; i
< num_saves
; i
++)
13825 strcat (pattern
, ", ");
13827 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
13830 strcat (pattern
, "}");
/* "^" = exception-return form for interrupt handlers.  */
13832 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc
)
13833 strcat (pattern
, "^");
13835 output_asm_insn (pattern
, &cond
);
13839 /* Output the assembly for a store multiple. */
/* Emits an FSTMFDD (VFP double-reg push) covering the register list in
   OPERANDS[2], starting at the D register named by OPERANDS[1].
   NOTE(review): lossy extraction -- local declarations and the return
   statement are missing.  */
13842 vfp_output_fstmd (rtx
* operands
)
13849 strcpy (pattern
, "fstmfdd%?\t%m0!, {%P1");
13850 p
= strlen (pattern
);
13852 gcc_assert (REG_P (operands
[1]));
/* Convert the internal VFP register number into a D-register index.  */
13854 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
13855 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
13857 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
13859 strcpy (&pattern
[p
], "}");
13861 output_asm_insn (pattern
, operands
);
13866 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13867 number of bytes pushed. */
/* BASE_REG is the internal register number of the first D register;
   COUNT is the number of D registers to push.  Builds a PARALLEL
   push-multiple insn plus a REG_FRAME_RELATED_EXPR note describing the
   stores for DWARF CFI.  NOTE(review): lossy extraction -- returns,
   some rtx construction lines and braces are missing.  */
13870 vfp_emit_fstmd (int base_reg
, int count
)
13877 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
13878 register pairs are stored by a store multiple insn. We avoid this
13879 by pushing an extra pair. */
13880 if (count
== 2 && !arm_arch6
)
13882 if (base_reg
== LAST_VFP_REGNUM
- 3)
13887 /* FSTMD may not store more than 16 doubleword registers at once. Split
13888 larger stores into multiple parts (up to a maximum of two, in
13893 /* NOTE: base_reg is an internal register number, so each D register
/* Recursive split: push the upper part first, then the lower 16.  */
13895 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
13896 saved
+= vfp_emit_fstmd (base_reg
, 16);
13900 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
13901 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
13903 reg
= gen_rtx_REG (DFmode
, base_reg
);
/* Element 0: the store-multiple with pre-modify of the stack pointer.  */
13906 XVECEXP (par
, 0, 0)
13907 = gen_rtx_SET (VOIDmode
,
13910 gen_rtx_PRE_MODIFY (Pmode
,
13913 (Pmode
, stack_pointer_rtx
,
13916 gen_rtx_UNSPEC (BLKmode
,
13917 gen_rtvec (1, reg
),
13918 UNSPEC_PUSH_MULT
));
/* DWARF note, entry 0: SP adjustment by count * 8 bytes.  */
13920 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
13921 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
13922 RTX_FRAME_RELATED_P (tmp
) = 1;
13923 XVECEXP (dwarf
, 0, 0) = tmp
;
/* DWARF note, entry 1: the first register's store.  */
13925 tmp
= gen_rtx_SET (VOIDmode
,
13926 gen_frame_mem (DFmode
, stack_pointer_rtx
),
13928 RTX_FRAME_RELATED_P (tmp
) = 1;
13929 XVECEXP (dwarf
, 0, 1) = tmp
;
/* Remaining registers: USEs in the PARALLEL, stores in the note.  */
13931 for (i
= 1; i
< count
; i
++)
13933 reg
= gen_rtx_REG (DFmode
, base_reg
);
13935 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
13937 tmp
= gen_rtx_SET (VOIDmode
,
13938 gen_frame_mem (DFmode
,
13939 plus_constant (Pmode
,
13943 RTX_FRAME_RELATED_P (tmp
) = 1;
13944 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
13947 par
= emit_insn (par
);
13948 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
13949 RTX_FRAME_RELATED_P (par
) = 1;
13954 /* Emit a call instruction with pattern PAT. ADDR is the address of
13955 the call target. */
13958 arm_emit_call_insn (rtx pat
, rtx addr
)
13962 insn
= emit_call_insn (pat
);
13964 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13965 If the call might use such an entry, add a use of the PIC register
13966 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13967 if (TARGET_VXWORKS_RTP
13969 && GET_CODE (addr
) == SYMBOL_REF
13970 && (SYMBOL_REF_DECL (addr
)
13971 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
13972 : !SYMBOL_REF_LOCAL_P (addr
)))
13974 require_pic_register ();
13975 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
13979 /* Output a 'call' insn. */
/* Pre-ARMv5 register-indirect call: save the return address into LR by
   hand (mov lr, pc) then branch via bx or a mov to pc.
   NOTE(review): lossy extraction -- the return statement is missing.  */
13981 output_call (rtx
*operands
)
13983 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
13985 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13986 if (REGNO (operands
[0]) == LR_REGNUM
)
13988 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
13989 output_asm_insn ("mov%?\t%0, %|lr", operands
);
/* Manually set the return address; pc reads as current insn + 8.  */
13992 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
/* bx preserves interworking state on Thumb-capable cores.  */
13994 if (TARGET_INTERWORK
|| arm_arch4t
)
13995 output_asm_insn ("bx%?\t%0", operands
);
13997 output_asm_insn ("mov%?\t%|pc, %0", operands
);
14002 /* Output a 'call' insn that is a reference in memory. This is
14003 disabled for ARMv5 and we prefer a blx instead because otherwise
14004 there's a significant performance overhead. */
/* NOTE(review): lossy extraction -- braces and return statements are
   missing around the alternative sequences below.  */
14006 output_call_mem (rtx
*operands
)
14008 gcc_assert (!arm_arch5
);
14009 if (TARGET_INTERWORK
)
/* Load the target into IP, save return address, then bx.  */
14011 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
14012 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
14013 output_asm_insn ("bx%?\t%|ip", operands
);
14015 else if (regno_use_in (LR_REGNUM
, operands
[0]))
14017 /* LR is used in the memory address. We load the address in the
14018 first instruction. It's safe to use IP as the target of the
14019 load since the call will kill it anyway. */
14020 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
14021 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
14023 output_asm_insn ("bx%?\t%|ip", operands
);
14025 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
/* Simple case: set LR then load pc straight from the memory operand.  */
14029 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
14030 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
14037 /* Output a move from arm registers to arm registers of a long double
14038 OPERANDS[0] is the destination.
14039 OPERANDS[1] is the source. */
/* Copies three consecutive SImode registers, choosing the copy
   direction so an overlapping source is never clobbered before it is
   read.  NOTE(review): lossy extraction -- return statement missing.  */
14041 output_mov_long_double_arm_from_arm (rtx
*operands
)
14043 /* We have to be careful here because the two might overlap. */
14044 int dest_start
= REGNO (operands
[0]);
14045 int src_start
= REGNO (operands
[1]);
/* Destination below source: copy upwards.  */
14049 if (dest_start
< src_start
)
14051 for (i
= 0; i
< 3; i
++)
14053 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
14054 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
14055 output_asm_insn ("mov%?\t%0, %1", ops
);
/* Otherwise copy downwards to avoid overwriting unread source regs.  */
14060 for (i
= 2; i
>= 0; i
--)
14062 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
14063 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
14064 output_asm_insn ("mov%?\t%0, %1", ops
);
/* Emit a movw/movt-style pair setting DEST from SRC: for constants,
   a low-16-bit set followed by a ZERO_EXTRACT store of the high half
   (skipped when the high half is zero); otherwise HIGH + LO_SUM.  */
14072 arm_emit_movpair (rtx dest
, rtx src
)
14074 /* If the src is an immediate, simplify it. */
14075 if (CONST_INT_P (src
))
14077 HOST_WIDE_INT val
= INTVAL (src
);
/* Low half first (movw).  */
14078 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
/* High half only when nonzero (movt).  */
14079 if ((val
>> 16) & 0x0000ffff)
14080 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
14082 GEN_INT ((val
>> 16) & 0x0000ffff));
/* Symbolic source: HIGH/LO_SUM relocation pair.  */
14085 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
14086 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
14089 /* Output a move between double words. It must be REG<-MEM
/* (comment truncated; presumably "...or MEM<-REG".)  When EMIT is
   false no assembly is produced and *COUNT receives the insn count
   instead (used for length computation).
   NOTE(review): very lossy extraction -- case labels, breaks, braces
   and return statements are missing throughout.  Do not edit without
   the original arm.c.  */
14092 output_move_double (rtx
*operands
, bool emit
, int *count
)
14094 enum rtx_code code0
= GET_CODE (operands
[0]);
14095 enum rtx_code code1
= GET_CODE (operands
[1]);
14100 /* The only case when this might happen is when
14101 you are looking at the length of a DImode instruction
14102 that has an invalid constant in it. */
14103 if (code0
== REG
&& code1
!= MEM
)
14105 gcc_assert (!emit
);
/* ----- Load: REG <- MEM ----- */
14112 unsigned int reg0
= REGNO (operands
[0]);
/* otherops[0] names the second register of the destination pair.  */
14114 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
14116 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
14118 switch (GET_CODE (XEXP (operands
[1], 0)))
/* Plain register base: ldrd when safe, else ldmia.  */
14125 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
14126 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
14128 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
14133 gcc_assert (TARGET_LDRD
);
14135 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
14142 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
14144 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
14152 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
14154 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
14159 gcc_assert (TARGET_LDRD
);
14161 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
14166 /* Autoicrement addressing modes should never have overlapping
14167 base and destination registers, and overlapping index registers
14168 are already prohibited, so this doesn't need to worry about
14170 otherops
[0] = operands
[0];
14171 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
14172 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
14174 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
14176 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
14178 /* Registers overlap so split out the increment. */
14181 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
14182 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
14189 /* Use a single insn if we can.
14190 FIXME: IWMMXT allows offsets larger than ldrd can
14191 handle, fix these up with a pair of ldr. */
14193 || !CONST_INT_P (otherops
[2])
14194 || (INTVAL (otherops
[2]) > -256
14195 && INTVAL (otherops
[2]) < 256))
14198 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
/* Out-of-ldrd-range offset: use two single-word loads.  */
14204 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
14205 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
14215 /* Use a single insn if we can.
14216 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14217 fix these up with a pair of ldr. */
14219 || !CONST_INT_P (otherops
[2])
14220 || (INTVAL (otherops
[2]) > -256
14221 && INTVAL (otherops
[2]) < 256))
14224 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
14230 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
14231 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
14241 /* We might be able to use ldrd %0, %1 here. However the range is
14242 different to ldr/adr, and it is broken on some ARMv7-M
14243 implementations. */
14244 /* Use the second register of the pair to avoid problematic
14246 otherops
[1] = operands
[1];
14248 output_asm_insn ("adr%?\t%0, %1", otherops
);
14249 operands
[1] = otherops
[0];
14253 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
14255 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
14262 /* ??? This needs checking for thumb2. */
14264 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
14265 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
14267 otherops
[0] = operands
[0];
14268 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
14269 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
14271 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
/* Small constant offsets map directly onto ldm addressing modes.  */
14273 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
14275 switch ((int) INTVAL (otherops
[2]))
14279 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
14285 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
14291 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
14295 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
14296 operands
[1] = otherops
[0];
14298 && (REG_P (otherops
[2])
14300 || (CONST_INT_P (otherops
[2])
14301 && INTVAL (otherops
[2]) > -256
14302 && INTVAL (otherops
[2]) < 256)))
14304 if (reg_overlap_mentioned_p (operands
[0],
14308 /* Swap base and index registers over to
14309 avoid a conflict. */
14311 otherops
[1] = otherops
[2];
14314 /* If both registers conflict, it will usually
14315 have been fixed by a splitter. */
14316 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
14317 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
/* Materialize the address first, then ldrd from it.  */
14321 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
14322 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
14329 otherops
[0] = operands
[0];
14331 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
14336 if (CONST_INT_P (otherops
[2]))
/* Negative constants that don't encode: subtract instead.  */
14340 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
14341 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
14343 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
14349 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
14355 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
/* When not emitting, return the template for length computation.  */
14362 return "ldr%(d%)\t%0, [%1]";
14364 return "ldm%(ia%)\t%1, %M0";
/* Fallback: two single-word loads via an adjusted address.  */
14368 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
14369 /* Take care of overlapping base/data reg. */
14370 if (reg_mentioned_p (operands
[0], operands
[1]))
14374 output_asm_insn ("ldr%?\t%0, %1", otherops
);
14375 output_asm_insn ("ldr%?\t%0, %1", operands
);
14385 output_asm_insn ("ldr%?\t%0, %1", operands
);
14386 output_asm_insn ("ldr%?\t%0, %1", otherops
);
/* ----- Store: MEM <- REG ----- */
14396 /* Constraints should ensure this. */
14397 gcc_assert (code0
== MEM
&& code1
== REG
);
14398 gcc_assert (REGNO (operands
[1]) != IP_REGNUM
);
14400 switch (GET_CODE (XEXP (operands
[0], 0)))
14406 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
14408 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
14413 gcc_assert (TARGET_LDRD
);
14415 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
14422 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
14424 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
14432 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
14434 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
14439 gcc_assert (TARGET_LDRD
);
14441 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
14446 otherops
[0] = operands
[1];
14447 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
14448 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
14450 /* IWMMXT allows offsets larger than ldrd can handle,
14451 fix these up with a pair of ldr. */
14453 && CONST_INT_P (otherops
[2])
14454 && (INTVAL(otherops
[2]) <= -256
14455 || INTVAL(otherops
[2]) >= 256))
14457 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
14461 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
14462 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
14471 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
14472 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
14478 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
14481 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
14486 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
14491 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
14492 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
14494 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
14498 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
14505 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
14512 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
14517 && (REG_P (otherops
[2])
14519 || (CONST_INT_P (otherops
[2])
14520 && INTVAL (otherops
[2]) > -256
14521 && INTVAL (otherops
[2]) < 256)))
14523 otherops
[0] = operands
[1];
14524 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
14526 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
/* Fallback: two single-word stores.  */
14532 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
14533 otherops
[1] = operands
[1];
14536 output_asm_insn ("str%?\t%1, %0", operands
);
14537 output_asm_insn ("str%?\t%H1, %0", otherops
);
14547 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14548 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
/* NOTE(review): lossy extraction -- case labels, braces, and the
   return statement are missing.  */
14551 output_move_quad (rtx
*operands
)
14553 if (REG_P (operands
[0]))
14555 /* Load, or reg->reg move. */
14557 if (MEM_P (operands
[1]))
14559 switch (GET_CODE (XEXP (operands
[1], 0)))
14562 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
/* Label-relative address: materialize it with adr, then ldmia.  */
14567 output_asm_insn ("adr%?\t%0, %1", operands
);
14568 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
14572 gcc_unreachable ();
/* reg -> reg: copy four SImode regs, ordered for safe overlap.  */
14580 gcc_assert (REG_P (operands
[1]));
14582 dest
= REGNO (operands
[0]);
14583 src
= REGNO (operands
[1]);
14585 /* This seems pretty dumb, but hopefully GCC won't try to do it
14588 for (i
= 0; i
< 4; i
++)
14590 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
14591 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
14592 output_asm_insn ("mov%?\t%0, %1", ops
);
/* Reverse order when destination overlaps source from above.  */
14595 for (i
= 3; i
>= 0; i
--)
14597 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
14598 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
14599 output_asm_insn ("mov%?\t%0, %1", ops
);
/* Store: MEM <- REG.  */
14605 gcc_assert (MEM_P (operands
[0]));
14606 gcc_assert (REG_P (operands
[1]));
14607 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
14609 switch (GET_CODE (XEXP (operands
[0], 0)))
14612 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
14616 gcc_unreachable ();
14623 /* Output a VFP load or store instruction. */
/* Picks an fldm/fstm/fld/fst template based on the address form
   (PRE_DEC, POST_INC, or plain) and fills in load/store direction,
   single vs double precision, and an "int" annotation for integer
   modes.  NOTE(review): lossy extraction -- case labels, some
   template-argument lines and the return are missing.  */
14626 output_move_vfp (rtx
*operands
)
14628 rtx reg
, mem
, addr
, ops
[2];
/* load != 0 when operands[0] is the register (REG <- MEM).  */
14629 int load
= REG_P (operands
[0]);
/* dp: 8-byte (double-precision) access.  */
14630 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
14631 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
14634 enum machine_mode mode
;
/* Index trick: the non-load operand is the register.  */
14636 reg
= operands
[!load
];
14637 mem
= operands
[load
];
14639 mode
= GET_MODE (reg
);
14641 gcc_assert (REG_P (reg
));
14642 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
14643 gcc_assert (mode
== SFmode
14647 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
14648 gcc_assert (MEM_P (mem
));
14650 addr
= XEXP (mem
, 0);
14652 switch (GET_CODE (addr
))
/* Pre-decrement: descending multiple with writeback.  */
14655 templ
= "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14656 ops
[0] = XEXP (addr
, 0);
/* Post-increment: ascending multiple with writeback.  */
14661 templ
= "f%smia%c%%?\t%%0!, {%%%s1}%s";
14662 ops
[0] = XEXP (addr
, 0);
/* Plain address: single fld/fst.  */
14667 templ
= "f%s%c%%?\t%%%s0, %%1%s";
14673 sprintf (buff
, templ
,
14674 load
? "ld" : "st",
14677 integer_p
? "\t%@ int" : "");
14678 output_asm_insn (buff
, ops
);
14683 /* Output a Neon double-word or quad-word load or store, or a load
14684 or store for larger structure modes.
14686 WARNING: The ordering of elements is weird in big-endian mode,
14687 because the EABI requires that vectors stored in memory appear
14688 as though they were stored by a VSTM, as required by the EABI.
14689 GCC RTL defines element ordering based on in-memory order.
14690 This can be different from the architectural ordering of elements
14691 within a NEON register. The intrinsics defined in arm_neon.h use the
14692 NEON register element ordering, not the GCC RTL element ordering.
14694 For example, the in-memory ordering of a big-endian a quadword
14695 vector with 16-bit elements when stored from register pair {d0,d1}
14696 will be (lowest address first, d0[N] is NEON register element N):
14698 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14700 When necessary, quadword registers (dN, dN+1) are moved to ARM
14701 registers from rN in the order:
14703 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14705 So that STM/LDM can be used on vectors in ARM registers, and the
14706 same memory layout will result as if VSTM/VLDM were used.
14708 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
14709 possible, which allows use of appropriate alignment tags.
14710 Note that the choice of "64" is independent of the actual vector
14711 element size; this size simply ensures that the behavior is
14712 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
14714 Due to limitations of those instructions, use of VST1.64/VLD1.64
14715 is not possible if:
14716 - the address contains PRE_DEC, or
14717 - the mode refers to more than 4 double-word registers
14719 In those cases, it would be possible to replace VSTM/VLDM by a
14720 sequence of instructions; this is not currently implemented since
14721 this is not certain to actually improve performance. */
/* NOTE(review): lossy extraction -- case labels, breaks, braces and
   the return statement are missing below.  */
14724 output_move_neon (rtx
*operands
)
14726 rtx reg
, mem
, addr
, ops
[2];
14727 int regno
, nregs
, load
= REG_P (operands
[0]);
14730 enum machine_mode mode
;
/* Non-load operand is the (vector) register.  */
14732 reg
= operands
[!load
];
14733 mem
= operands
[load
];
14735 mode
= GET_MODE (reg
);
14737 gcc_assert (REG_P (reg
));
14738 regno
= REGNO (reg
);
/* Number of double-word registers covered by MODE.  */
14739 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
14740 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
14741 || NEON_REGNO_OK_FOR_QUAD (regno
));
14742 gcc_assert (VALID_NEON_DREG_MODE (mode
)
14743 || VALID_NEON_QREG_MODE (mode
)
14744 || VALID_NEON_STRUCT_MODE (mode
))
;
14745 gcc_assert (MEM_P (mem
));
14747 addr
= XEXP (mem
, 0);
14749 /* Strip off const from addresses like (const (plus (...))). */
14750 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
14751 addr
= XEXP (addr
, 0);
14753 switch (GET_CODE (addr
))
14756 /* We have to use vldm / vstm for too-large modes. */
14759 templ
= "v%smia%%?\t%%0!, %%h1";
14760 ops
[0] = XEXP (addr
, 0);
14764 templ
= "v%s1.64\t%%h1, %%A0";
14771 /* We have to use vldm / vstm in this case, since there is no
14772 pre-decrement form of the vld1 / vst1 instructions. */
14773 templ
= "v%smdb%%?\t%%0!, %%h1";
14774 ops
[0] = XEXP (addr
, 0);
14779 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14780 gcc_unreachable ();
/* Element-wise path: one vldr/vstr per D register.  */
14787 for (i
= 0; i
< nregs
; i
++)
14789 /* We're only using DImode here because it's a convenient size. */
14790 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
14791 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
/* Defer the one register that overlaps the address to the end.  */
14792 if (reg_overlap_mentioned_p (ops
[0], mem
))
14794 gcc_assert (overlap
== -1);
14799 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
14800 output_asm_insn (buff
, ops
);
/* Now handle the deferred overlapping register, if any.  */
14805 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
14806 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
14807 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
14808 output_asm_insn (buff
, ops
);
14815 /* We have to use vldm / vstm for too-large modes. */
14817 templ
= "v%smia%%?\t%%m0, %%h1";
14819 templ
= "v%s1.64\t%%h1, %%A0";
14825 sprintf (buff
, templ
, load
? "ld" : "st");
14826 output_asm_insn (buff
, ops
);
14831 /* Compute and return the length of neon_mov<mode>, where <mode> is
14832 one of VSTRUCT modes: EI, OI, CI or XI. */
14834 arm_attr_length_move_neon (rtx insn
)
14836 rtx reg
, mem
, addr
;
14838 enum machine_mode mode
;
14840 extract_insn_cached (insn
);
14842 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
14844 mode
= GET_MODE (recog_data
.operand
[0]);
14855 gcc_unreachable ();
14859 load
= REG_P (recog_data
.operand
[0]);
14860 reg
= recog_data
.operand
[!load
];
14861 mem
= recog_data
.operand
[load
];
14863 gcc_assert (MEM_P (mem
));
14865 mode
= GET_MODE (reg
);
14866 addr
= XEXP (mem
, 0);
14868 /* Strip off const from addresses like (const (plus (...))). */
14869 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
14870 addr
= XEXP (addr
, 0);
14872 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
14874 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
14881 /* Return nonzero if the offset in the address is an immediate. Otherwise,
14885 arm_address_offset_is_imm (rtx insn
)
14889 extract_insn_cached (insn
);
14891 if (REG_P (recog_data
.operand
[0]))
14894 mem
= recog_data
.operand
[0];
14896 gcc_assert (MEM_P (mem
));
14898 addr
= XEXP (mem
, 0);
14901 || (GET_CODE (addr
) == PLUS
14902 && REG_P (XEXP (addr
, 0))
14903 && CONST_INT_P (XEXP (addr
, 1))))
14909 /* Output an ADD r, s, #n where n may be too big for one instruction.
14910 If adding zero to one register, output nothing. */
14912 output_add_immediate (rtx
*operands
)
14914 HOST_WIDE_INT n
= INTVAL (operands
[2]);
14916 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
14919 output_multi_immediate (operands
,
14920 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14923 output_multi_immediate (operands
,
14924 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14931 /* Output a multiple immediate operation.
14932 OPERANDS is the vector of operands referred to in the output patterns.
14933 INSTR1 is the output pattern to use for the first constant.
14934 INSTR2 is the output pattern to use for subsequent constants.
14935 IMMED_OP is the index of the constant slot in OPERANDS.
14936 N is the constant value. */
14937 static const char *
14938 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
14939 int immed_op
, HOST_WIDE_INT n
)
14941 #if HOST_BITS_PER_WIDE_INT > 32
14947 /* Quick and easy output. */
14948 operands
[immed_op
] = const0_rtx
;
14949 output_asm_insn (instr1
, operands
);
14954 const char * instr
= instr1
;
14956 /* Note that n is never zero here (which would give no output). */
14957 for (i
= 0; i
< 32; i
+= 2)
14961 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
14962 output_asm_insn (instr
, operands
);
14972 /* Return the name of a shifter operation. */
14973 static const char *
14974 arm_shift_nmem(enum rtx_code code
)
14979 return ARM_LSL_NAME
;
14995 /* Return the appropriate ARM instruction for the operation code.
14996 The returned result should not be overwritten. OP is the rtx of the
14997 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
15000 arithmetic_instr (rtx op
, int shift_first_arg
)
15002 switch (GET_CODE (op
))
15008 return shift_first_arg
? "rsb" : "sub";
15023 return arm_shift_nmem(GET_CODE(op
));
15026 gcc_unreachable ();
15030 /* Ensure valid constant shifts and return the appropriate shift mnemonic
15031 for the operation code. The returned result should not be overwritten.
15032 OP is the rtx code of the shift.
15033 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
15035 static const char *
15036 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
15039 enum rtx_code code
= GET_CODE (op
);
15041 switch (GET_CODE (XEXP (op
, 1)))
15049 *amountp
= INTVAL (XEXP (op
, 1));
15053 gcc_unreachable ();
15059 gcc_assert (*amountp
!= -1);
15060 *amountp
= 32 - *amountp
;
15063 /* Fall through. */
15069 mnem
= arm_shift_nmem(code
);
15073 /* We never have to worry about the amount being other than a
15074 power of 2, since this case can never be reloaded from a reg. */
15075 gcc_assert (*amountp
!= -1);
15076 *amountp
= int_log2 (*amountp
);
15077 return ARM_LSL_NAME
;
15080 gcc_unreachable ();
15083 if (*amountp
!= -1)
15085 /* This is not 100% correct, but follows from the desire to merge
15086 multiplication by a power of 2 with the recognizer for a
15087 shift. >=32 is not a valid shift for "lsl", so we must try and
15088 output a shift that produces the correct arithmetical result.
15089 Using lsr #32 is identical except for the fact that the carry bit
15090 is not set correctly if we set the flags; but we never use the
15091 carry bit from such an operation, so we can ignore that. */
15092 if (code
== ROTATERT
)
15093 /* Rotate is just modulo 32. */
15095 else if (*amountp
!= (*amountp
& 31))
15097 if (code
== ASHIFT
)
15102 /* Shifts of 0 are no-ops. */
15110 /* Obtain the shift from the POWER of two. */
15112 static HOST_WIDE_INT
15113 int_log2 (HOST_WIDE_INT power
)
15115 HOST_WIDE_INT shift
= 0;
15117 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
15119 gcc_assert (shift
<= 31);
15126 /* Output a .ascii pseudo-op, keeping track of lengths. This is
15127 because /bin/as is horribly restrictive. The judgement about
15128 whether or not each character is 'printable' (and can be output as
15129 is) or not (and must be printed with an octal escape) must be made
15130 with reference to the *host* character set -- the situation is
15131 similar to that discussed in the comments above pp_c_char in
15132 c-pretty-print.c. */
15134 #define MAX_ASCII_LEN 51
15137 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
15140 int len_so_far
= 0;
15142 fputs ("\t.ascii\t\"", stream
);
15144 for (i
= 0; i
< len
; i
++)
15148 if (len_so_far
>= MAX_ASCII_LEN
)
15150 fputs ("\"\n\t.ascii\t\"", stream
);
15156 if (c
== '\\' || c
== '\"')
15158 putc ('\\', stream
);
15166 fprintf (stream
, "\\%03o", c
);
15171 fputs ("\"\n", stream
);
15174 /* Compute the register save mask for registers 0 through 12
15175 inclusive. This code is used by arm_compute_save_reg_mask. */
15177 static unsigned long
15178 arm_compute_save_reg0_reg12_mask (void)
15180 unsigned long func_type
= arm_current_func_type ();
15181 unsigned long save_reg_mask
= 0;
15184 if (IS_INTERRUPT (func_type
))
15186 unsigned int max_reg
;
15187 /* Interrupt functions must not corrupt any registers,
15188 even call clobbered ones. If this is a leaf function
15189 we can just examine the registers used by the RTL, but
15190 otherwise we have to assume that whatever function is
15191 called might clobber anything, and so we have to save
15192 all the call-clobbered registers as well. */
15193 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
15194 /* FIQ handlers have registers r8 - r12 banked, so
15195 we only need to check r0 - r7, Normal ISRs only
15196 bank r14 and r15, so we must check up to r12.
15197 r13 is the stack pointer which is always preserved,
15198 so we do not need to consider it here. */
15203 for (reg
= 0; reg
<= max_reg
; reg
++)
15204 if (df_regs_ever_live_p (reg
)
15205 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
15206 save_reg_mask
|= (1 << reg
);
15208 /* Also save the pic base register if necessary. */
15210 && !TARGET_SINGLE_PIC_BASE
15211 && arm_pic_register
!= INVALID_REGNUM
15212 && crtl
->uses_pic_offset_table
)
15213 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
15215 else if (IS_VOLATILE(func_type
))
15217 /* For noreturn functions we historically omitted register saves
15218 altogether. However this really messes up debugging. As a
15219 compromise save just the frame pointers. Combined with the link
15220 register saved elsewhere this should be sufficient to get
15222 if (frame_pointer_needed
)
15223 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
15224 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
15225 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
15226 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
15227 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
15231 /* In the normal case we only need to save those registers
15232 which are call saved and which are used by this function. */
15233 for (reg
= 0; reg
<= 11; reg
++)
15234 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
15235 save_reg_mask
|= (1 << reg
);
15237 /* Handle the frame pointer as a special case. */
15238 if (frame_pointer_needed
)
15239 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
15241 /* If we aren't loading the PIC register,
15242 don't stack it even though it may be live. */
15244 && !TARGET_SINGLE_PIC_BASE
15245 && arm_pic_register
!= INVALID_REGNUM
15246 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
15247 || crtl
->uses_pic_offset_table
))
15248 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
15250 /* The prologue will copy SP into R0, so save it. */
15251 if (IS_STACKALIGN (func_type
))
15252 save_reg_mask
|= 1;
15255 /* Save registers so the exception handler can modify them. */
15256 if (crtl
->calls_eh_return
)
15262 reg
= EH_RETURN_DATA_REGNO (i
);
15263 if (reg
== INVALID_REGNUM
)
15265 save_reg_mask
|= 1 << reg
;
15269 return save_reg_mask
;
15273 /* Compute the number of bytes used to store the static chain register on the
15274 stack, above the stack frame. We need to know this accurately to get the
15275 alignment of the rest of the stack frame correct. */
15277 static int arm_compute_static_chain_stack_bytes (void)
15279 unsigned long func_type
= arm_current_func_type ();
15280 int static_chain_stack_bytes
= 0;
15282 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
&&
15283 IS_NESTED (func_type
) &&
15284 df_regs_ever_live_p (3) && crtl
->args
.pretend_args_size
== 0)
15285 static_chain_stack_bytes
= 4;
15287 return static_chain_stack_bytes
;
15291 /* Compute a bit mask of which registers need to be
15292 saved on the stack for the current function.
15293 This is used by arm_get_frame_offsets, which may add extra registers. */
15295 static unsigned long
15296 arm_compute_save_reg_mask (void)
15298 unsigned int save_reg_mask
= 0;
15299 unsigned long func_type
= arm_current_func_type ();
15302 if (IS_NAKED (func_type
))
15303 /* This should never really happen. */
15306 /* If we are creating a stack frame, then we must save the frame pointer,
15307 IP (which will hold the old stack pointer), LR and the PC. */
15308 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
15310 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
15313 | (1 << PC_REGNUM
);
15315 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
15317 /* Decide if we need to save the link register.
15318 Interrupt routines have their own banked link register,
15319 so they never need to save it.
15320 Otherwise if we do not use the link register we do not need to save
15321 it. If we are pushing other registers onto the stack however, we
15322 can save an instruction in the epilogue by pushing the link register
15323 now and then popping it back into the PC. This incurs extra memory
15324 accesses though, so we only do it when optimizing for size, and only
15325 if we know that we will not need a fancy return sequence. */
15326 if (df_regs_ever_live_p (LR_REGNUM
)
15329 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
15330 && !crtl
->calls_eh_return
))
15331 save_reg_mask
|= 1 << LR_REGNUM
;
15333 if (cfun
->machine
->lr_save_eliminated
)
15334 save_reg_mask
&= ~ (1 << LR_REGNUM
);
15336 if (TARGET_REALLY_IWMMXT
15337 && ((bit_count (save_reg_mask
)
15338 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
15339 arm_compute_static_chain_stack_bytes())
15342 /* The total number of registers that are going to be pushed
15343 onto the stack is odd. We need to ensure that the stack
15344 is 64-bit aligned before we start to save iWMMXt registers,
15345 and also before we start to create locals. (A local variable
15346 might be a double or long long which we will load/store using
15347 an iWMMXt instruction). Therefore we need to push another
15348 ARM register, so that the stack will be 64-bit aligned. We
15349 try to avoid using the arg registers (r0 -r3) as they might be
15350 used to pass values in a tail call. */
15351 for (reg
= 4; reg
<= 12; reg
++)
15352 if ((save_reg_mask
& (1 << reg
)) == 0)
15356 save_reg_mask
|= (1 << reg
);
15359 cfun
->machine
->sibcall_blocked
= 1;
15360 save_reg_mask
|= (1 << 3);
15364 /* We may need to push an additional register for use initializing the
15365 PIC base register. */
15366 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
15367 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
15369 reg
= thumb_find_work_register (1 << 4);
15370 if (!call_used_regs
[reg
])
15371 save_reg_mask
|= (1 << reg
);
15374 return save_reg_mask
;
15378 /* Compute a bit mask of which registers need to be
15379 saved on the stack for the current function. */
15380 static unsigned long
15381 thumb1_compute_save_reg_mask (void)
15383 unsigned long mask
;
15387 for (reg
= 0; reg
< 12; reg
++)
15388 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
15392 && !TARGET_SINGLE_PIC_BASE
15393 && arm_pic_register
!= INVALID_REGNUM
15394 && crtl
->uses_pic_offset_table
)
15395 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
15397 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15398 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
15399 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
15401 /* LR will also be pushed if any lo regs are pushed. */
15402 if (mask
& 0xff || thumb_force_lr_save ())
15403 mask
|= (1 << LR_REGNUM
);
15405 /* Make sure we have a low work register if we need one.
15406 We will need one if we are going to push a high register,
15407 but we are not currently intending to push a low register. */
15408 if ((mask
& 0xff) == 0
15409 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
15411 /* Use thumb_find_work_register to choose which register
15412 we will use. If the register is live then we will
15413 have to push it. Use LAST_LO_REGNUM as our fallback
15414 choice for the register to select. */
15415 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
15416 /* Make sure the register returned by thumb_find_work_register is
15417 not part of the return value. */
15418 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
15419 reg
= LAST_LO_REGNUM
;
15421 if (! call_used_regs
[reg
])
15425 /* The 504 below is 8 bytes less than 512 because there are two possible
15426 alignment words. We can't tell here if they will be present or not so we
15427 have to play it safe and assume that they are. */
15428 if ((CALLER_INTERWORKING_SLOT_SIZE
+
15429 ROUND_UP_WORD (get_frame_size ()) +
15430 crtl
->outgoing_args_size
) >= 504)
15432 /* This is the same as the code in thumb1_expand_prologue() which
15433 determines which register to use for stack decrement. */
15434 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
15435 if (mask
& (1 << reg
))
15438 if (reg
> LAST_LO_REGNUM
)
15440 /* Make sure we have a register available for stack decrement. */
15441 mask
|= 1 << LAST_LO_REGNUM
;
15449 /* Return the number of bytes required to save VFP registers. */
15451 arm_get_vfp_saved_size (void)
15453 unsigned int regno
;
15458 /* Space for saved VFP registers. */
15459 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
15462 for (regno
= FIRST_VFP_REGNUM
;
15463 regno
< LAST_VFP_REGNUM
;
15466 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
15467 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
15471 /* Workaround ARM10 VFPr1 bug. */
15472 if (count
== 2 && !arm_arch6
)
15474 saved
+= count
* 8;
15483 if (count
== 2 && !arm_arch6
)
15485 saved
+= count
* 8;
15492 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15493 everything bar the final return instruction. If simple_return is true,
15494 then do not output epilogue, because it has already been emitted in RTL. */
15496 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
15497 bool simple_return
)
15499 char conditional
[10];
15502 unsigned long live_regs_mask
;
15503 unsigned long func_type
;
15504 arm_stack_offsets
*offsets
;
15506 func_type
= arm_current_func_type ();
15508 if (IS_NAKED (func_type
))
15511 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
15513 /* If this function was declared non-returning, and we have
15514 found a tail call, then we have to trust that the called
15515 function won't return. */
15520 /* Otherwise, trap an attempted return by aborting. */
15522 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
15524 assemble_external_libcall (ops
[1]);
15525 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
15531 gcc_assert (!cfun
->calls_alloca
|| really_return
);
15533 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
15535 cfun
->machine
->return_used_this_function
= 1;
15537 offsets
= arm_get_frame_offsets ();
15538 live_regs_mask
= offsets
->saved_regs_mask
;
15540 if (!simple_return
&& live_regs_mask
)
15542 const char * return_reg
;
15544 /* If we do not have any special requirements for function exit
15545 (e.g. interworking) then we can load the return address
15546 directly into the PC. Otherwise we must load it into LR. */
15548 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
15549 return_reg
= reg_names
[PC_REGNUM
];
15551 return_reg
= reg_names
[LR_REGNUM
];
15553 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
15555 /* There are three possible reasons for the IP register
15556 being saved. 1) a stack frame was created, in which case
15557 IP contains the old stack pointer, or 2) an ISR routine
15558 corrupted it, or 3) it was saved to align the stack on
15559 iWMMXt. In case 1, restore IP into SP, otherwise just
15561 if (frame_pointer_needed
)
15563 live_regs_mask
&= ~ (1 << IP_REGNUM
);
15564 live_regs_mask
|= (1 << SP_REGNUM
);
15567 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
15570 /* On some ARM architectures it is faster to use LDR rather than
15571 LDM to load a single register. On other architectures, the
15572 cost is the same. In 26 bit mode, or for exception handlers,
15573 we have to use LDM to load the PC so that the CPSR is also
15575 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
15576 if (live_regs_mask
== (1U << reg
))
15579 if (reg
<= LAST_ARM_REGNUM
15580 && (reg
!= LR_REGNUM
15582 || ! IS_INTERRUPT (func_type
)))
15584 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
15585 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
15592 /* Generate the load multiple instruction to restore the
15593 registers. Note we can get here, even if
15594 frame_pointer_needed is true, but only if sp already
15595 points to the base of the saved core registers. */
15596 if (live_regs_mask
& (1 << SP_REGNUM
))
15598 unsigned HOST_WIDE_INT stack_adjust
;
15600 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
15601 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
15603 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
15604 if (TARGET_UNIFIED_ASM
)
15605 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
15607 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
15610 /* If we can't use ldmib (SA110 bug),
15611 then try to pop r3 instead. */
15613 live_regs_mask
|= 1 << 3;
15615 if (TARGET_UNIFIED_ASM
)
15616 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
15618 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
15622 if (TARGET_UNIFIED_ASM
)
15623 sprintf (instr
, "pop%s\t{", conditional
);
15625 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
15627 p
= instr
+ strlen (instr
);
15629 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
15630 if (live_regs_mask
& (1 << reg
))
15632 int l
= strlen (reg_names
[reg
]);
15638 memcpy (p
, ", ", 2);
15642 memcpy (p
, "%|", 2);
15643 memcpy (p
+ 2, reg_names
[reg
], l
);
15647 if (live_regs_mask
& (1 << LR_REGNUM
))
15649 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
15650 /* If returning from an interrupt, restore the CPSR. */
15651 if (IS_INTERRUPT (func_type
))
15658 output_asm_insn (instr
, & operand
);
15660 /* See if we need to generate an extra instruction to
15661 perform the actual function return. */
15663 && func_type
!= ARM_FT_INTERWORKED
15664 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
15666 /* The return has already been handled
15667 by loading the LR into the PC. */
15674 switch ((int) ARM_FUNC_TYPE (func_type
))
15678 /* ??? This is wrong for unified assembly syntax. */
15679 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
15682 case ARM_FT_INTERWORKED
:
15683 sprintf (instr
, "bx%s\t%%|lr", conditional
);
15686 case ARM_FT_EXCEPTION
:
15687 /* ??? This is wrong for unified assembly syntax. */
15688 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
15692 /* Use bx if it's available. */
15693 if (arm_arch5
|| arm_arch4t
)
15694 sprintf (instr
, "bx%s\t%%|lr", conditional
);
15696 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
15700 output_asm_insn (instr
, & operand
);
15706 /* Write the function name into the code section, directly preceding
15707 the function prologue.
15709 Code will be output similar to this:
15711 .ascii "arm_poke_function_name", 0
15714 .word 0xff000000 + (t1 - t0)
15715 arm_poke_function_name
15717 stmfd sp!, {fp, ip, lr, pc}
15720 When performing a stack backtrace, code can inspect the value
15721 of 'pc' stored at 'fp' + 0. If the trace function then looks
15722 at location pc - 12 and the top 8 bits are set, then we know
15723 that there is a function name embedded immediately preceding this
15724 location and has length ((pc[-3]) & 0xff000000).
15726 We assume that pc is declared as a pointer to an unsigned long.
15728 It is of no benefit to output the function name if we are assembling
15729 a leaf function. These function types will not contain a stack
15730 backtrace structure, therefore it is not possible to determine the
15733 arm_poke_function_name (FILE *stream
, const char *name
)
15735 unsigned long alignlength
;
15736 unsigned long length
;
15739 length
= strlen (name
) + 1;
15740 alignlength
= ROUND_UP_WORD (length
);
15742 ASM_OUTPUT_ASCII (stream
, name
, length
);
15743 ASM_OUTPUT_ALIGN (stream
, 2);
15744 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
15745 assemble_aligned_integer (UNITS_PER_WORD
, x
);
15748 /* Place some comments into the assembler stream
15749 describing the current function. */
15751 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
15753 unsigned long func_type
;
15755 /* ??? Do we want to print some of the below anyway? */
15759 /* Sanity check. */
15760 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
15762 func_type
= arm_current_func_type ();
15764 switch ((int) ARM_FUNC_TYPE (func_type
))
15767 case ARM_FT_NORMAL
:
15769 case ARM_FT_INTERWORKED
:
15770 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
15773 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
15776 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
15778 case ARM_FT_EXCEPTION
:
15779 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
15783 if (IS_NAKED (func_type
))
15784 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15786 if (IS_VOLATILE (func_type
))
15787 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
15789 if (IS_NESTED (func_type
))
15790 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
15791 if (IS_STACKALIGN (func_type
))
15792 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15794 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15796 crtl
->args
.pretend_args_size
, frame_size
);
15798 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15799 frame_pointer_needed
,
15800 cfun
->machine
->uses_anonymous_args
);
15802 if (cfun
->machine
->lr_save_eliminated
)
15803 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
15805 if (crtl
->calls_eh_return
)
15806 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
15811 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
15812 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
15814 arm_stack_offsets
*offsets
;
15820 /* Emit any call-via-reg trampolines that are needed for v4t support
15821 of call_reg and call_value_reg type insns. */
15822 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
15824 rtx label
= cfun
->machine
->call_via
[regno
];
15828 switch_to_section (function_section (current_function_decl
));
15829 targetm
.asm_out
.internal_label (asm_out_file
, "L",
15830 CODE_LABEL_NUMBER (label
));
15831 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
15835 /* ??? Probably not safe to set this here, since it assumes that a
15836 function will be emitted as assembly immediately after we generate
15837 RTL for it. This does not happen for inline functions. */
15838 cfun
->machine
->return_used_this_function
= 0;
15840 else /* TARGET_32BIT */
15842 /* We need to take into account any stack-frame rounding. */
15843 offsets
= arm_get_frame_offsets ();
15845 gcc_assert (!use_return_insn (FALSE
, NULL
)
15846 || (cfun
->machine
->return_used_this_function
!= 0)
15847 || offsets
->saved_regs
== offsets
->outgoing_args
15848 || frame_pointer_needed
);
15850 /* Reset the ARM-specific per-function variables. */
15851 after_arm_reorg
= 0;
15855 /* Generate and emit an insn that we will recognize as a push_multi.
15856 Unfortunately, since this insn does not reflect very well the actual
15857 semantics of the operation, we need to annotate the insn for the benefit
15858 of DWARF2 frame unwind information. */
15860 emit_multi_reg_push (unsigned long mask
)
15863 int num_dwarf_regs
;
15867 int dwarf_par_index
;
15870 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
15871 if (mask
& (1 << i
))
15874 gcc_assert (num_regs
&& num_regs
<= 16);
15876 /* We don't record the PC in the dwarf frame information. */
15877 num_dwarf_regs
= num_regs
;
15878 if (mask
& (1 << PC_REGNUM
))
15881 /* For the body of the insn we are going to generate an UNSPEC in
15882 parallel with several USEs. This allows the insn to be recognized
15883 by the push_multi pattern in the arm.md file.
15885 The body of the insn looks something like this:
15888 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15889 (const_int:SI <num>)))
15890 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15896 For the frame note however, we try to be more explicit and actually
15897 show each register being stored into the stack frame, plus a (single)
15898 decrement of the stack pointer. We do it this way in order to be
15899 friendly to the stack unwinding code, which only wants to see a single
15900 stack decrement per instruction. The RTL we generate for the note looks
15901 something like this:
15904 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15905 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15906 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15907 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15911 FIXME:: In an ideal world the PRE_MODIFY would not exist and
15912 instead we'd have a parallel expression detailing all
15913 the stores to the various memory addresses so that debug
15914 information is more up-to-date. Remember however while writing
15915 this to take care of the constraints with the push instruction.
15917 Note also that this has to be taken care of for the VFP registers.
15919 For more see PR43399. */
15921 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
15922 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
15923 dwarf_par_index
= 1;
15925 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
15927 if (mask
& (1 << i
))
15929 reg
= gen_rtx_REG (SImode
, i
);
15931 XVECEXP (par
, 0, 0)
15932 = gen_rtx_SET (VOIDmode
,
15935 gen_rtx_PRE_MODIFY (Pmode
,
15938 (Pmode
, stack_pointer_rtx
,
15941 gen_rtx_UNSPEC (BLKmode
,
15942 gen_rtvec (1, reg
),
15943 UNSPEC_PUSH_MULT
));
15945 if (i
!= PC_REGNUM
)
15947 tmp
= gen_rtx_SET (VOIDmode
,
15948 gen_frame_mem (SImode
, stack_pointer_rtx
),
15950 RTX_FRAME_RELATED_P (tmp
) = 1;
15951 XVECEXP (dwarf
, 0, dwarf_par_index
) = tmp
;
15959 for (j
= 1, i
++; j
< num_regs
; i
++)
15961 if (mask
& (1 << i
))
15963 reg
= gen_rtx_REG (SImode
, i
);
15965 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
15967 if (i
!= PC_REGNUM
)
15970 = gen_rtx_SET (VOIDmode
,
15973 plus_constant (Pmode
, stack_pointer_rtx
,
15976 RTX_FRAME_RELATED_P (tmp
) = 1;
15977 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
15984 par
= emit_insn (par
);
15986 tmp
= gen_rtx_SET (VOIDmode
,
15988 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
15989 RTX_FRAME_RELATED_P (tmp
) = 1;
15990 XVECEXP (dwarf
, 0, 0) = tmp
;
15992 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
15997 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
15998 SAVED_REGS_MASK shows which registers need to be restored.
16000 Unfortunately, since this insn does not reflect very well the actual
16001 semantics of the operation, we need to annotate the insn for the benefit
16002 of DWARF2 frame unwind information. */
16004 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
16009 rtx dwarf
= NULL_RTX
;
16015 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
16016 offset_adj
= return_in_pc
? 1 : 0;
16017 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
16018 if (saved_regs_mask
& (1 << i
))
16021 gcc_assert (num_regs
&& num_regs
<= 16);
16023 /* If SP is in reglist, then we don't emit SP update insn. */
16024 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
16026 /* The parallel needs to hold num_regs SETs
16027 and one SET for the stack update. */
16028 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
16033 XVECEXP (par
, 0, 0) = tmp
;
16038 /* Increment the stack pointer, based on there being
16039 num_regs 4-byte registers to restore. */
16040 tmp
= gen_rtx_SET (VOIDmode
,
16042 plus_constant (Pmode
,
16045 RTX_FRAME_RELATED_P (tmp
) = 1;
16046 XVECEXP (par
, 0, offset_adj
) = tmp
;
16049 /* Now restore every reg, which may include PC. */
16050 for (j
= 0, i
= 0; j
< num_regs
; i
++)
16051 if (saved_regs_mask
& (1 << i
))
16053 reg
= gen_rtx_REG (SImode
, i
);
16054 tmp
= gen_rtx_SET (VOIDmode
,
16058 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
16059 RTX_FRAME_RELATED_P (tmp
) = 1;
16060 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
16062 /* We need to maintain a sequence for DWARF info too. As dwarf info
16063 should not have PC, skip PC. */
16064 if (i
!= PC_REGNUM
)
16065 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
16071 par
= emit_jump_insn (par
);
16073 par
= emit_insn (par
);
16075 REG_NOTES (par
) = dwarf
;
16078 /* Generate and emit an insn pattern that we will recognize as a pop_multi
16079 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
16081 Unfortunately, since this insn does not reflect very well the actual
16082 semantics of the operation, we need to annotate the insn for the benefit
16083 of DWARF2 frame unwind information. */
16085 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
16089 rtx dwarf
= NULL_RTX
;
16092 gcc_assert (num_regs
&& num_regs
<= 32);
16094 /* Workaround ARM10 VFPr1 bug. */
16095 if (num_regs
== 2 && !arm_arch6
)
16097 if (first_reg
== 15)
16103 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
16104 there could be up to 32 D-registers to restore.
16105 If there are more than 16 D-registers, make two recursive calls,
16106 each of which emits one pop_multi instruction. */
16109 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
16110 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
16114 /* The parallel needs to hold num_regs SETs
16115 and one SET for the stack update. */
16116 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
16118 /* Increment the stack pointer, based on there being
16119 num_regs 8-byte registers to restore. */
16120 tmp
= gen_rtx_SET (VOIDmode
,
16122 plus_constant (Pmode
, base_reg
, 8 * num_regs
));
16123 RTX_FRAME_RELATED_P (tmp
) = 1;
16124 XVECEXP (par
, 0, 0) = tmp
;
16126 /* Now show every reg that will be restored, using a SET for each. */
16127 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
16129 reg
= gen_rtx_REG (DFmode
, i
);
16131 tmp
= gen_rtx_SET (VOIDmode
,
16135 plus_constant (Pmode
, base_reg
, 8 * j
)));
16136 RTX_FRAME_RELATED_P (tmp
) = 1;
16137 XVECEXP (par
, 0, j
+ 1) = tmp
;
16139 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
16144 par
= emit_insn (par
);
16145 REG_NOTES (par
) = dwarf
;
16148 /* Calculate the size of the return value that is passed in registers. */
16150 arm_size_return_regs (void)
16152 enum machine_mode mode
;
16154 if (crtl
->return_rtx
!= 0)
16155 mode
= GET_MODE (crtl
->return_rtx
);
16157 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
16159 return GET_MODE_SIZE (mode
);
16162 /* Return true if the current function needs to save/restore LR. */
16164 thumb_force_lr_save (void)
16166 return !cfun
->machine
->lr_save_eliminated
16167 && (!leaf_function_p ()
16168 || thumb_far_jump_used_p ()
16169 || df_regs_ever_live_p (LR_REGNUM
));
16173 /* Return true if r3 is used by any of the tail call insns in the
16174 current function. */
16176 any_sibcall_uses_r3 (void)
16181 if (!crtl
->tail_call_emit
)
16183 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
16184 if (e
->flags
& EDGE_SIBCALL
)
16186 rtx call
= BB_END (e
->src
);
16187 if (!CALL_P (call
))
16188 call
= prev_nonnote_nondebug_insn (call
);
16189 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
16190 if (find_regno_fusage (call
, USE
, 3))
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
			     ----
			    |    | \
			    |    |   saved arguments for
			    |    |   vararg functions
			    |    | /
			      --
   hard FP & arg pointer -> |    | \
			    |    |   stack
			    |    |   frame
			    |    | /
			      --
			    |    | \
			    |    |   call saved
			    |    |   registers
      soft frame pointer -> |    | /
			      --
			    |    | \
			    |    |   local
			    |    |   variables
     locals base pointer -> |    | /
			      --
			    |    | \
			    |    |   outgoing
			    |    |   arguments
   current stack pointer -> |    | /
			      --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue() and arm_compute_save_reg_mask().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
16246 /* Calculate stack offsets. These are used to calculate register elimination
16247 offsets and in prologue/epilogue code. Also calculates which registers
16248 should be saved. */
16250 static arm_stack_offsets
*
16251 arm_get_frame_offsets (void)
16253 struct arm_stack_offsets
*offsets
;
16254 unsigned long func_type
;
16258 HOST_WIDE_INT frame_size
;
16261 offsets
= &cfun
->machine
->stack_offsets
;
16263 /* We need to know if we are a leaf function. Unfortunately, it
16264 is possible to be called after start_sequence has been called,
16265 which causes get_insns to return the insns for the sequence,
16266 not the function, which will cause leaf_function_p to return
16267 the incorrect result.
16269 to know about leaf functions once reload has completed, and the
16270 frame size cannot be changed after that time, so we can safely
16271 use the cached value. */
16273 if (reload_completed
)
16276 /* Initially this is the size of the local variables. It will translated
16277 into an offset once we have determined the size of preceding data. */
16278 frame_size
= ROUND_UP_WORD (get_frame_size ());
16280 leaf
= leaf_function_p ();
16282 /* Space for variadic functions. */
16283 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
16285 /* In Thumb mode this is incorrect, but never used. */
16286 offsets
->frame
= offsets
->saved_args
+ (frame_pointer_needed
? 4 : 0) +
16287 arm_compute_static_chain_stack_bytes();
16291 unsigned int regno
;
16293 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
16294 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
16295 saved
= core_saved
;
16297 /* We know that SP will be doubleword aligned on entry, and we must
16298 preserve that condition at any subroutine call. We also require the
16299 soft frame pointer to be doubleword aligned. */
16301 if (TARGET_REALLY_IWMMXT
)
16303 /* Check for the call-saved iWMMXt registers. */
16304 for (regno
= FIRST_IWMMXT_REGNUM
;
16305 regno
<= LAST_IWMMXT_REGNUM
;
16307 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
16311 func_type
= arm_current_func_type ();
16312 /* Space for saved VFP registers. */
16313 if (! IS_VOLATILE (func_type
)
16314 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
16315 saved
+= arm_get_vfp_saved_size ();
16317 else /* TARGET_THUMB1 */
16319 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
16320 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
16321 saved
= core_saved
;
16322 if (TARGET_BACKTRACE
)
16326 /* Saved registers include the stack frame. */
16327 offsets
->saved_regs
= offsets
->saved_args
+ saved
+
16328 arm_compute_static_chain_stack_bytes();
16329 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
16330 /* A leaf function does not need any stack alignment if it has nothing
16332 if (leaf
&& frame_size
== 0
16333 /* However if it calls alloca(), we have a dynamically allocated
16334 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16335 && ! cfun
->calls_alloca
)
16337 offsets
->outgoing_args
= offsets
->soft_frame
;
16338 offsets
->locals_base
= offsets
->soft_frame
;
16342 /* Ensure SFP has the correct alignment. */
16343 if (ARM_DOUBLEWORD_ALIGN
16344 && (offsets
->soft_frame
& 7))
16346 offsets
->soft_frame
+= 4;
16347 /* Try to align stack by pushing an extra reg. Don't bother doing this
16348 when there is a stack frame as the alignment will be rolled into
16349 the normal stack adjustment. */
16350 if (frame_size
+ crtl
->outgoing_args_size
== 0)
16354 /* If it is safe to use r3, then do so. This sometimes
16355 generates better code on Thumb-2 by avoiding the need to
16356 use 32-bit push/pop instructions. */
16357 if (! any_sibcall_uses_r3 ()
16358 && arm_size_return_regs () <= 12
16359 && (offsets
->saved_regs_mask
& (1 << 3)) == 0)
16364 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
16366 /* Avoid fixed registers; they may be changed at
16367 arbitrary times so it's unsafe to restore them
16368 during the epilogue. */
16370 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
16379 offsets
->saved_regs
+= 4;
16380 offsets
->saved_regs_mask
|= (1 << reg
);
16385 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
16386 offsets
->outgoing_args
= (offsets
->locals_base
16387 + crtl
->outgoing_args_size
);
16389 if (ARM_DOUBLEWORD_ALIGN
)
16391 /* Ensure SP remains doubleword aligned. */
16392 if (offsets
->outgoing_args
& 7)
16393 offsets
->outgoing_args
+= 4;
16394 gcc_assert (!(offsets
->outgoing_args
& 7));
16401 /* Calculate the relative offsets for the different stack pointers. Positive
16402 offsets are in the direction of stack growth. */
16405 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
16407 arm_stack_offsets
*offsets
;
16409 offsets
= arm_get_frame_offsets ();
16411 /* OK, now we have enough information to compute the distances.
16412 There must be an entry in these switch tables for each pair
16413 of registers in ELIMINABLE_REGS, even if some of the entries
16414 seem to be redundant or useless. */
16417 case ARG_POINTER_REGNUM
:
16420 case THUMB_HARD_FRAME_POINTER_REGNUM
:
16423 case FRAME_POINTER_REGNUM
:
16424 /* This is the reverse of the soft frame pointer
16425 to hard frame pointer elimination below. */
16426 return offsets
->soft_frame
- offsets
->saved_args
;
16428 case ARM_HARD_FRAME_POINTER_REGNUM
:
16429 /* This is only non-zero in the case where the static chain register
16430 is stored above the frame. */
16431 return offsets
->frame
- offsets
->saved_args
- 4;
16433 case STACK_POINTER_REGNUM
:
16434 /* If nothing has been pushed on the stack at all
16435 then this will return -4. This *is* correct! */
16436 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
16439 gcc_unreachable ();
16441 gcc_unreachable ();
16443 case FRAME_POINTER_REGNUM
:
16446 case THUMB_HARD_FRAME_POINTER_REGNUM
:
16449 case ARM_HARD_FRAME_POINTER_REGNUM
:
16450 /* The hard frame pointer points to the top entry in the
16451 stack frame. The soft frame pointer to the bottom entry
16452 in the stack frame. If there is no stack frame at all,
16453 then they are identical. */
16455 return offsets
->frame
- offsets
->soft_frame
;
16457 case STACK_POINTER_REGNUM
:
16458 return offsets
->outgoing_args
- offsets
->soft_frame
;
16461 gcc_unreachable ();
16463 gcc_unreachable ();
16466 /* You cannot eliminate from the stack pointer.
16467 In theory you could eliminate from the hard frame
16468 pointer to the stack pointer, but this will never
16469 happen, since if a stack frame is not needed the
16470 hard frame pointer will never be used. */
16471 gcc_unreachable ();
16475 /* Given FROM and TO register numbers, say whether this elimination is
16476 allowed. Frame pointer elimination is automatically handled.
16478 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16479 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16480 pointer, we must eliminate FRAME_POINTER_REGNUM into
16481 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16482 ARG_POINTER_REGNUM. */
16485 arm_can_eliminate (const int from
, const int to
)
16487 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
16488 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
16489 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
16490 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
16494 /* Emit RTL to save coprocessor registers on function entry. Returns the
16495 number of bytes pushed. */
16498 arm_save_coproc_regs(void)
16500 int saved_size
= 0;
16502 unsigned start_reg
;
16505 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
16506 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
16508 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
16509 insn
= gen_rtx_MEM (V2SImode
, insn
);
16510 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
16511 RTX_FRAME_RELATED_P (insn
) = 1;
16515 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
16517 start_reg
= FIRST_VFP_REGNUM
;
16519 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
16521 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
16522 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
16524 if (start_reg
!= reg
)
16525 saved_size
+= vfp_emit_fstmd (start_reg
,
16526 (reg
- start_reg
) / 2);
16527 start_reg
= reg
+ 2;
16530 if (start_reg
!= reg
)
16531 saved_size
+= vfp_emit_fstmd (start_reg
,
16532 (reg
- start_reg
) / 2);
16538 /* Set the Thumb frame pointer from the stack pointer. */
16541 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
16543 HOST_WIDE_INT amount
;
16546 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
16548 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
16549 stack_pointer_rtx
, GEN_INT (amount
)));
16552 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
16553 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16554 expects the first two operands to be the same. */
16557 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
16559 hard_frame_pointer_rtx
));
16563 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
16564 hard_frame_pointer_rtx
,
16565 stack_pointer_rtx
));
16567 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
16568 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
16569 RTX_FRAME_RELATED_P (dwarf
) = 1;
16570 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
16573 RTX_FRAME_RELATED_P (insn
) = 1;
16576 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16579 arm_expand_prologue (void)
16584 unsigned long live_regs_mask
;
16585 unsigned long func_type
;
16587 int saved_pretend_args
= 0;
16588 int saved_regs
= 0;
16589 unsigned HOST_WIDE_INT args_to_push
;
16590 arm_stack_offsets
*offsets
;
16592 func_type
= arm_current_func_type ();
16594 /* Naked functions don't have prologues. */
16595 if (IS_NAKED (func_type
))
16598 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
16599 args_to_push
= crtl
->args
.pretend_args_size
;
16601 /* Compute which register we will have to save onto the stack. */
16602 offsets
= arm_get_frame_offsets ();
16603 live_regs_mask
= offsets
->saved_regs_mask
;
16605 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
16607 if (IS_STACKALIGN (func_type
))
16611 /* Handle a word-aligned stack pointer. We generate the following:
16616 <save and restore r0 in normal prologue/epilogue>
16620 The unwinder doesn't need to know about the stack realignment.
16621 Just tell it we saved SP in r0. */
16622 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
16624 r0
= gen_rtx_REG (SImode
, 0);
16625 r1
= gen_rtx_REG (SImode
, 1);
16627 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
16628 RTX_FRAME_RELATED_P (insn
) = 1;
16629 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
16631 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
16633 /* ??? The CFA changes here, which may cause GDB to conclude that it
16634 has entered a different function. That said, the unwind info is
16635 correct, individually, before and after this instruction because
16636 we've described the save of SP, which will override the default
16637 handling of SP as restoring from the CFA. */
16638 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
16641 /* For APCS frames, if IP register is clobbered
16642 when creating frame, save that register in a special
16644 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
16646 if (IS_INTERRUPT (func_type
))
16648 /* Interrupt functions must not corrupt any registers.
16649 Creating a frame pointer however, corrupts the IP
16650 register, so we must push it first. */
16651 emit_multi_reg_push (1 << IP_REGNUM
);
16653 /* Do not set RTX_FRAME_RELATED_P on this insn.
16654 The dwarf stack unwinding code only wants to see one
16655 stack decrement per function, and this is not it. If
16656 this instruction is labeled as being part of the frame
16657 creation sequence then dwarf2out_frame_debug_expr will
16658 die when it encounters the assignment of IP to FP
16659 later on, since the use of SP here establishes SP as
16660 the CFA register and not IP.
16662 Anyway this instruction is not really part of the stack
16663 frame creation although it is part of the prologue. */
16665 else if (IS_NESTED (func_type
))
16667 /* The Static chain register is the same as the IP register
16668 used as a scratch register during stack frame creation.
16669 To get around this need to find somewhere to store IP
16670 whilst the frame is being created. We try the following
16673 1. The last argument register.
16674 2. A slot on the stack above the frame. (This only
16675 works if the function is not a varargs function).
16676 3. Register r3, after pushing the argument registers
16679 Note - we only need to tell the dwarf2 backend about the SP
16680 adjustment in the second variant; the static chain register
16681 doesn't need to be unwound, as it doesn't contain a value
16682 inherited from the caller. */
16684 if (df_regs_ever_live_p (3) == false)
16685 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
16686 else if (args_to_push
== 0)
16690 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16693 insn
= gen_rtx_PRE_DEC (SImode
, stack_pointer_rtx
);
16694 insn
= emit_set_insn (gen_frame_mem (SImode
, insn
), ip_rtx
);
16697 /* Just tell the dwarf backend that we adjusted SP. */
16698 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
16699 plus_constant (Pmode
, stack_pointer_rtx
,
16701 RTX_FRAME_RELATED_P (insn
) = 1;
16702 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
16706 /* Store the args on the stack. */
16707 if (cfun
->machine
->uses_anonymous_args
)
16708 insn
= emit_multi_reg_push
16709 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
16712 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
16713 GEN_INT (- args_to_push
)));
16715 RTX_FRAME_RELATED_P (insn
) = 1;
16717 saved_pretend_args
= 1;
16718 fp_offset
= args_to_push
;
16721 /* Now reuse r3 to preserve IP. */
16722 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
16726 insn
= emit_set_insn (ip_rtx
,
16727 plus_constant (Pmode
, stack_pointer_rtx
,
16729 RTX_FRAME_RELATED_P (insn
) = 1;
16734 /* Push the argument registers, or reserve space for them. */
16735 if (cfun
->machine
->uses_anonymous_args
)
16736 insn
= emit_multi_reg_push
16737 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
16740 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
16741 GEN_INT (- args_to_push
)));
16742 RTX_FRAME_RELATED_P (insn
) = 1;
16745 /* If this is an interrupt service routine, and the link register
16746 is going to be pushed, and we're not generating extra
16747 push of IP (needed when frame is needed and frame layout if apcs),
16748 subtracting four from LR now will mean that the function return
16749 can be done with a single instruction. */
16750 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
16751 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
16752 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
16755 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
16757 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
16760 if (live_regs_mask
)
16762 saved_regs
+= bit_count (live_regs_mask
) * 4;
16763 if (optimize_size
&& !frame_pointer_needed
16764 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
16766 /* If no coprocessor registers are being pushed and we don't have
16767 to worry about a frame pointer then push extra registers to
16768 create the stack frame. This is done is a way that does not
16769 alter the frame layout, so is independent of the epilogue. */
16773 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
16775 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
16776 if (frame
&& n
* 4 >= frame
)
16779 live_regs_mask
|= (1 << n
) - 1;
16780 saved_regs
+= frame
;
16783 insn
= emit_multi_reg_push (live_regs_mask
);
16784 RTX_FRAME_RELATED_P (insn
) = 1;
16787 if (! IS_VOLATILE (func_type
))
16788 saved_regs
+= arm_save_coproc_regs ();
16790 if (frame_pointer_needed
&& TARGET_ARM
)
16792 /* Create the new frame pointer. */
16793 if (TARGET_APCS_FRAME
)
16795 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
16796 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
16797 RTX_FRAME_RELATED_P (insn
) = 1;
16799 if (IS_NESTED (func_type
))
16801 /* Recover the static chain register. */
16802 if (!df_regs_ever_live_p (3)
16803 || saved_pretend_args
)
16804 insn
= gen_rtx_REG (SImode
, 3);
16805 else /* if (crtl->args.pretend_args_size == 0) */
16807 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
16808 insn
= gen_frame_mem (SImode
, insn
);
16810 emit_set_insn (ip_rtx
, insn
);
16811 /* Add a USE to stop propagate_one_insn() from barfing. */
16812 emit_insn (gen_force_register_use (ip_rtx
));
16817 insn
= GEN_INT (saved_regs
- 4);
16818 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
16819 stack_pointer_rtx
, insn
));
16820 RTX_FRAME_RELATED_P (insn
) = 1;
16824 if (flag_stack_usage_info
)
16825 current_function_static_stack_size
16826 = offsets
->outgoing_args
- offsets
->saved_args
;
16828 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
16830 /* This add can produce multiple insns for a large constant, so we
16831 need to get tricky. */
16832 rtx last
= get_last_insn ();
16834 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
16835 - offsets
->outgoing_args
);
16837 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
16841 last
= last
? NEXT_INSN (last
) : get_insns ();
16842 RTX_FRAME_RELATED_P (last
) = 1;
16844 while (last
!= insn
);
16846 /* If the frame pointer is needed, emit a special barrier that
16847 will prevent the scheduler from moving stores to the frame
16848 before the stack adjustment. */
16849 if (frame_pointer_needed
)
16850 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
16851 hard_frame_pointer_rtx
));
16855 if (frame_pointer_needed
&& TARGET_THUMB2
)
16856 thumb_set_frame_pointer (offsets
);
16858 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
16860 unsigned long mask
;
16862 mask
= live_regs_mask
;
16863 mask
&= THUMB2_WORK_REGS
;
16864 if (!IS_NESTED (func_type
))
16865 mask
|= (1 << IP_REGNUM
);
16866 arm_load_pic_register (mask
);
16869 /* If we are profiling, make sure no instructions are scheduled before
16870 the call to mcount. Similarly if the user has requested no
16871 scheduling in the prolog. Similarly if we want non-call exceptions
16872 using the EABI unwinder, to prevent faulting instructions from being
16873 swapped with a stack adjustment. */
16874 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
16875 || (arm_except_unwind_info (&global_options
) == UI_TARGET
16876 && cfun
->can_throw_non_call_exceptions
))
16877 emit_insn (gen_blockage ());
16879 /* If the link register is being kept alive, with the return address in it,
16880 then make sure that it does not get reused by the ce2 pass. */
16881 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
16882 cfun
->machine
->lr_save_eliminated
= 1;
16885 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16887 arm_print_condition (FILE *stream
)
16889 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
16891 /* Branch conversion is not implemented for Thumb-2. */
16894 output_operand_lossage ("predicated Thumb instruction");
16897 if (current_insn_predicate
!= NULL
)
16899 output_operand_lossage
16900 ("predicated instruction in conditional sequence");
16904 fputs (arm_condition_codes
[arm_current_cc
], stream
);
16906 else if (current_insn_predicate
)
16908 enum arm_cond_code code
;
16912 output_operand_lossage ("predicated Thumb instruction");
16916 code
= get_arm_condition_code (current_insn_predicate
);
16917 fputs (arm_condition_codes
[code
], stream
);
16922 /* If CODE is 'd', then the X is a condition operand and the instruction
16923 should only be executed if the condition is true.
16924 if CODE is 'D', then the X is a condition operand and the instruction
16925 should only be executed if the condition is false: however, if the mode
16926 of the comparison is CCFPEmode, then always execute the instruction -- we
16927 do this because in these circumstances !GE does not necessarily imply LT;
16928 in these cases the instruction pattern will take care to make sure that
16929 an instruction containing %d will follow, thereby undoing the effects of
16930 doing this instruction unconditionally.
16931 If CODE is 'N' then X is a floating point operand that must be negated
16933 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16934 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
16936 arm_print_operand (FILE *stream
, rtx x
, int code
)
16941 fputs (ASM_COMMENT_START
, stream
);
16945 fputs (user_label_prefix
, stream
);
16949 fputs (REGISTER_PREFIX
, stream
);
16953 arm_print_condition (stream
);
16957 /* Nothing in unified syntax, otherwise the current condition code. */
16958 if (!TARGET_UNIFIED_ASM
)
16959 arm_print_condition (stream
);
16963 /* The current condition code in unified syntax, otherwise nothing. */
16964 if (TARGET_UNIFIED_ASM
)
16965 arm_print_condition (stream
);
16969 /* The current condition code for a condition code setting instruction.
16970 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16971 if (TARGET_UNIFIED_ASM
)
16973 fputc('s', stream
);
16974 arm_print_condition (stream
);
16978 arm_print_condition (stream
);
16979 fputc('s', stream
);
16984 /* If the instruction is conditionally executed then print
16985 the current condition code, otherwise print 's'. */
16986 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
16987 if (current_insn_predicate
)
16988 arm_print_condition (stream
);
16990 fputc('s', stream
);
16993 /* %# is a "break" sequence. It doesn't output anything, but is used to
16994 separate e.g. operand numbers from following text, if that text consists
16995 of further digits which we don't want to be part of the operand
17003 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
17004 r
= real_value_negate (&r
);
17005 fprintf (stream
, "%s", fp_const_from_val (&r
));
17009 /* An integer or symbol address without a preceding # sign. */
17011 switch (GET_CODE (x
))
17014 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
17018 output_addr_const (stream
, x
);
17022 if (GET_CODE (XEXP (x
, 0)) == PLUS
17023 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
17025 output_addr_const (stream
, x
);
17028 /* Fall through. */
17031 output_operand_lossage ("Unsupported operand for code '%c'", code
);
17035 /* An integer that we want to print in HEX. */
17037 switch (GET_CODE (x
))
17040 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
17044 output_operand_lossage ("Unsupported operand for code '%c'", code
);
17049 if (CONST_INT_P (x
))
17052 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
17053 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
17057 putc ('~', stream
);
17058 output_addr_const (stream
, x
);
17063 /* The low 16 bits of an immediate constant. */
17064 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
17068 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
17072 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
17080 if (!shift_operator (x
, SImode
))
17082 output_operand_lossage ("invalid shift operand");
17086 shift
= shift_op (x
, &val
);
17090 fprintf (stream
, ", %s ", shift
);
17092 arm_print_operand (stream
, XEXP (x
, 1), 0);
17094 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
17099 /* An explanation of the 'Q', 'R' and 'H' register operands:
17101 In a pair of registers containing a DI or DF value the 'Q'
17102 operand returns the register number of the register containing
17103 the least significant part of the value. The 'R' operand returns
17104 the register number of the register containing the most
17105 significant part of the value.
17107 The 'H' operand returns the higher of the two register numbers.
17108 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17109 same as the 'Q' operand, since the most significant part of the
17110 value is held in the lower number register. The reverse is true
17111 on systems where WORDS_BIG_ENDIAN is false.
17113 The purpose of these operands is to distinguish between cases
17114 where the endian-ness of the values is important (for example
17115 when they are added together), and cases where the endian-ness
17116 is irrelevant, but the order of register operations is important.
17117 For example when loading a value from memory into a register
17118 pair, the endian-ness does not matter. Provided that the value
17119 from the lower memory address is put into the lower numbered
17120 register, and the value from the higher address is put into the
17121 higher numbered register, the load will work regardless of whether
17122 the value being loaded is big-wordian or little-wordian. The
17123 order of the two register loads can matter however, if the address
17124 of the memory location is actually held in one of the registers
17125 being overwritten by the load.
17127 The 'Q' and 'R' constraints are also available for 64-bit
17130 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
17132 rtx part
= gen_lowpart (SImode
, x
);
17133 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
17137 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
17139 output_operand_lossage ("invalid operand for code '%c'", code
);
17143 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
17147 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
17149 enum machine_mode mode
= GET_MODE (x
);
17152 if (mode
== VOIDmode
)
17154 part
= gen_highpart_mode (SImode
, mode
, x
);
17155 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
17159 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
17161 output_operand_lossage ("invalid operand for code '%c'", code
);
17165 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
17169 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
17171 output_operand_lossage ("invalid operand for code '%c'", code
);
17175 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
17179 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
17181 output_operand_lossage ("invalid operand for code '%c'", code
);
17185 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
17189 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
17191 output_operand_lossage ("invalid operand for code '%c'", code
);
17195 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
17199 asm_fprintf (stream
, "%r",
17200 REG_P (XEXP (x
, 0))
17201 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
17205 asm_fprintf (stream
, "{%r-%r}",
17207 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
17210 /* Like 'M', but writing doubleword vector registers, for use by Neon
17214 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
17215 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
17217 asm_fprintf (stream
, "{d%d}", regno
);
17219 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
17224 /* CONST_TRUE_RTX means always -- that's the default. */
17225 if (x
== const_true_rtx
)
17228 if (!COMPARISON_P (x
))
17230 output_operand_lossage ("invalid operand for code '%c'", code
);
17234 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
17239 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17240 want to do that. */
17241 if (x
== const_true_rtx
)
17243 output_operand_lossage ("instruction never executed");
17246 if (!COMPARISON_P (x
))
17248 output_operand_lossage ("invalid operand for code '%c'", code
);
17252 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
17253 (get_arm_condition_code (x
))],
17263 /* Former Maverick support, removed after GCC-4.7. */
17264 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
17269 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
17270 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
17271 /* Bad value for wCG register number. */
17273 output_operand_lossage ("invalid operand for code '%c'", code
);
17278 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
17281 /* Print an iWMMXt control register name. */
17283 if (!CONST_INT_P (x
)
17285 || INTVAL (x
) >= 16)
17286 /* Bad value for wC register number. */
17288 output_operand_lossage ("invalid operand for code '%c'", code
);
17294 static const char * wc_reg_names
[16] =
17296 "wCID", "wCon", "wCSSF", "wCASF",
17297 "wC4", "wC5", "wC6", "wC7",
17298 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17299 "wC12", "wC13", "wC14", "wC15"
17302 fprintf (stream
, wc_reg_names
[INTVAL (x
)]);
17306 /* Print the high single-precision register of a VFP double-precision
17310 int mode
= GET_MODE (x
);
17313 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
17315 output_operand_lossage ("invalid operand for code '%c'", code
);
17320 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
17322 output_operand_lossage ("invalid operand for code '%c'", code
);
17326 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
17330 /* Print a VFP/Neon double precision or quad precision register name. */
17334 int mode
= GET_MODE (x
);
17335 int is_quad
= (code
== 'q');
17338 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
17340 output_operand_lossage ("invalid operand for code '%c'", code
);
17345 || !IS_VFP_REGNUM (REGNO (x
)))
17347 output_operand_lossage ("invalid operand for code '%c'", code
);
17352 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
17353 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
17355 output_operand_lossage ("invalid operand for code '%c'", code
);
17359 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
17360 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
17364 /* These two codes print the low/high doubleword register of a Neon quad
17365 register, respectively. For pair-structure types, can also print
17366 low/high quadword registers. */
17370 int mode
= GET_MODE (x
);
17373 if ((GET_MODE_SIZE (mode
) != 16
17374 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
17376 output_operand_lossage ("invalid operand for code '%c'", code
);
17381 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
17383 output_operand_lossage ("invalid operand for code '%c'", code
);
17387 if (GET_MODE_SIZE (mode
) == 16)
17388 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
17389 + (code
== 'f' ? 1 : 0));
17391 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
17392 + (code
== 'f' ? 1 : 0));
17396 /* Print a VFPv3 floating-point constant, represented as an integer
17400 int index
= vfp3_const_double_index (x
);
17401 gcc_assert (index
!= -1);
17402 fprintf (stream
, "%d", index
);
17406 /* Print bits representing opcode features for Neon.
17408 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17409 and polynomials as unsigned.
17411 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17413 Bit 2 is 1 for rounding functions, 0 otherwise. */
17415 /* Identify the type as 's', 'u', 'p' or 'f'. */
17418 HOST_WIDE_INT bits
= INTVAL (x
);
17419 fputc ("uspf"[bits
& 3], stream
);
17423 /* Likewise, but signed and unsigned integers are both 'i'. */
17426 HOST_WIDE_INT bits
= INTVAL (x
);
17427 fputc ("iipf"[bits
& 3], stream
);
17431 /* As for 'T', but emit 'u' instead of 'p'. */
17434 HOST_WIDE_INT bits
= INTVAL (x
);
17435 fputc ("usuf"[bits
& 3], stream
);
17439 /* Bit 2: rounding (vs none). */
17442 HOST_WIDE_INT bits
= INTVAL (x
);
17443 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
17447 /* Memory operand for vld1/vst1 instruction. */
17451 bool postinc
= FALSE
;
17452 unsigned align
, memsize
, align_bits
;
17454 gcc_assert (MEM_P (x
));
17455 addr
= XEXP (x
, 0);
17456 if (GET_CODE (addr
) == POST_INC
)
17459 addr
= XEXP (addr
, 0);
17461 asm_fprintf (stream
, "[%r", REGNO (addr
));
17463 /* We know the alignment of this access, so we can emit a hint in the
17464 instruction (for some alignments) as an aid to the memory subsystem
17466 align
= MEM_ALIGN (x
) >> 3;
17467 memsize
= MEM_SIZE (x
);
17469 /* Only certain alignment specifiers are supported by the hardware. */
17470 if (memsize
== 32 && (align
% 32) == 0)
17472 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
17474 else if (memsize
>= 8 && (align
% 8) == 0)
17479 if (align_bits
!= 0)
17480 asm_fprintf (stream
, ":%d", align_bits
);
17482 asm_fprintf (stream
, "]");
17485 fputs("!", stream
);
17493 gcc_assert (MEM_P (x
));
17494 addr
= XEXP (x
, 0);
17495 gcc_assert (REG_P (addr
));
17496 asm_fprintf (stream
, "[%r]", REGNO (addr
));
17500 /* Translate an S register number into a D register number and element index. */
17503 int mode
= GET_MODE (x
);
17506 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
17508 output_operand_lossage ("invalid operand for code '%c'", code
);
17513 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
17515 output_operand_lossage ("invalid operand for code '%c'", code
);
17519 regno
= regno
- FIRST_VFP_REGNUM
;
17520 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
17525 gcc_assert (CONST_DOUBLE_P (x
));
17526 fprintf (stream
, "#%d", vfp3_const_double_for_fract_bits (x
));
17529 /* Register specifier for vld1.16/vst1.16. Translate the S register
17530 number into a D register number and element index. */
17533 int mode
= GET_MODE (x
);
17536 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
17538 output_operand_lossage ("invalid operand for code '%c'", code
);
17543 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
17545 output_operand_lossage ("invalid operand for code '%c'", code
);
17549 regno
= regno
- FIRST_VFP_REGNUM
;
17550 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
17557 output_operand_lossage ("missing operand");
17561 switch (GET_CODE (x
))
17564 asm_fprintf (stream
, "%r", REGNO (x
));
17568 output_memory_reference_mode
= GET_MODE (x
);
17569 output_address (XEXP (x
, 0));
17576 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
17577 sizeof (fpstr
), 0, 1);
17578 fprintf (stream
, "#%s", fpstr
);
17581 fprintf (stream
, "#%s", fp_immediate_constant (x
));
17585 gcc_assert (GET_CODE (x
) != NEG
);
17586 fputc ('#', stream
);
17587 if (GET_CODE (x
) == HIGH
)
17589 fputs (":lower16:", stream
);
17593 output_addr_const (stream
, x
);
17599 /* Target hook for printing a memory address. */
17601 arm_print_operand_address (FILE *stream
, rtx x
)
17605 int is_minus
= GET_CODE (x
) == MINUS
;
17608 asm_fprintf (stream
, "[%r]", REGNO (x
));
17609 else if (GET_CODE (x
) == PLUS
|| is_minus
)
17611 rtx base
= XEXP (x
, 0);
17612 rtx index
= XEXP (x
, 1);
17613 HOST_WIDE_INT offset
= 0;
17615 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
17617 /* Ensure that BASE is a register. */
17618 /* (one of them must be). */
17619 /* Also ensure the SP is not used as in index register. */
17624 switch (GET_CODE (index
))
17627 offset
= INTVAL (index
);
17630 asm_fprintf (stream
, "[%r, #%wd]",
17631 REGNO (base
), offset
);
17635 asm_fprintf (stream
, "[%r, %s%r]",
17636 REGNO (base
), is_minus
? "-" : "",
17646 asm_fprintf (stream
, "[%r, %s%r",
17647 REGNO (base
), is_minus
? "-" : "",
17648 REGNO (XEXP (index
, 0)));
17649 arm_print_operand (stream
, index
, 'S');
17650 fputs ("]", stream
);
17655 gcc_unreachable ();
17658 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
17659 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
17661 extern enum machine_mode output_memory_reference_mode
;
17663 gcc_assert (REG_P (XEXP (x
, 0)));
17665 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
17666 asm_fprintf (stream
, "[%r, #%s%d]!",
17667 REGNO (XEXP (x
, 0)),
17668 GET_CODE (x
) == PRE_DEC
? "-" : "",
17669 GET_MODE_SIZE (output_memory_reference_mode
));
17671 asm_fprintf (stream
, "[%r], #%s%d",
17672 REGNO (XEXP (x
, 0)),
17673 GET_CODE (x
) == POST_DEC
? "-" : "",
17674 GET_MODE_SIZE (output_memory_reference_mode
));
17676 else if (GET_CODE (x
) == PRE_MODIFY
)
17678 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
17679 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
17680 asm_fprintf (stream
, "#%wd]!",
17681 INTVAL (XEXP (XEXP (x
, 1), 1)));
17683 asm_fprintf (stream
, "%r]!",
17684 REGNO (XEXP (XEXP (x
, 1), 1)));
17686 else if (GET_CODE (x
) == POST_MODIFY
)
17688 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
17689 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
17690 asm_fprintf (stream
, "#%wd",
17691 INTVAL (XEXP (XEXP (x
, 1), 1)));
17693 asm_fprintf (stream
, "%r",
17694 REGNO (XEXP (XEXP (x
, 1), 1)));
17696 else output_addr_const (stream
, x
);
17701 asm_fprintf (stream
, "[%r]", REGNO (x
));
17702 else if (GET_CODE (x
) == POST_INC
)
17703 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
17704 else if (GET_CODE (x
) == PLUS
)
17706 gcc_assert (REG_P (XEXP (x
, 0)));
17707 if (CONST_INT_P (XEXP (x
, 1)))
17708 asm_fprintf (stream
, "[%r, #%wd]",
17709 REGNO (XEXP (x
, 0)),
17710 INTVAL (XEXP (x
, 1)));
17712 asm_fprintf (stream
, "[%r, %r]",
17713 REGNO (XEXP (x
, 0)),
17714 REGNO (XEXP (x
, 1)));
17717 output_addr_const (stream
, x
);
17721 /* Target hook for indicating whether a punctuation character for
17722 TARGET_PRINT_OPERAND is valid. */
17724 arm_print_operand_punct_valid_p (unsigned char code
)
17726 return (code
== '@' || code
== '|' || code
== '.'
17727 || code
== '(' || code
== ')' || code
== '#'
17728 || (TARGET_32BIT
&& (code
== '?'))
17729 || (TARGET_THUMB2
&& (code
== '!'))
17730 || (TARGET_THUMB
&& (code
== '_')));
17733 /* Target hook for assembling integer objects. The ARM version needs to
17734 handle word-sized values specially. */
17736 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
17738 enum machine_mode mode
;
17740 if (size
== UNITS_PER_WORD
&& aligned_p
)
17742 fputs ("\t.word\t", asm_out_file
);
17743 output_addr_const (asm_out_file
, x
);
17745 /* Mark symbols as position independent. We only do this in the
17746 .text segment, not in the .data segment. */
17747 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
17748 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
17750 /* See legitimize_pic_address for an explanation of the
17751 TARGET_VXWORKS_RTP check. */
17752 if (TARGET_VXWORKS_RTP
17753 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
17754 fputs ("(GOT)", asm_out_file
);
17756 fputs ("(GOTOFF)", asm_out_file
);
17758 fputc ('\n', asm_out_file
);
17762 mode
= GET_MODE (x
);
17764 if (arm_vector_mode_supported_p (mode
))
17768 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
17770 units
= CONST_VECTOR_NUNITS (x
);
17771 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
17773 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
17774 for (i
= 0; i
< units
; i
++)
17776 rtx elt
= CONST_VECTOR_ELT (x
, i
);
17778 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
17781 for (i
= 0; i
< units
; i
++)
17783 rtx elt
= CONST_VECTOR_ELT (x
, i
);
17784 REAL_VALUE_TYPE rval
;
17786 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
17789 (rval
, GET_MODE_INNER (mode
),
17790 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
17796 return default_assemble_integer (x
, size
, aligned_p
);
/* Emit one static constructor/destructor table entry for SYMBOL with the
   given init PRIORITY; IS_CTOR selects constructor vs destructor handling.
   NOTE(review): this chunk is a lossy extraction -- the return-type line,
   braces, the local `section *s' / `buf' declarations, the early return
   after the non-AAPCS fallback, and the default-priority else-branch are
   missing from this view; do not assume they are absent in the real file.  */
17800 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
/* Non-AAPCS targets fall back to the generic named-section helpers
   (.ctors/.dtors style output).  */
17804 if (!TARGET_AAPCS_BASED
)
17807 default_named_section_asm_out_constructor
17808 : default_named_section_asm_out_destructor
) (symbol
, priority
);
17812 /* Put these in the .init_array section, using a special relocation. */
/* Non-default priorities get their own ".init_array.NNNNN" or
   ".fini_array.NNNNN" section; the "%.5u" gives a fixed 5-digit,
   zero-padded suffix so sections sort correctly at link time.  */
17813 if (priority
!= DEFAULT_INIT_PRIORITY
)
17816 sprintf (buf
, "%s.%.5u",
17817 is_ctor
? ".init_array" : ".fini_array",
17819 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
/* Emit a pointer-aligned word for SYMBOL, annotated with "(target1)" --
   presumably the R_ARM_TARGET1 relocation; confirm against the AAPCS/ELF
   for ARM documents.  */
17826 switch_to_section (s
);
17827 assemble_align (POINTER_SIZE
);
17828 fputs ("\t.word\t", asm_out_file
);
17829 output_addr_const (asm_out_file
, symbol
);
17830 fputs ("(target1)\n", asm_out_file
);
17833 /* Add a function to the list of static constructors. */
17836 arm_elf_asm_constructor (rtx symbol
, int priority
)
17838 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
17841 /* Add a function to the list of static destructors. */
17844 arm_elf_asm_destructor (rtx symbol
, int priority
)
17846 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
17849 /* A finite state machine takes care of noticing whether or not instructions
17850 can be conditionally executed, and thus decrease execution time and code
17851 size by deleting branch instructions. The fsm is controlled by
17852 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17854 /* The state of the fsm controlling condition codes are:
17855 0: normal, do nothing special
17856 1: make ASM_OUTPUT_OPCODE not output this instruction
17857 2: make ASM_OUTPUT_OPCODE not output this instruction
17858 3: make instructions conditional
17859 4: make instructions conditional
17861 State transitions (state->state by whom under condition):
17862 0 -> 1 final_prescan_insn if the `target' is a label
17863 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17864 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17865 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17866 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17867 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17868 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17869 (the target insn is arm_target_insn).
17871 If the jump clobbers the conditions then we use states 2 and 4.
17873 A similar thing can be done with conditional return insns.
17875 XXX In case the `target' is an unconditional branch, this conditionalising
17876 of the instructions always reduces code size, but not always execution
17877 time. But then, I want to reduce the code size to somewhere near what
17878 /bin/cc produces. */
17880 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17881 instructions. When a COND_EXEC instruction is seen the subsequent
17882 instructions are scanned so that multiple conditional instructions can be
17883 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17884 specify the length and true/false mask for the IT block. These will be
17885 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
17887 /* Returns the index of the ARM condition code string in
17888 `arm_condition_codes', or ARM_NV if the comparison is invalid.
17889 COMPARISON should be an rtx like `(eq (...) (...))'. */
17892 maybe_get_arm_condition_code (rtx comparison
)
17894 enum machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
17895 enum arm_cond_code code
;
17896 enum rtx_code comp_code
= GET_CODE (comparison
);
17898 if (GET_MODE_CLASS (mode
) != MODE_CC
)
17899 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
17900 XEXP (comparison
, 1));
17904 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
17905 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
17906 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
17907 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
17908 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
17909 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
17910 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
17911 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
17912 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
17913 case CC_DLTUmode
: code
= ARM_CC
;
17916 if (comp_code
== EQ
)
17917 return ARM_INVERSE_CONDITION_CODE (code
);
17918 if (comp_code
== NE
)
17925 case NE
: return ARM_NE
;
17926 case EQ
: return ARM_EQ
;
17927 case GE
: return ARM_PL
;
17928 case LT
: return ARM_MI
;
17929 default: return ARM_NV
;
17935 case NE
: return ARM_NE
;
17936 case EQ
: return ARM_EQ
;
17937 default: return ARM_NV
;
17943 case NE
: return ARM_MI
;
17944 case EQ
: return ARM_PL
;
17945 default: return ARM_NV
;
17950 /* We can handle all cases except UNEQ and LTGT. */
17953 case GE
: return ARM_GE
;
17954 case GT
: return ARM_GT
;
17955 case LE
: return ARM_LS
;
17956 case LT
: return ARM_MI
;
17957 case NE
: return ARM_NE
;
17958 case EQ
: return ARM_EQ
;
17959 case ORDERED
: return ARM_VC
;
17960 case UNORDERED
: return ARM_VS
;
17961 case UNLT
: return ARM_LT
;
17962 case UNLE
: return ARM_LE
;
17963 case UNGT
: return ARM_HI
;
17964 case UNGE
: return ARM_PL
;
17965 /* UNEQ and LTGT do not have a representation. */
17966 case UNEQ
: /* Fall through. */
17967 case LTGT
: /* Fall through. */
17968 default: return ARM_NV
;
17974 case NE
: return ARM_NE
;
17975 case EQ
: return ARM_EQ
;
17976 case GE
: return ARM_LE
;
17977 case GT
: return ARM_LT
;
17978 case LE
: return ARM_GE
;
17979 case LT
: return ARM_GT
;
17980 case GEU
: return ARM_LS
;
17981 case GTU
: return ARM_CC
;
17982 case LEU
: return ARM_CS
;
17983 case LTU
: return ARM_HI
;
17984 default: return ARM_NV
;
17990 case LTU
: return ARM_CS
;
17991 case GEU
: return ARM_CC
;
17992 default: return ARM_NV
;
17998 case NE
: return ARM_NE
;
17999 case EQ
: return ARM_EQ
;
18000 case GEU
: return ARM_CS
;
18001 case GTU
: return ARM_HI
;
18002 case LEU
: return ARM_LS
;
18003 case LTU
: return ARM_CC
;
18004 default: return ARM_NV
;
18010 case GE
: return ARM_GE
;
18011 case LT
: return ARM_LT
;
18012 case GEU
: return ARM_CS
;
18013 case LTU
: return ARM_CC
;
18014 default: return ARM_NV
;
18020 case NE
: return ARM_NE
;
18021 case EQ
: return ARM_EQ
;
18022 case GE
: return ARM_GE
;
18023 case GT
: return ARM_GT
;
18024 case LE
: return ARM_LE
;
18025 case LT
: return ARM_LT
;
18026 case GEU
: return ARM_CS
;
18027 case GTU
: return ARM_HI
;
18028 case LEU
: return ARM_LS
;
18029 case LTU
: return ARM_CC
;
18030 default: return ARM_NV
;
18033 default: gcc_unreachable ();
18037 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
18038 static enum arm_cond_code
18039 get_arm_condition_code (rtx comparison
)
18041 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
18042 gcc_assert (code
!= ARM_NV
);
18046 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
18049 thumb2_final_prescan_insn (rtx insn
)
18051 rtx first_insn
= insn
;
18052 rtx body
= PATTERN (insn
);
18054 enum arm_cond_code code
;
18058 /* Remove the previous insn from the count of insns to be output. */
18059 if (arm_condexec_count
)
18060 arm_condexec_count
--;
18062 /* Nothing to do if we are already inside a conditional block. */
18063 if (arm_condexec_count
)
18066 if (GET_CODE (body
) != COND_EXEC
)
18069 /* Conditional jumps are implemented directly. */
18073 predicate
= COND_EXEC_TEST (body
);
18074 arm_current_cc
= get_arm_condition_code (predicate
);
18076 n
= get_attr_ce_count (insn
);
18077 arm_condexec_count
= 1;
18078 arm_condexec_mask
= (1 << n
) - 1;
18079 arm_condexec_masklen
= n
;
18080 /* See if subsequent instructions can be combined into the same block. */
18083 insn
= next_nonnote_insn (insn
);
18085 /* Jumping into the middle of an IT block is illegal, so a label or
18086 barrier terminates the block. */
18087 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
18090 body
= PATTERN (insn
);
18091 /* USE and CLOBBER aren't really insns, so just skip them. */
18092 if (GET_CODE (body
) == USE
18093 || GET_CODE (body
) == CLOBBER
)
18096 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
18097 if (GET_CODE (body
) != COND_EXEC
)
18099 /* Allow up to 4 conditionally executed instructions in a block. */
18100 n
= get_attr_ce_count (insn
);
18101 if (arm_condexec_masklen
+ n
> 4)
18104 predicate
= COND_EXEC_TEST (body
);
18105 code
= get_arm_condition_code (predicate
);
18106 mask
= (1 << n
) - 1;
18107 if (arm_current_cc
== code
)
18108 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
18109 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
18112 arm_condexec_count
++;
18113 arm_condexec_masklen
+= n
;
18115 /* A jump must be the last instruction in a conditional block. */
18119 /* Restore recog_data (getting the attributes of other insns can
18120 destroy this array, but final.c assumes that it remains intact
18121 across this call). */
18122 extract_constrain_insn_cached (first_insn
);
18126 arm_final_prescan_insn (rtx insn
)
18128 /* BODY will hold the body of INSN. */
18129 rtx body
= PATTERN (insn
);
18131 /* This will be 1 if trying to repeat the trick, and things need to be
18132 reversed if it appears to fail. */
18135 /* If we start with a return insn, we only succeed if we find another one. */
18136 int seeking_return
= 0;
18137 enum rtx_code return_code
= UNKNOWN
;
18139 /* START_INSN will hold the insn from where we start looking. This is the
18140 first insn after the following code_label if REVERSE is true. */
18141 rtx start_insn
= insn
;
18143 /* If in state 4, check if the target branch is reached, in order to
18144 change back to state 0. */
18145 if (arm_ccfsm_state
== 4)
18147 if (insn
== arm_target_insn
)
18149 arm_target_insn
= NULL
;
18150 arm_ccfsm_state
= 0;
18155 /* If in state 3, it is possible to repeat the trick, if this insn is an
18156 unconditional branch to a label, and immediately following this branch
18157 is the previous target label which is only used once, and the label this
18158 branch jumps to is not too far off. */
18159 if (arm_ccfsm_state
== 3)
18161 if (simplejump_p (insn
))
18163 start_insn
= next_nonnote_insn (start_insn
);
18164 if (BARRIER_P (start_insn
))
18166 /* XXX Isn't this always a barrier? */
18167 start_insn
= next_nonnote_insn (start_insn
);
18169 if (LABEL_P (start_insn
)
18170 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
18171 && LABEL_NUSES (start_insn
) == 1)
18176 else if (ANY_RETURN_P (body
))
18178 start_insn
= next_nonnote_insn (start_insn
);
18179 if (BARRIER_P (start_insn
))
18180 start_insn
= next_nonnote_insn (start_insn
);
18181 if (LABEL_P (start_insn
)
18182 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
18183 && LABEL_NUSES (start_insn
) == 1)
18186 seeking_return
= 1;
18187 return_code
= GET_CODE (body
);
18196 gcc_assert (!arm_ccfsm_state
|| reverse
);
18197 if (!JUMP_P (insn
))
18200 /* This jump might be paralleled with a clobber of the condition codes
18201 the jump should always come first */
18202 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
18203 body
= XVECEXP (body
, 0, 0);
18206 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
18207 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
18210 int fail
= FALSE
, succeed
= FALSE
;
18211 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18212 int then_not_else
= TRUE
;
18213 rtx this_insn
= start_insn
, label
= 0;
18215 /* Register the insn jumped to. */
18218 if (!seeking_return
)
18219 label
= XEXP (SET_SRC (body
), 0);
18221 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
18222 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
18223 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
18225 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
18226 then_not_else
= FALSE
;
18228 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
18230 seeking_return
= 1;
18231 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
18233 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
18235 seeking_return
= 1;
18236 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
18237 then_not_else
= FALSE
;
18240 gcc_unreachable ();
18242 /* See how many insns this branch skips, and what kind of insns. If all
18243 insns are okay, and the label or unconditional branch to the same
18244 label is not too far away, succeed. */
18245 for (insns_skipped
= 0;
18246 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
18250 this_insn
= next_nonnote_insn (this_insn
);
18254 switch (GET_CODE (this_insn
))
18257 /* Succeed if it is the target label, otherwise fail since
18258 control falls in from somewhere else. */
18259 if (this_insn
== label
)
18261 arm_ccfsm_state
= 1;
18269 /* Succeed if the following insn is the target label.
18271 If return insns are used then the last insn in a function
18272 will be a barrier. */
18273 this_insn
= next_nonnote_insn (this_insn
);
18274 if (this_insn
&& this_insn
== label
)
18276 arm_ccfsm_state
= 1;
18284 /* The AAPCS says that conditional calls should not be
18285 used since they make interworking inefficient (the
18286 linker can't transform BL<cond> into BLX). That's
18287 only a problem if the machine has BLX. */
18294 /* Succeed if the following insn is the target label, or
18295 if the following two insns are a barrier and the
18297 this_insn
= next_nonnote_insn (this_insn
);
18298 if (this_insn
&& BARRIER_P (this_insn
))
18299 this_insn
= next_nonnote_insn (this_insn
);
18301 if (this_insn
&& this_insn
== label
18302 && insns_skipped
< max_insns_skipped
)
18304 arm_ccfsm_state
= 1;
18312 /* If this is an unconditional branch to the same label, succeed.
18313 If it is to another label, do nothing. If it is conditional,
18315 /* XXX Probably, the tests for SET and the PC are
18318 scanbody
= PATTERN (this_insn
);
18319 if (GET_CODE (scanbody
) == SET
18320 && GET_CODE (SET_DEST (scanbody
)) == PC
)
18322 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
18323 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
18325 arm_ccfsm_state
= 2;
18328 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
18331 /* Fail if a conditional return is undesirable (e.g. on a
18332 StrongARM), but still allow this if optimizing for size. */
18333 else if (GET_CODE (scanbody
) == return_code
18334 && !use_return_insn (TRUE
, NULL
)
18337 else if (GET_CODE (scanbody
) == return_code
)
18339 arm_ccfsm_state
= 2;
18342 else if (GET_CODE (scanbody
) == PARALLEL
)
18344 switch (get_attr_conds (this_insn
))
18354 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
18359 /* Instructions using or affecting the condition codes make it
18361 scanbody
= PATTERN (this_insn
);
18362 if (!(GET_CODE (scanbody
) == SET
18363 || GET_CODE (scanbody
) == PARALLEL
)
18364 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
18374 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
18375 arm_target_label
= CODE_LABEL_NUMBER (label
);
18378 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
18380 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
18382 this_insn
= next_nonnote_insn (this_insn
);
18383 gcc_assert (!this_insn
18384 || (!BARRIER_P (this_insn
)
18385 && !LABEL_P (this_insn
)));
18389 /* Oh, dear! we ran off the end.. give up. */
18390 extract_constrain_insn_cached (insn
);
18391 arm_ccfsm_state
= 0;
18392 arm_target_insn
= NULL
;
18395 arm_target_insn
= this_insn
;
18398 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18401 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
18403 if (reverse
|| then_not_else
)
18404 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
18407 /* Restore recog_data (getting the attributes of other insns can
18408 destroy this array, but final.c assumes that it remains intact
18409 across this call. */
18410 extract_constrain_insn_cached (insn
);
18414 /* Output IT instructions. */
/* Called while emitting an opcode: if a Thumb-2 conditional-execution mask
   is pending (set by thumb2_final_prescan_insn), emit the IT-block prefix
   whose then/else suffix string is derived from arm_condexec_mask, then
   clear the mask so the prefix is printed only once per block.
   NOTE(review): lossy extraction -- the return type, braces, and the local
   declarations of `n' and `buff' (and presumably buff's terminating NUL
   store) are missing from this view.  */
18416 thumb2_asm_output_opcode (FILE * stream
)
18421 if (arm_condexec_mask
)
/* One character per conditional insn in the block: bit n of
   arm_condexec_mask selects 't' (then) or 'e' (else).  */
18423 for (n
= 0; n
< arm_condexec_masklen
; n
++)
18424 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
/* Prepend 'i' to the t/e string and append the current condition,
   e.g. "itte\teq" -- TODO confirm exact spelling against assembler output.  */
18426 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
18427 arm_condition_codes
[arm_current_cc
]);
18428 arm_condexec_mask
= 0;
18432 /* Returns true if REGNO is a valid register
18433 for holding a quantity of type MODE. */
18435 arm_hard_regno_mode_ok (unsigned int regno
, enum machine_mode mode
)
18437 if (GET_MODE_CLASS (mode
) == MODE_CC
)
18438 return (regno
== CC_REGNUM
18439 || (TARGET_HARD_FLOAT
&& TARGET_VFP
18440 && regno
== VFPCC_REGNUM
));
18443 /* For the Thumb we only allow values bigger than SImode in
18444 registers 0 - 6, so that there is always a second low
18445 register available to hold the upper part of the value.
18446 We probably we ought to ensure that the register is the
18447 start of an even numbered register pair. */
18448 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
18450 if (TARGET_HARD_FLOAT
&& TARGET_VFP
18451 && IS_VFP_REGNUM (regno
))
18453 if (mode
== SFmode
|| mode
== SImode
)
18454 return VFP_REGNO_OK_FOR_SINGLE (regno
);
18456 if (mode
== DFmode
)
18457 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
18459 /* VFP registers can hold HFmode values, but there is no point in
18460 putting them there unless we have hardware conversion insns. */
18461 if (mode
== HFmode
)
18462 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
18465 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
18466 || (VALID_NEON_QREG_MODE (mode
)
18467 && NEON_REGNO_OK_FOR_QUAD (regno
))
18468 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
18469 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
18470 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
18471 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
18472 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
18477 if (TARGET_REALLY_IWMMXT
)
18479 if (IS_IWMMXT_GR_REGNUM (regno
))
18480 return mode
== SImode
;
18482 if (IS_IWMMXT_REGNUM (regno
))
18483 return VALID_IWMMXT_REG_MODE (mode
);
18486 /* We allow almost any value to be stored in the general registers.
18487 Restrict doubleword quantities to even register pairs so that we can
18488 use ldrd. Do not allow very large Neon structure opaque modes in
18489 general registers; they would use too many. */
18490 if (regno
<= LAST_ARM_REGNUM
)
18491 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0)
18492 && ARM_NUM_REGS (mode
) <= 4;
18494 if (regno
== FRAME_POINTER_REGNUM
18495 || regno
== ARG_POINTER_REGNUM
)
18496 /* We only allow integers in the fake hard registers. */
18497 return GET_MODE_CLASS (mode
) == MODE_INT
;
18502 /* Implement MODES_TIEABLE_P. */
/* Report whether values of MODE1 and MODE2 may share a hard register.
   Modes of the same class always tie; beyond that, Neon D-register,
   Q-register and structure modes are allowed to tie with one another.
   NOTE(review): lossy extraction -- the return-type line, braces, the
   `return' statements and the leading term of the second condition
   (presumably a TARGET_NEON guard) are missing from this view.  */
18505 arm_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
/* Same mode class: always tieable.  */
18507 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
18510 /* We specifically want to allow elements of "structure" modes to
18511 be tieable to the structure. This more general condition allows
18512 other rarer situations too. */
/* Both modes must be some Neon D-reg, Q-reg or structure mode for the
   cross-class case to succeed.  */
18514 && (VALID_NEON_DREG_MODE (mode1
)
18515 || VALID_NEON_QREG_MODE (mode1
)
18516 || VALID_NEON_STRUCT_MODE (mode1
))
18517 && (VALID_NEON_DREG_MODE (mode2
)
18518 || VALID_NEON_QREG_MODE (mode2
)
18519 || VALID_NEON_STRUCT_MODE (mode2
)))
18525 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18526 not used in arm mode. */
18529 arm_regno_class (int regno
)
18533 if (regno
== STACK_POINTER_REGNUM
)
18535 if (regno
== CC_REGNUM
)
18542 if (TARGET_THUMB2
&& regno
< 8)
18545 if ( regno
<= LAST_ARM_REGNUM
18546 || regno
== FRAME_POINTER_REGNUM
18547 || regno
== ARG_POINTER_REGNUM
)
18548 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
18550 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
18551 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
18553 if (IS_VFP_REGNUM (regno
))
18555 if (regno
<= D7_VFP_REGNUM
)
18556 return VFP_D0_D7_REGS
;
18557 else if (regno
<= LAST_LO_VFP_REGNUM
)
18558 return VFP_LO_REGS
;
18560 return VFP_HI_REGS
;
18563 if (IS_IWMMXT_REGNUM (regno
))
18564 return IWMMXT_REGS
;
18566 if (IS_IWMMXT_GR_REGNUM (regno
))
18567 return IWMMXT_GR_REGS
;
18572 /* Handle a special case when computing the offset
18573 of an argument from the frame pointer. */
18575 arm_debugger_arg_offset (int value
, rtx addr
)
18579 /* We are only interested if dbxout_parms() failed to compute the offset. */
18583 /* We can only cope with the case where the address is held in a register. */
18587 /* If we are using the frame pointer to point at the argument, then
18588 an offset of 0 is correct. */
18589 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
18592 /* If we are using the stack pointer to point at the
18593 argument, then an offset of 0 is correct. */
18594 /* ??? Check this is consistent with thumb2 frame layout. */
18595 if ((TARGET_THUMB
|| !frame_pointer_needed
)
18596 && REGNO (addr
) == SP_REGNUM
)
18599 /* Oh dear. The argument is pointed to by a register rather
18600 than being held in a register, or being stored at a known
18601 offset from the frame pointer. Since GDB only understands
18602 those two kinds of argument we must translate the address
18603 held in the register into an offset from the frame pointer.
18604 We do this by searching through the insns for the function
18605 looking to see where this register gets its value. If the
18606 register is initialized from the frame pointer plus an offset
18607 then we are in luck and we can continue, otherwise we give up.
18609 This code is exercised by producing debugging information
18610 for a function with arguments like this:
18612 double func (double a, double b, int c, double d) {return d;}
18614 Without this code the stab for parameter 'd' will be set to
18615 an offset of 0 from the frame pointer, rather than 8. */
18617 /* The if() statement says:
18619 If the insn is a normal instruction
18620 and if the insn is setting the value in a register
18621 and if the register being set is the register holding the address of the argument
18622 and if the address is computing by an addition
18623 that involves adding to a register
18624 which is the frame pointer
18629 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
18631 if ( NONJUMP_INSN_P (insn
)
18632 && GET_CODE (PATTERN (insn
)) == SET
18633 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
18634 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
18635 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
18636 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18637 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
18640 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
18649 warning (0, "unable to compute real location of stacked parameter");
18650 value
= 8; /* XXX magic hack */
18670 T_MAX
/* Size of enum. Keep last. */
18671 } neon_builtin_type_mode
;
/* Map a neon_builtin_type_mode enumerator to a single bit in a bitmask.  */
#define TYPE_MODE_BIT(X) (1 << (X))

/* Bitmask of all the "key" modes that fit in a 64-bit D register.  */
#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI)  \
		 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
		 | TYPE_MODE_BIT (T_DI))
/* Bitmask of all the "key" modes that fit in a 128-bit Q register.  */
#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI)  \
		 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
		 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))

/* Map a lower-case mode name onto its neon_builtin_type_mode enumerator;
   used through the UP() macro by the VARn table-entry macros below.  */
#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
/* NOTE(review): di_UP was dropped by the extraction; restored because the
   builtin table below uses "di" entries -- confirm against the original.  */
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI
/* NOTE(review): ti_UP likewise restored (TB_QREG references T_TI) -- confirm.  */
#define ti_UP    T_TI

#define UP(X) X##_UP
18729 NEON_LOADSTRUCTLANE
,
18731 NEON_STORESTRUCTLANE
,
18740 const neon_itype itype
;
18741 const neon_builtin_type_mode mode
;
18742 const enum insn_code code
;
18743 unsigned int fcode
;
18744 } neon_builtin_datum
;
/* Expand to the insn code enumerator for Neon builtin N with mode suffix X.  */
#define CF(N,X) CODE_FOR_neon_##N##X

/* VARn(T, N, A, ...) expands to n neon_builtin_datum initializers for
   builtin N of itype NEON_##T, one per listed mode.  Each VARn is defined
   as VARn-1 plus one more entry so that a VARn call yields exactly n
   entries.  (The original VAR2 had lost its chaining "VAR1 (T, N, A),"
   line and would have emitted only one entry; restored here.)  */
#define VAR1(T, N, A) \
  {#N, NEON_##T, UP (A), CF (N, A), 0}
#define VAR2(T, N, A, B) \
  VAR1 (T, N, A), \
  {#N, NEON_##T, UP (B), CF (N, B), 0}
#define VAR3(T, N, A, B, C) \
  VAR2 (T, N, A, B), \
  {#N, NEON_##T, UP (C), CF (N, C), 0}
#define VAR4(T, N, A, B, C, D) \
  VAR3 (T, N, A, B, C), \
  {#N, NEON_##T, UP (D), CF (N, D), 0}
#define VAR5(T, N, A, B, C, D, E) \
  VAR4 (T, N, A, B, C, D), \
  {#N, NEON_##T, UP (E), CF (N, E), 0}
#define VAR6(T, N, A, B, C, D, E, F) \
  VAR5 (T, N, A, B, C, D, E), \
  {#N, NEON_##T, UP (F), CF (N, F), 0}
#define VAR7(T, N, A, B, C, D, E, F, G) \
  VAR6 (T, N, A, B, C, D, E, F), \
  {#N, NEON_##T, UP (G), CF (N, G), 0}
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, A, B, C, D, E, F, G), \
  {#N, NEON_##T, UP (H), CF (N, H), 0}
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, A, B, C, D, E, F, G, H), \
  {#N, NEON_##T, UP (I), CF (N, I), 0}
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
  {#N, NEON_##T, UP (J), CF (N, J), 0}
/* The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not differentiated between though, and
   are all mapped onto the same mode for a given element size.)  The modes
   listed per instruction should be the same as those defined for that
   instruction's pattern in neon.md.  */
18786 static neon_builtin_datum neon_builtin_data
[] =
18788 VAR10 (BINOP
, vadd
,
18789 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18790 VAR3 (BINOP
, vaddl
, v8qi
, v4hi
, v2si
),
18791 VAR3 (BINOP
, vaddw
, v8qi
, v4hi
, v2si
),
18792 VAR6 (BINOP
, vhadd
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18793 VAR8 (BINOP
, vqadd
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18794 VAR3 (BINOP
, vaddhn
, v8hi
, v4si
, v2di
),
18795 VAR8 (BINOP
, vmul
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18796 VAR8 (TERNOP
, vmla
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18797 VAR3 (TERNOP
, vmlal
, v8qi
, v4hi
, v2si
),
18798 VAR2 (TERNOP
, vfma
, v2sf
, v4sf
),
18799 VAR2 (TERNOP
, vfms
, v2sf
, v4sf
),
18800 VAR8 (TERNOP
, vmls
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18801 VAR3 (TERNOP
, vmlsl
, v8qi
, v4hi
, v2si
),
18802 VAR4 (BINOP
, vqdmulh
, v4hi
, v2si
, v8hi
, v4si
),
18803 VAR2 (TERNOP
, vqdmlal
, v4hi
, v2si
),
18804 VAR2 (TERNOP
, vqdmlsl
, v4hi
, v2si
),
18805 VAR3 (BINOP
, vmull
, v8qi
, v4hi
, v2si
),
18806 VAR2 (SCALARMULL
, vmull_n
, v4hi
, v2si
),
18807 VAR2 (LANEMULL
, vmull_lane
, v4hi
, v2si
),
18808 VAR2 (SCALARMULL
, vqdmull_n
, v4hi
, v2si
),
18809 VAR2 (LANEMULL
, vqdmull_lane
, v4hi
, v2si
),
18810 VAR4 (SCALARMULH
, vqdmulh_n
, v4hi
, v2si
, v8hi
, v4si
),
18811 VAR4 (LANEMULH
, vqdmulh_lane
, v4hi
, v2si
, v8hi
, v4si
),
18812 VAR2 (BINOP
, vqdmull
, v4hi
, v2si
),
18813 VAR8 (BINOP
, vshl
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18814 VAR8 (BINOP
, vqshl
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18815 VAR8 (SHIFTIMM
, vshr_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18816 VAR3 (SHIFTIMM
, vshrn_n
, v8hi
, v4si
, v2di
),
18817 VAR3 (SHIFTIMM
, vqshrn_n
, v8hi
, v4si
, v2di
),
18818 VAR3 (SHIFTIMM
, vqshrun_n
, v8hi
, v4si
, v2di
),
18819 VAR8 (SHIFTIMM
, vshl_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18820 VAR8 (SHIFTIMM
, vqshl_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18821 VAR8 (SHIFTIMM
, vqshlu_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18822 VAR3 (SHIFTIMM
, vshll_n
, v8qi
, v4hi
, v2si
),
18823 VAR8 (SHIFTACC
, vsra_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18824 VAR10 (BINOP
, vsub
,
18825 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18826 VAR3 (BINOP
, vsubl
, v8qi
, v4hi
, v2si
),
18827 VAR3 (BINOP
, vsubw
, v8qi
, v4hi
, v2si
),
18828 VAR8 (BINOP
, vqsub
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18829 VAR6 (BINOP
, vhsub
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18830 VAR3 (BINOP
, vsubhn
, v8hi
, v4si
, v2di
),
18831 VAR8 (BINOP
, vceq
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18832 VAR8 (BINOP
, vcge
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18833 VAR6 (BINOP
, vcgeu
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18834 VAR8 (BINOP
, vcgt
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18835 VAR6 (BINOP
, vcgtu
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18836 VAR2 (BINOP
, vcage
, v2sf
, v4sf
),
18837 VAR2 (BINOP
, vcagt
, v2sf
, v4sf
),
18838 VAR6 (BINOP
, vtst
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18839 VAR8 (BINOP
, vabd
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18840 VAR3 (BINOP
, vabdl
, v8qi
, v4hi
, v2si
),
18841 VAR6 (TERNOP
, vaba
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18842 VAR3 (TERNOP
, vabal
, v8qi
, v4hi
, v2si
),
18843 VAR8 (BINOP
, vmax
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18844 VAR8 (BINOP
, vmin
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18845 VAR4 (BINOP
, vpadd
, v8qi
, v4hi
, v2si
, v2sf
),
18846 VAR6 (UNOP
, vpaddl
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18847 VAR6 (BINOP
, vpadal
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18848 VAR4 (BINOP
, vpmax
, v8qi
, v4hi
, v2si
, v2sf
),
18849 VAR4 (BINOP
, vpmin
, v8qi
, v4hi
, v2si
, v2sf
),
18850 VAR2 (BINOP
, vrecps
, v2sf
, v4sf
),
18851 VAR2 (BINOP
, vrsqrts
, v2sf
, v4sf
),
18852 VAR8 (SHIFTINSERT
, vsri_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18853 VAR8 (SHIFTINSERT
, vsli_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18854 VAR8 (UNOP
, vabs
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18855 VAR6 (UNOP
, vqabs
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18856 VAR8 (UNOP
, vneg
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18857 VAR6 (UNOP
, vqneg
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18858 VAR6 (UNOP
, vcls
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18859 VAR6 (UNOP
, vclz
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18860 VAR2 (UNOP
, vcnt
, v8qi
, v16qi
),
18861 VAR4 (UNOP
, vrecpe
, v2si
, v2sf
, v4si
, v4sf
),
18862 VAR4 (UNOP
, vrsqrte
, v2si
, v2sf
, v4si
, v4sf
),
18863 VAR6 (UNOP
, vmvn
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18864 /* FIXME: vget_lane supports more variants than this! */
18865 VAR10 (GETLANE
, vget_lane
,
18866 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18867 VAR10 (SETLANE
, vset_lane
,
18868 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18869 VAR5 (CREATE
, vcreate
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18870 VAR10 (DUP
, vdup_n
,
18871 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18872 VAR10 (DUPLANE
, vdup_lane
,
18873 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18874 VAR5 (COMBINE
, vcombine
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18875 VAR5 (SPLIT
, vget_high
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18876 VAR5 (SPLIT
, vget_low
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18877 VAR3 (UNOP
, vmovn
, v8hi
, v4si
, v2di
),
18878 VAR3 (UNOP
, vqmovn
, v8hi
, v4si
, v2di
),
18879 VAR3 (UNOP
, vqmovun
, v8hi
, v4si
, v2di
),
18880 VAR3 (UNOP
, vmovl
, v8qi
, v4hi
, v2si
),
18881 VAR6 (LANEMUL
, vmul_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18882 VAR6 (LANEMAC
, vmla_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18883 VAR2 (LANEMAC
, vmlal_lane
, v4hi
, v2si
),
18884 VAR2 (LANEMAC
, vqdmlal_lane
, v4hi
, v2si
),
18885 VAR6 (LANEMAC
, vmls_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18886 VAR2 (LANEMAC
, vmlsl_lane
, v4hi
, v2si
),
18887 VAR2 (LANEMAC
, vqdmlsl_lane
, v4hi
, v2si
),
18888 VAR6 (SCALARMUL
, vmul_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18889 VAR6 (SCALARMAC
, vmla_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18890 VAR2 (SCALARMAC
, vmlal_n
, v4hi
, v2si
),
18891 VAR2 (SCALARMAC
, vqdmlal_n
, v4hi
, v2si
),
18892 VAR6 (SCALARMAC
, vmls_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18893 VAR2 (SCALARMAC
, vmlsl_n
, v4hi
, v2si
),
18894 VAR2 (SCALARMAC
, vqdmlsl_n
, v4hi
, v2si
),
18895 VAR10 (BINOP
, vext
,
18896 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18897 VAR8 (UNOP
, vrev64
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18898 VAR4 (UNOP
, vrev32
, v8qi
, v4hi
, v16qi
, v8hi
),
18899 VAR2 (UNOP
, vrev16
, v8qi
, v16qi
),
18900 VAR4 (CONVERT
, vcvt
, v2si
, v2sf
, v4si
, v4sf
),
18901 VAR4 (FIXCONV
, vcvt_n
, v2si
, v2sf
, v4si
, v4sf
),
18902 VAR10 (SELECT
, vbsl
,
18903 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18904 VAR1 (VTBL
, vtbl1
, v8qi
),
18905 VAR1 (VTBL
, vtbl2
, v8qi
),
18906 VAR1 (VTBL
, vtbl3
, v8qi
),
18907 VAR1 (VTBL
, vtbl4
, v8qi
),
18908 VAR1 (VTBX
, vtbx1
, v8qi
),
18909 VAR1 (VTBX
, vtbx2
, v8qi
),
18910 VAR1 (VTBX
, vtbx3
, v8qi
),
18911 VAR1 (VTBX
, vtbx4
, v8qi
),
18912 VAR8 (RESULTPAIR
, vtrn
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18913 VAR8 (RESULTPAIR
, vzip
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18914 VAR8 (RESULTPAIR
, vuzp
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18915 VAR5 (REINTERP
, vreinterpretv8qi
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18916 VAR5 (REINTERP
, vreinterpretv4hi
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18917 VAR5 (REINTERP
, vreinterpretv2si
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18918 VAR5 (REINTERP
, vreinterpretv2sf
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18919 VAR5 (REINTERP
, vreinterpretdi
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18920 VAR5 (REINTERP
, vreinterpretv16qi
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18921 VAR5 (REINTERP
, vreinterpretv8hi
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18922 VAR5 (REINTERP
, vreinterpretv4si
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18923 VAR5 (REINTERP
, vreinterpretv4sf
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18924 VAR5 (REINTERP
, vreinterpretv2di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18925 VAR10 (LOAD1
, vld1
,
18926 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18927 VAR10 (LOAD1LANE
, vld1_lane
,
18928 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18929 VAR10 (LOAD1
, vld1_dup
,
18930 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18931 VAR10 (STORE1
, vst1
,
18932 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18933 VAR10 (STORE1LANE
, vst1_lane
,
18934 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18936 vld2
, v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18937 VAR7 (LOADSTRUCTLANE
, vld2_lane
,
18938 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18939 VAR5 (LOADSTRUCT
, vld2_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18940 VAR9 (STORESTRUCT
, vst2
,
18941 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18942 VAR7 (STORESTRUCTLANE
, vst2_lane
,
18943 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18945 vld3
, v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18946 VAR7 (LOADSTRUCTLANE
, vld3_lane
,
18947 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18948 VAR5 (LOADSTRUCT
, vld3_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18949 VAR9 (STORESTRUCT
, vst3
,
18950 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18951 VAR7 (STORESTRUCTLANE
, vst3_lane
,
18952 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18953 VAR9 (LOADSTRUCT
, vld4
,
18954 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18955 VAR7 (LOADSTRUCTLANE
, vld4_lane
,
18956 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18957 VAR5 (LOADSTRUCT
, vld4_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18958 VAR9 (STORESTRUCT
, vst4
,
18959 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18960 VAR7 (STORESTRUCTLANE
, vst4_lane
,
18961 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18962 VAR10 (LOGICBINOP
, vand
,
18963 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18964 VAR10 (LOGICBINOP
, vorr
,
18965 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18966 VAR10 (BINOP
, veor
,
18967 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18968 VAR10 (LOGICBINOP
, vbic
,
18969 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18970 VAR10 (LOGICBINOP
, vorn
,
18971 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
)
18986 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
18987 symbolic names defined here (which would require too much duplication).
18991 ARM_BUILTIN_GETWCGR0
,
18992 ARM_BUILTIN_GETWCGR1
,
18993 ARM_BUILTIN_GETWCGR2
,
18994 ARM_BUILTIN_GETWCGR3
,
18996 ARM_BUILTIN_SETWCGR0
,
18997 ARM_BUILTIN_SETWCGR1
,
18998 ARM_BUILTIN_SETWCGR2
,
18999 ARM_BUILTIN_SETWCGR3
,
19003 ARM_BUILTIN_WAVG2BR
,
19004 ARM_BUILTIN_WAVG2HR
,
19005 ARM_BUILTIN_WAVG2B
,
19006 ARM_BUILTIN_WAVG2H
,
19013 ARM_BUILTIN_WMACSZ
,
19015 ARM_BUILTIN_WMACUZ
,
19018 ARM_BUILTIN_WSADBZ
,
19020 ARM_BUILTIN_WSADHZ
,
19022 ARM_BUILTIN_WALIGNI
,
19023 ARM_BUILTIN_WALIGNR0
,
19024 ARM_BUILTIN_WALIGNR1
,
19025 ARM_BUILTIN_WALIGNR2
,
19026 ARM_BUILTIN_WALIGNR3
,
19029 ARM_BUILTIN_TMIAPH
,
19030 ARM_BUILTIN_TMIABB
,
19031 ARM_BUILTIN_TMIABT
,
19032 ARM_BUILTIN_TMIATB
,
19033 ARM_BUILTIN_TMIATT
,
19035 ARM_BUILTIN_TMOVMSKB
,
19036 ARM_BUILTIN_TMOVMSKH
,
19037 ARM_BUILTIN_TMOVMSKW
,
19039 ARM_BUILTIN_TBCSTB
,
19040 ARM_BUILTIN_TBCSTH
,
19041 ARM_BUILTIN_TBCSTW
,
19043 ARM_BUILTIN_WMADDS
,
19044 ARM_BUILTIN_WMADDU
,
19046 ARM_BUILTIN_WPACKHSS
,
19047 ARM_BUILTIN_WPACKWSS
,
19048 ARM_BUILTIN_WPACKDSS
,
19049 ARM_BUILTIN_WPACKHUS
,
19050 ARM_BUILTIN_WPACKWUS
,
19051 ARM_BUILTIN_WPACKDUS
,
19056 ARM_BUILTIN_WADDSSB
,
19057 ARM_BUILTIN_WADDSSH
,
19058 ARM_BUILTIN_WADDSSW
,
19059 ARM_BUILTIN_WADDUSB
,
19060 ARM_BUILTIN_WADDUSH
,
19061 ARM_BUILTIN_WADDUSW
,
19065 ARM_BUILTIN_WSUBSSB
,
19066 ARM_BUILTIN_WSUBSSH
,
19067 ARM_BUILTIN_WSUBSSW
,
19068 ARM_BUILTIN_WSUBUSB
,
19069 ARM_BUILTIN_WSUBUSH
,
19070 ARM_BUILTIN_WSUBUSW
,
19077 ARM_BUILTIN_WCMPEQB
,
19078 ARM_BUILTIN_WCMPEQH
,
19079 ARM_BUILTIN_WCMPEQW
,
19080 ARM_BUILTIN_WCMPGTUB
,
19081 ARM_BUILTIN_WCMPGTUH
,
19082 ARM_BUILTIN_WCMPGTUW
,
19083 ARM_BUILTIN_WCMPGTSB
,
19084 ARM_BUILTIN_WCMPGTSH
,
19085 ARM_BUILTIN_WCMPGTSW
,
19087 ARM_BUILTIN_TEXTRMSB
,
19088 ARM_BUILTIN_TEXTRMSH
,
19089 ARM_BUILTIN_TEXTRMSW
,
19090 ARM_BUILTIN_TEXTRMUB
,
19091 ARM_BUILTIN_TEXTRMUH
,
19092 ARM_BUILTIN_TEXTRMUW
,
19093 ARM_BUILTIN_TINSRB
,
19094 ARM_BUILTIN_TINSRH
,
19095 ARM_BUILTIN_TINSRW
,
19097 ARM_BUILTIN_WMAXSW
,
19098 ARM_BUILTIN_WMAXSH
,
19099 ARM_BUILTIN_WMAXSB
,
19100 ARM_BUILTIN_WMAXUW
,
19101 ARM_BUILTIN_WMAXUH
,
19102 ARM_BUILTIN_WMAXUB
,
19103 ARM_BUILTIN_WMINSW
,
19104 ARM_BUILTIN_WMINSH
,
19105 ARM_BUILTIN_WMINSB
,
19106 ARM_BUILTIN_WMINUW
,
19107 ARM_BUILTIN_WMINUH
,
19108 ARM_BUILTIN_WMINUB
,
19110 ARM_BUILTIN_WMULUM
,
19111 ARM_BUILTIN_WMULSM
,
19112 ARM_BUILTIN_WMULUL
,
19114 ARM_BUILTIN_PSADBH
,
19115 ARM_BUILTIN_WSHUFH
,
19129 ARM_BUILTIN_WSLLHI
,
19130 ARM_BUILTIN_WSLLWI
,
19131 ARM_BUILTIN_WSLLDI
,
19132 ARM_BUILTIN_WSRAHI
,
19133 ARM_BUILTIN_WSRAWI
,
19134 ARM_BUILTIN_WSRADI
,
19135 ARM_BUILTIN_WSRLHI
,
19136 ARM_BUILTIN_WSRLWI
,
19137 ARM_BUILTIN_WSRLDI
,
19138 ARM_BUILTIN_WRORHI
,
19139 ARM_BUILTIN_WRORWI
,
19140 ARM_BUILTIN_WRORDI
,
19142 ARM_BUILTIN_WUNPCKIHB
,
19143 ARM_BUILTIN_WUNPCKIHH
,
19144 ARM_BUILTIN_WUNPCKIHW
,
19145 ARM_BUILTIN_WUNPCKILB
,
19146 ARM_BUILTIN_WUNPCKILH
,
19147 ARM_BUILTIN_WUNPCKILW
,
19149 ARM_BUILTIN_WUNPCKEHSB
,
19150 ARM_BUILTIN_WUNPCKEHSH
,
19151 ARM_BUILTIN_WUNPCKEHSW
,
19152 ARM_BUILTIN_WUNPCKEHUB
,
19153 ARM_BUILTIN_WUNPCKEHUH
,
19154 ARM_BUILTIN_WUNPCKEHUW
,
19155 ARM_BUILTIN_WUNPCKELSB
,
19156 ARM_BUILTIN_WUNPCKELSH
,
19157 ARM_BUILTIN_WUNPCKELSW
,
19158 ARM_BUILTIN_WUNPCKELUB
,
19159 ARM_BUILTIN_WUNPCKELUH
,
19160 ARM_BUILTIN_WUNPCKELUW
,
19166 ARM_BUILTIN_WADDSUBHX
,
19167 ARM_BUILTIN_WSUBADDHX
,
19169 ARM_BUILTIN_WABSDIFFB
,
19170 ARM_BUILTIN_WABSDIFFH
,
19171 ARM_BUILTIN_WABSDIFFW
,
19173 ARM_BUILTIN_WADDCH
,
19174 ARM_BUILTIN_WADDCW
,
19177 ARM_BUILTIN_WAVG4R
,
19179 ARM_BUILTIN_WMADDSX
,
19180 ARM_BUILTIN_WMADDUX
,
19182 ARM_BUILTIN_WMADDSN
,
19183 ARM_BUILTIN_WMADDUN
,
19185 ARM_BUILTIN_WMULWSM
,
19186 ARM_BUILTIN_WMULWUM
,
19188 ARM_BUILTIN_WMULWSMR
,
19189 ARM_BUILTIN_WMULWUMR
,
19191 ARM_BUILTIN_WMULWL
,
19193 ARM_BUILTIN_WMULSMR
,
19194 ARM_BUILTIN_WMULUMR
,
19196 ARM_BUILTIN_WQMULM
,
19197 ARM_BUILTIN_WQMULMR
,
19199 ARM_BUILTIN_WQMULWM
,
19200 ARM_BUILTIN_WQMULWMR
,
19202 ARM_BUILTIN_WADDBHUSM
,
19203 ARM_BUILTIN_WADDBHUSL
,
19205 ARM_BUILTIN_WQMIABB
,
19206 ARM_BUILTIN_WQMIABT
,
19207 ARM_BUILTIN_WQMIATB
,
19208 ARM_BUILTIN_WQMIATT
,
19210 ARM_BUILTIN_WQMIABBN
,
19211 ARM_BUILTIN_WQMIABTN
,
19212 ARM_BUILTIN_WQMIATBN
,
19213 ARM_BUILTIN_WQMIATTN
,
19215 ARM_BUILTIN_WMIABB
,
19216 ARM_BUILTIN_WMIABT
,
19217 ARM_BUILTIN_WMIATB
,
19218 ARM_BUILTIN_WMIATT
,
19220 ARM_BUILTIN_WMIABBN
,
19221 ARM_BUILTIN_WMIABTN
,
19222 ARM_BUILTIN_WMIATBN
,
19223 ARM_BUILTIN_WMIATTN
,
19225 ARM_BUILTIN_WMIAWBB
,
19226 ARM_BUILTIN_WMIAWBT
,
19227 ARM_BUILTIN_WMIAWTB
,
19228 ARM_BUILTIN_WMIAWTT
,
19230 ARM_BUILTIN_WMIAWBBN
,
19231 ARM_BUILTIN_WMIAWBTN
,
19232 ARM_BUILTIN_WMIAWTBN
,
19233 ARM_BUILTIN_WMIAWTTN
,
19235 ARM_BUILTIN_WMERGE
,
19237 ARM_BUILTIN_NEON_BASE
,
19239 ARM_BUILTIN_MAX
= ARM_BUILTIN_NEON_BASE
+ ARRAY_SIZE (neon_builtin_data
)
19242 static GTY(()) tree arm_builtin_decls
[ARM_BUILTIN_MAX
];
19245 arm_init_neon_builtins (void)
19247 unsigned int i
, fcode
;
19250 tree neon_intQI_type_node
;
19251 tree neon_intHI_type_node
;
19252 tree neon_polyQI_type_node
;
19253 tree neon_polyHI_type_node
;
19254 tree neon_intSI_type_node
;
19255 tree neon_intDI_type_node
;
19256 tree neon_float_type_node
;
19258 tree intQI_pointer_node
;
19259 tree intHI_pointer_node
;
19260 tree intSI_pointer_node
;
19261 tree intDI_pointer_node
;
19262 tree float_pointer_node
;
19264 tree const_intQI_node
;
19265 tree const_intHI_node
;
19266 tree const_intSI_node
;
19267 tree const_intDI_node
;
19268 tree const_float_node
;
19270 tree const_intQI_pointer_node
;
19271 tree const_intHI_pointer_node
;
19272 tree const_intSI_pointer_node
;
19273 tree const_intDI_pointer_node
;
19274 tree const_float_pointer_node
;
19276 tree V8QI_type_node
;
19277 tree V4HI_type_node
;
19278 tree V2SI_type_node
;
19279 tree V2SF_type_node
;
19280 tree V16QI_type_node
;
19281 tree V8HI_type_node
;
19282 tree V4SI_type_node
;
19283 tree V4SF_type_node
;
19284 tree V2DI_type_node
;
19286 tree intUQI_type_node
;
19287 tree intUHI_type_node
;
19288 tree intUSI_type_node
;
19289 tree intUDI_type_node
;
19291 tree intEI_type_node
;
19292 tree intOI_type_node
;
19293 tree intCI_type_node
;
19294 tree intXI_type_node
;
19296 tree V8QI_pointer_node
;
19297 tree V4HI_pointer_node
;
19298 tree V2SI_pointer_node
;
19299 tree V2SF_pointer_node
;
19300 tree V16QI_pointer_node
;
19301 tree V8HI_pointer_node
;
19302 tree V4SI_pointer_node
;
19303 tree V4SF_pointer_node
;
19304 tree V2DI_pointer_node
;
19306 tree void_ftype_pv8qi_v8qi_v8qi
;
19307 tree void_ftype_pv4hi_v4hi_v4hi
;
19308 tree void_ftype_pv2si_v2si_v2si
;
19309 tree void_ftype_pv2sf_v2sf_v2sf
;
19310 tree void_ftype_pdi_di_di
;
19311 tree void_ftype_pv16qi_v16qi_v16qi
;
19312 tree void_ftype_pv8hi_v8hi_v8hi
;
19313 tree void_ftype_pv4si_v4si_v4si
;
19314 tree void_ftype_pv4sf_v4sf_v4sf
;
19315 tree void_ftype_pv2di_v2di_v2di
;
19317 tree reinterp_ftype_dreg
[5][5];
19318 tree reinterp_ftype_qreg
[5][5];
19319 tree dreg_types
[5], qreg_types
[5];
19321 /* Create distinguished type nodes for NEON vector element types,
19322 and pointers to values of such types, so we can detect them later. */
19323 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
19324 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
19325 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
19326 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
19327 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
19328 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
19329 neon_float_type_node
= make_node (REAL_TYPE
);
19330 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
19331 layout_type (neon_float_type_node
);
19333 /* Define typedefs which exactly correspond to the modes we are basing vector
19334 types on. If you change these names you'll need to change
19335 the table used by arm_mangle_type too. */
19336 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
19337 "__builtin_neon_qi");
19338 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
19339 "__builtin_neon_hi");
19340 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
19341 "__builtin_neon_si");
19342 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
19343 "__builtin_neon_sf");
19344 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
19345 "__builtin_neon_di");
19346 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
19347 "__builtin_neon_poly8");
19348 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
19349 "__builtin_neon_poly16");
19351 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
19352 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
19353 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
19354 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
19355 float_pointer_node
= build_pointer_type (neon_float_type_node
);
19357 /* Next create constant-qualified versions of the above types. */
19358 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
19360 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
19362 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
19364 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
19366 const_float_node
= build_qualified_type (neon_float_type_node
,
19369 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
19370 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
19371 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
19372 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
19373 const_float_pointer_node
= build_pointer_type (const_float_node
);
19375 /* Now create vector types based on our NEON element types. */
19376 /* 64-bit vectors. */
19378 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
19380 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
19382 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
19384 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
19385 /* 128-bit vectors. */
19387 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
19389 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
19391 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
19393 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
19395 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
19397 /* Unsigned integer types for various mode sizes. */
19398 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
19399 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
19400 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
19401 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
19403 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
19404 "__builtin_neon_uqi");
19405 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
19406 "__builtin_neon_uhi");
19407 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
19408 "__builtin_neon_usi");
19409 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
19410 "__builtin_neon_udi");
19412 /* Opaque integer types for structures of vectors. */
19413 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
19414 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
19415 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
19416 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
19418 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
19419 "__builtin_neon_ti");
19420 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
19421 "__builtin_neon_ei");
19422 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
19423 "__builtin_neon_oi");
19424 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
19425 "__builtin_neon_ci");
19426 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
19427 "__builtin_neon_xi");
19429 /* Pointers to vector types. */
19430 V8QI_pointer_node
= build_pointer_type (V8QI_type_node
);
19431 V4HI_pointer_node
= build_pointer_type (V4HI_type_node
);
19432 V2SI_pointer_node
= build_pointer_type (V2SI_type_node
);
19433 V2SF_pointer_node
= build_pointer_type (V2SF_type_node
);
19434 V16QI_pointer_node
= build_pointer_type (V16QI_type_node
);
19435 V8HI_pointer_node
= build_pointer_type (V8HI_type_node
);
19436 V4SI_pointer_node
= build_pointer_type (V4SI_type_node
);
19437 V4SF_pointer_node
= build_pointer_type (V4SF_type_node
);
19438 V2DI_pointer_node
= build_pointer_type (V2DI_type_node
);
19440 /* Operations which return results as pairs. */
19441 void_ftype_pv8qi_v8qi_v8qi
=
19442 build_function_type_list (void_type_node
, V8QI_pointer_node
, V8QI_type_node
,
19443 V8QI_type_node
, NULL
);
19444 void_ftype_pv4hi_v4hi_v4hi
=
19445 build_function_type_list (void_type_node
, V4HI_pointer_node
, V4HI_type_node
,
19446 V4HI_type_node
, NULL
);
19447 void_ftype_pv2si_v2si_v2si
=
19448 build_function_type_list (void_type_node
, V2SI_pointer_node
, V2SI_type_node
,
19449 V2SI_type_node
, NULL
);
19450 void_ftype_pv2sf_v2sf_v2sf
=
19451 build_function_type_list (void_type_node
, V2SF_pointer_node
, V2SF_type_node
,
19452 V2SF_type_node
, NULL
);
19453 void_ftype_pdi_di_di
=
19454 build_function_type_list (void_type_node
, intDI_pointer_node
,
19455 neon_intDI_type_node
, neon_intDI_type_node
, NULL
);
19456 void_ftype_pv16qi_v16qi_v16qi
=
19457 build_function_type_list (void_type_node
, V16QI_pointer_node
,
19458 V16QI_type_node
, V16QI_type_node
, NULL
);
19459 void_ftype_pv8hi_v8hi_v8hi
=
19460 build_function_type_list (void_type_node
, V8HI_pointer_node
, V8HI_type_node
,
19461 V8HI_type_node
, NULL
);
19462 void_ftype_pv4si_v4si_v4si
=
19463 build_function_type_list (void_type_node
, V4SI_pointer_node
, V4SI_type_node
,
19464 V4SI_type_node
, NULL
);
19465 void_ftype_pv4sf_v4sf_v4sf
=
19466 build_function_type_list (void_type_node
, V4SF_pointer_node
, V4SF_type_node
,
19467 V4SF_type_node
, NULL
);
19468 void_ftype_pv2di_v2di_v2di
=
19469 build_function_type_list (void_type_node
, V2DI_pointer_node
, V2DI_type_node
,
19470 V2DI_type_node
, NULL
);
19472 dreg_types
[0] = V8QI_type_node
;
19473 dreg_types
[1] = V4HI_type_node
;
19474 dreg_types
[2] = V2SI_type_node
;
19475 dreg_types
[3] = V2SF_type_node
;
19476 dreg_types
[4] = neon_intDI_type_node
;
19478 qreg_types
[0] = V16QI_type_node
;
19479 qreg_types
[1] = V8HI_type_node
;
19480 qreg_types
[2] = V4SI_type_node
;
19481 qreg_types
[3] = V4SF_type_node
;
19482 qreg_types
[4] = V2DI_type_node
;
19484 for (i
= 0; i
< 5; i
++)
19487 for (j
= 0; j
< 5; j
++)
19489 reinterp_ftype_dreg
[i
][j
]
19490 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
19491 reinterp_ftype_qreg
[i
][j
]
19492 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
19496 for (i
= 0, fcode
= ARM_BUILTIN_NEON_BASE
;
19497 i
< ARRAY_SIZE (neon_builtin_data
);
19500 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
19502 const char* const modenames
[] = {
19503 "v8qi", "v4hi", "v2si", "v2sf", "di",
19504 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19509 int is_load
= 0, is_store
= 0;
19511 gcc_assert (ARRAY_SIZE (modenames
) == T_MAX
);
19518 case NEON_LOAD1LANE
:
19519 case NEON_LOADSTRUCT
:
19520 case NEON_LOADSTRUCTLANE
:
19522 /* Fall through. */
19524 case NEON_STORE1LANE
:
19525 case NEON_STORESTRUCT
:
19526 case NEON_STORESTRUCTLANE
:
19529 /* Fall through. */
19532 case NEON_LOGICBINOP
:
19533 case NEON_SHIFTINSERT
:
19540 case NEON_SHIFTIMM
:
19541 case NEON_SHIFTACC
:
19547 case NEON_LANEMULL
:
19548 case NEON_LANEMULH
:
19550 case NEON_SCALARMUL
:
19551 case NEON_SCALARMULL
:
19552 case NEON_SCALARMULH
:
19553 case NEON_SCALARMAC
:
19559 tree return_type
= void_type_node
, args
= void_list_node
;
19561 /* Build a function type directly from the insn_data for
19562 this builtin. The build_function_type() function takes
19563 care of removing duplicates for us. */
19564 for (k
= insn_data
[d
->code
].n_generator_args
- 1; k
>= 0; k
--)
19568 if (is_load
&& k
== 1)
19570 /* Neon load patterns always have the memory
19571 operand in the operand 1 position. */
19572 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
19573 == neon_struct_operand
);
19579 eltype
= const_intQI_pointer_node
;
19584 eltype
= const_intHI_pointer_node
;
19589 eltype
= const_intSI_pointer_node
;
19594 eltype
= const_float_pointer_node
;
19599 eltype
= const_intDI_pointer_node
;
19602 default: gcc_unreachable ();
19605 else if (is_store
&& k
== 0)
19607 /* Similarly, Neon store patterns use operand 0 as
19608 the memory location to store to. */
19609 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
19610 == neon_struct_operand
);
19616 eltype
= intQI_pointer_node
;
19621 eltype
= intHI_pointer_node
;
19626 eltype
= intSI_pointer_node
;
19631 eltype
= float_pointer_node
;
19636 eltype
= intDI_pointer_node
;
19639 default: gcc_unreachable ();
19644 switch (insn_data
[d
->code
].operand
[k
].mode
)
19646 case VOIDmode
: eltype
= void_type_node
; break;
19648 case QImode
: eltype
= neon_intQI_type_node
; break;
19649 case HImode
: eltype
= neon_intHI_type_node
; break;
19650 case SImode
: eltype
= neon_intSI_type_node
; break;
19651 case SFmode
: eltype
= neon_float_type_node
; break;
19652 case DImode
: eltype
= neon_intDI_type_node
; break;
19653 case TImode
: eltype
= intTI_type_node
; break;
19654 case EImode
: eltype
= intEI_type_node
; break;
19655 case OImode
: eltype
= intOI_type_node
; break;
19656 case CImode
: eltype
= intCI_type_node
; break;
19657 case XImode
: eltype
= intXI_type_node
; break;
19658 /* 64-bit vectors. */
19659 case V8QImode
: eltype
= V8QI_type_node
; break;
19660 case V4HImode
: eltype
= V4HI_type_node
; break;
19661 case V2SImode
: eltype
= V2SI_type_node
; break;
19662 case V2SFmode
: eltype
= V2SF_type_node
; break;
19663 /* 128-bit vectors. */
19664 case V16QImode
: eltype
= V16QI_type_node
; break;
19665 case V8HImode
: eltype
= V8HI_type_node
; break;
19666 case V4SImode
: eltype
= V4SI_type_node
; break;
19667 case V4SFmode
: eltype
= V4SF_type_node
; break;
19668 case V2DImode
: eltype
= V2DI_type_node
; break;
19669 default: gcc_unreachable ();
19673 if (k
== 0 && !is_store
)
19674 return_type
= eltype
;
19676 args
= tree_cons (NULL_TREE
, eltype
, args
);
19679 ftype
= build_function_type (return_type
, args
);
19683 case NEON_RESULTPAIR
:
19685 switch (insn_data
[d
->code
].operand
[1].mode
)
19687 case V8QImode
: ftype
= void_ftype_pv8qi_v8qi_v8qi
; break;
19688 case V4HImode
: ftype
= void_ftype_pv4hi_v4hi_v4hi
; break;
19689 case V2SImode
: ftype
= void_ftype_pv2si_v2si_v2si
; break;
19690 case V2SFmode
: ftype
= void_ftype_pv2sf_v2sf_v2sf
; break;
19691 case DImode
: ftype
= void_ftype_pdi_di_di
; break;
19692 case V16QImode
: ftype
= void_ftype_pv16qi_v16qi_v16qi
; break;
19693 case V8HImode
: ftype
= void_ftype_pv8hi_v8hi_v8hi
; break;
19694 case V4SImode
: ftype
= void_ftype_pv4si_v4si_v4si
; break;
19695 case V4SFmode
: ftype
= void_ftype_pv4sf_v4sf_v4sf
; break;
19696 case V2DImode
: ftype
= void_ftype_pv2di_v2di_v2di
; break;
19697 default: gcc_unreachable ();
19702 case NEON_REINTERP
:
19704 /* We iterate over 5 doubleword types, then 5 quadword
19706 int rhs
= d
->mode
% 5;
19707 switch (insn_data
[d
->code
].operand
[0].mode
)
19709 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
19710 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
19711 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
19712 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
19713 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
19714 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
19715 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
19716 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
19717 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
19718 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
19719 default: gcc_unreachable ();
19725 gcc_unreachable ();
19728 gcc_assert (ftype
!= NULL
);
19730 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[d
->mode
]);
19732 decl
= add_builtin_function (namebuf
, ftype
, fcode
, BUILT_IN_MD
, NULL
,
19734 arm_builtin_decls
[fcode
] = decl
;
/* Register one iWMMXt builtin, but only when every architecture flag
   in MASK is present in insn_flags; the resulting decl is recorded in
   arm_builtin_decls so that arm_builtin_decl can later return it.  */
#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
  do									\
    {									\
      if ((MASK) & insn_flags)						\
	{								\
	  tree bdecl;							\
	  bdecl = add_builtin_function ((NAME), (TYPE), (CODE),		\
					BUILT_IN_MD, NULL, NULL_TREE);	\
	  arm_builtin_decls[CODE] = bdecl;				\
	}								\
    }									\
  while (0)
19751 struct builtin_description
19753 const unsigned int mask
;
19754 const enum insn_code icode
;
19755 const char * const name
;
19756 const enum arm_builtins code
;
19757 const enum rtx_code comparison
;
19758 const unsigned int flag
;
19761 static const struct builtin_description bdesc_2arg
[] =
19763 #define IWMMXT_BUILTIN(code, string, builtin) \
19764 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19765 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19767 #define IWMMXT2_BUILTIN(code, string, builtin) \
19768 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
19769 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19771 IWMMXT_BUILTIN (addv8qi3
, "waddb", WADDB
)
19772 IWMMXT_BUILTIN (addv4hi3
, "waddh", WADDH
)
19773 IWMMXT_BUILTIN (addv2si3
, "waddw", WADDW
)
19774 IWMMXT_BUILTIN (subv8qi3
, "wsubb", WSUBB
)
19775 IWMMXT_BUILTIN (subv4hi3
, "wsubh", WSUBH
)
19776 IWMMXT_BUILTIN (subv2si3
, "wsubw", WSUBW
)
19777 IWMMXT_BUILTIN (ssaddv8qi3
, "waddbss", WADDSSB
)
19778 IWMMXT_BUILTIN (ssaddv4hi3
, "waddhss", WADDSSH
)
19779 IWMMXT_BUILTIN (ssaddv2si3
, "waddwss", WADDSSW
)
19780 IWMMXT_BUILTIN (sssubv8qi3
, "wsubbss", WSUBSSB
)
19781 IWMMXT_BUILTIN (sssubv4hi3
, "wsubhss", WSUBSSH
)
19782 IWMMXT_BUILTIN (sssubv2si3
, "wsubwss", WSUBSSW
)
19783 IWMMXT_BUILTIN (usaddv8qi3
, "waddbus", WADDUSB
)
19784 IWMMXT_BUILTIN (usaddv4hi3
, "waddhus", WADDUSH
)
19785 IWMMXT_BUILTIN (usaddv2si3
, "waddwus", WADDUSW
)
19786 IWMMXT_BUILTIN (ussubv8qi3
, "wsubbus", WSUBUSB
)
19787 IWMMXT_BUILTIN (ussubv4hi3
, "wsubhus", WSUBUSH
)
19788 IWMMXT_BUILTIN (ussubv2si3
, "wsubwus", WSUBUSW
)
19789 IWMMXT_BUILTIN (mulv4hi3
, "wmulul", WMULUL
)
19790 IWMMXT_BUILTIN (smulv4hi3_highpart
, "wmulsm", WMULSM
)
19791 IWMMXT_BUILTIN (umulv4hi3_highpart
, "wmulum", WMULUM
)
19792 IWMMXT_BUILTIN (eqv8qi3
, "wcmpeqb", WCMPEQB
)
19793 IWMMXT_BUILTIN (eqv4hi3
, "wcmpeqh", WCMPEQH
)
19794 IWMMXT_BUILTIN (eqv2si3
, "wcmpeqw", WCMPEQW
)
19795 IWMMXT_BUILTIN (gtuv8qi3
, "wcmpgtub", WCMPGTUB
)
19796 IWMMXT_BUILTIN (gtuv4hi3
, "wcmpgtuh", WCMPGTUH
)
19797 IWMMXT_BUILTIN (gtuv2si3
, "wcmpgtuw", WCMPGTUW
)
19798 IWMMXT_BUILTIN (gtv8qi3
, "wcmpgtsb", WCMPGTSB
)
19799 IWMMXT_BUILTIN (gtv4hi3
, "wcmpgtsh", WCMPGTSH
)
19800 IWMMXT_BUILTIN (gtv2si3
, "wcmpgtsw", WCMPGTSW
)
19801 IWMMXT_BUILTIN (umaxv8qi3
, "wmaxub", WMAXUB
)
19802 IWMMXT_BUILTIN (smaxv8qi3
, "wmaxsb", WMAXSB
)
19803 IWMMXT_BUILTIN (umaxv4hi3
, "wmaxuh", WMAXUH
)
19804 IWMMXT_BUILTIN (smaxv4hi3
, "wmaxsh", WMAXSH
)
19805 IWMMXT_BUILTIN (umaxv2si3
, "wmaxuw", WMAXUW
)
19806 IWMMXT_BUILTIN (smaxv2si3
, "wmaxsw", WMAXSW
)
19807 IWMMXT_BUILTIN (uminv8qi3
, "wminub", WMINUB
)
19808 IWMMXT_BUILTIN (sminv8qi3
, "wminsb", WMINSB
)
19809 IWMMXT_BUILTIN (uminv4hi3
, "wminuh", WMINUH
)
19810 IWMMXT_BUILTIN (sminv4hi3
, "wminsh", WMINSH
)
19811 IWMMXT_BUILTIN (uminv2si3
, "wminuw", WMINUW
)
19812 IWMMXT_BUILTIN (sminv2si3
, "wminsw", WMINSW
)
19813 IWMMXT_BUILTIN (iwmmxt_anddi3
, "wand", WAND
)
19814 IWMMXT_BUILTIN (iwmmxt_nanddi3
, "wandn", WANDN
)
19815 IWMMXT_BUILTIN (iwmmxt_iordi3
, "wor", WOR
)
19816 IWMMXT_BUILTIN (iwmmxt_xordi3
, "wxor", WXOR
)
19817 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3
, "wavg2b", WAVG2B
)
19818 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3
, "wavg2h", WAVG2H
)
19819 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3
, "wavg2br", WAVG2BR
)
19820 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3
, "wavg2hr", WAVG2HR
)
19821 IWMMXT_BUILTIN (iwmmxt_wunpckilb
, "wunpckilb", WUNPCKILB
)
19822 IWMMXT_BUILTIN (iwmmxt_wunpckilh
, "wunpckilh", WUNPCKILH
)
19823 IWMMXT_BUILTIN (iwmmxt_wunpckilw
, "wunpckilw", WUNPCKILW
)
19824 IWMMXT_BUILTIN (iwmmxt_wunpckihb
, "wunpckihb", WUNPCKIHB
)
19825 IWMMXT_BUILTIN (iwmmxt_wunpckihh
, "wunpckihh", WUNPCKIHH
)
19826 IWMMXT_BUILTIN (iwmmxt_wunpckihw
, "wunpckihw", WUNPCKIHW
)
19827 IWMMXT2_BUILTIN (iwmmxt_waddsubhx
, "waddsubhx", WADDSUBHX
)
19828 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx
, "wsubaddhx", WSUBADDHX
)
19829 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb
, "wabsdiffb", WABSDIFFB
)
19830 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh
, "wabsdiffh", WABSDIFFH
)
19831 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw
, "wabsdiffw", WABSDIFFW
)
19832 IWMMXT2_BUILTIN (iwmmxt_avg4
, "wavg4", WAVG4
)
19833 IWMMXT2_BUILTIN (iwmmxt_avg4r
, "wavg4r", WAVG4R
)
19834 IWMMXT2_BUILTIN (iwmmxt_wmulwsm
, "wmulwsm", WMULWSM
)
19835 IWMMXT2_BUILTIN (iwmmxt_wmulwum
, "wmulwum", WMULWUM
)
19836 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr
, "wmulwsmr", WMULWSMR
)
19837 IWMMXT2_BUILTIN (iwmmxt_wmulwumr
, "wmulwumr", WMULWUMR
)
19838 IWMMXT2_BUILTIN (iwmmxt_wmulwl
, "wmulwl", WMULWL
)
19839 IWMMXT2_BUILTIN (iwmmxt_wmulsmr
, "wmulsmr", WMULSMR
)
19840 IWMMXT2_BUILTIN (iwmmxt_wmulumr
, "wmulumr", WMULUMR
)
19841 IWMMXT2_BUILTIN (iwmmxt_wqmulm
, "wqmulm", WQMULM
)
19842 IWMMXT2_BUILTIN (iwmmxt_wqmulmr
, "wqmulmr", WQMULMR
)
19843 IWMMXT2_BUILTIN (iwmmxt_wqmulwm
, "wqmulwm", WQMULWM
)
19844 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr
, "wqmulwmr", WQMULWMR
)
19845 IWMMXT_BUILTIN (iwmmxt_walignr0
, "walignr0", WALIGNR0
)
19846 IWMMXT_BUILTIN (iwmmxt_walignr1
, "walignr1", WALIGNR1
)
19847 IWMMXT_BUILTIN (iwmmxt_walignr2
, "walignr2", WALIGNR2
)
19848 IWMMXT_BUILTIN (iwmmxt_walignr3
, "walignr3", WALIGNR3
)
19850 #define IWMMXT_BUILTIN2(code, builtin) \
19851 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19853 #define IWMMXT2_BUILTIN2(code, builtin) \
19854 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19856 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm
, WADDBHUSM
)
19857 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl
, WADDBHUSL
)
19858 IWMMXT_BUILTIN2 (iwmmxt_wpackhss
, WPACKHSS
)
19859 IWMMXT_BUILTIN2 (iwmmxt_wpackwss
, WPACKWSS
)
19860 IWMMXT_BUILTIN2 (iwmmxt_wpackdss
, WPACKDSS
)
19861 IWMMXT_BUILTIN2 (iwmmxt_wpackhus
, WPACKHUS
)
19862 IWMMXT_BUILTIN2 (iwmmxt_wpackwus
, WPACKWUS
)
19863 IWMMXT_BUILTIN2 (iwmmxt_wpackdus
, WPACKDUS
)
19864 IWMMXT_BUILTIN2 (iwmmxt_wmacuz
, WMACUZ
)
19865 IWMMXT_BUILTIN2 (iwmmxt_wmacsz
, WMACSZ
)
19868 static const struct builtin_description bdesc_1arg
[] =
19870 IWMMXT_BUILTIN (iwmmxt_tmovmskb
, "tmovmskb", TMOVMSKB
)
19871 IWMMXT_BUILTIN (iwmmxt_tmovmskh
, "tmovmskh", TMOVMSKH
)
19872 IWMMXT_BUILTIN (iwmmxt_tmovmskw
, "tmovmskw", TMOVMSKW
)
19873 IWMMXT_BUILTIN (iwmmxt_waccb
, "waccb", WACCB
)
19874 IWMMXT_BUILTIN (iwmmxt_wacch
, "wacch", WACCH
)
19875 IWMMXT_BUILTIN (iwmmxt_waccw
, "waccw", WACCW
)
19876 IWMMXT_BUILTIN (iwmmxt_wunpckehub
, "wunpckehub", WUNPCKEHUB
)
19877 IWMMXT_BUILTIN (iwmmxt_wunpckehuh
, "wunpckehuh", WUNPCKEHUH
)
19878 IWMMXT_BUILTIN (iwmmxt_wunpckehuw
, "wunpckehuw", WUNPCKEHUW
)
19879 IWMMXT_BUILTIN (iwmmxt_wunpckehsb
, "wunpckehsb", WUNPCKEHSB
)
19880 IWMMXT_BUILTIN (iwmmxt_wunpckehsh
, "wunpckehsh", WUNPCKEHSH
)
19881 IWMMXT_BUILTIN (iwmmxt_wunpckehsw
, "wunpckehsw", WUNPCKEHSW
)
19882 IWMMXT_BUILTIN (iwmmxt_wunpckelub
, "wunpckelub", WUNPCKELUB
)
19883 IWMMXT_BUILTIN (iwmmxt_wunpckeluh
, "wunpckeluh", WUNPCKELUH
)
19884 IWMMXT_BUILTIN (iwmmxt_wunpckeluw
, "wunpckeluw", WUNPCKELUW
)
19885 IWMMXT_BUILTIN (iwmmxt_wunpckelsb
, "wunpckelsb", WUNPCKELSB
)
19886 IWMMXT_BUILTIN (iwmmxt_wunpckelsh
, "wunpckelsh", WUNPCKELSH
)
19887 IWMMXT_BUILTIN (iwmmxt_wunpckelsw
, "wunpckelsw", WUNPCKELSW
)
19888 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3
, "wabsb", WABSB
)
19889 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3
, "wabsh", WABSH
)
19890 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3
, "wabsw", WABSW
)
19891 IWMMXT_BUILTIN (tbcstv8qi
, "tbcstb", TBCSTB
)
19892 IWMMXT_BUILTIN (tbcstv4hi
, "tbcsth", TBCSTH
)
19893 IWMMXT_BUILTIN (tbcstv2si
, "tbcstw", TBCSTW
)
19896 /* Set up all the iWMMXt builtins. This is not called if
19897 TARGET_IWMMXT is zero. */
19900 arm_init_iwmmxt_builtins (void)
19902 const struct builtin_description
* d
;
19905 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
19906 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
19907 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
19909 tree v8qi_ftype_v8qi_v8qi_int
19910 = build_function_type_list (V8QI_type_node
,
19911 V8QI_type_node
, V8QI_type_node
,
19912 integer_type_node
, NULL_TREE
);
19913 tree v4hi_ftype_v4hi_int
19914 = build_function_type_list (V4HI_type_node
,
19915 V4HI_type_node
, integer_type_node
, NULL_TREE
);
19916 tree v2si_ftype_v2si_int
19917 = build_function_type_list (V2SI_type_node
,
19918 V2SI_type_node
, integer_type_node
, NULL_TREE
);
19919 tree v2si_ftype_di_di
19920 = build_function_type_list (V2SI_type_node
,
19921 long_long_integer_type_node
,
19922 long_long_integer_type_node
,
19924 tree di_ftype_di_int
19925 = build_function_type_list (long_long_integer_type_node
,
19926 long_long_integer_type_node
,
19927 integer_type_node
, NULL_TREE
);
19928 tree di_ftype_di_int_int
19929 = build_function_type_list (long_long_integer_type_node
,
19930 long_long_integer_type_node
,
19932 integer_type_node
, NULL_TREE
);
19933 tree int_ftype_v8qi
19934 = build_function_type_list (integer_type_node
,
19935 V8QI_type_node
, NULL_TREE
);
19936 tree int_ftype_v4hi
19937 = build_function_type_list (integer_type_node
,
19938 V4HI_type_node
, NULL_TREE
);
19939 tree int_ftype_v2si
19940 = build_function_type_list (integer_type_node
,
19941 V2SI_type_node
, NULL_TREE
);
19942 tree int_ftype_v8qi_int
19943 = build_function_type_list (integer_type_node
,
19944 V8QI_type_node
, integer_type_node
, NULL_TREE
);
19945 tree int_ftype_v4hi_int
19946 = build_function_type_list (integer_type_node
,
19947 V4HI_type_node
, integer_type_node
, NULL_TREE
);
19948 tree int_ftype_v2si_int
19949 = build_function_type_list (integer_type_node
,
19950 V2SI_type_node
, integer_type_node
, NULL_TREE
);
19951 tree v8qi_ftype_v8qi_int_int
19952 = build_function_type_list (V8QI_type_node
,
19953 V8QI_type_node
, integer_type_node
,
19954 integer_type_node
, NULL_TREE
);
19955 tree v4hi_ftype_v4hi_int_int
19956 = build_function_type_list (V4HI_type_node
,
19957 V4HI_type_node
, integer_type_node
,
19958 integer_type_node
, NULL_TREE
);
19959 tree v2si_ftype_v2si_int_int
19960 = build_function_type_list (V2SI_type_node
,
19961 V2SI_type_node
, integer_type_node
,
19962 integer_type_node
, NULL_TREE
);
19963 /* Miscellaneous. */
19964 tree v8qi_ftype_v4hi_v4hi
19965 = build_function_type_list (V8QI_type_node
,
19966 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
19967 tree v4hi_ftype_v2si_v2si
19968 = build_function_type_list (V4HI_type_node
,
19969 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
19970 tree v8qi_ftype_v4hi_v8qi
19971 = build_function_type_list (V8QI_type_node
,
19972 V4HI_type_node
, V8QI_type_node
, NULL_TREE
);
19973 tree v2si_ftype_v4hi_v4hi
19974 = build_function_type_list (V2SI_type_node
,
19975 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
19976 tree v2si_ftype_v8qi_v8qi
19977 = build_function_type_list (V2SI_type_node
,
19978 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
19979 tree v4hi_ftype_v4hi_di
19980 = build_function_type_list (V4HI_type_node
,
19981 V4HI_type_node
, long_long_integer_type_node
,
19983 tree v2si_ftype_v2si_di
19984 = build_function_type_list (V2SI_type_node
,
19985 V2SI_type_node
, long_long_integer_type_node
,
19988 = build_function_type_list (long_long_unsigned_type_node
, NULL_TREE
);
19989 tree int_ftype_void
19990 = build_function_type_list (integer_type_node
, NULL_TREE
);
19992 = build_function_type_list (long_long_integer_type_node
,
19993 V8QI_type_node
, NULL_TREE
);
19995 = build_function_type_list (long_long_integer_type_node
,
19996 V4HI_type_node
, NULL_TREE
);
19998 = build_function_type_list (long_long_integer_type_node
,
19999 V2SI_type_node
, NULL_TREE
);
20000 tree v2si_ftype_v4hi
20001 = build_function_type_list (V2SI_type_node
,
20002 V4HI_type_node
, NULL_TREE
);
20003 tree v4hi_ftype_v8qi
20004 = build_function_type_list (V4HI_type_node
,
20005 V8QI_type_node
, NULL_TREE
);
20006 tree v8qi_ftype_v8qi
20007 = build_function_type_list (V8QI_type_node
,
20008 V8QI_type_node
, NULL_TREE
);
20009 tree v4hi_ftype_v4hi
20010 = build_function_type_list (V4HI_type_node
,
20011 V4HI_type_node
, NULL_TREE
);
20012 tree v2si_ftype_v2si
20013 = build_function_type_list (V2SI_type_node
,
20014 V2SI_type_node
, NULL_TREE
);
20016 tree di_ftype_di_v4hi_v4hi
20017 = build_function_type_list (long_long_unsigned_type_node
,
20018 long_long_unsigned_type_node
,
20019 V4HI_type_node
, V4HI_type_node
,
20022 tree di_ftype_v4hi_v4hi
20023 = build_function_type_list (long_long_unsigned_type_node
,
20024 V4HI_type_node
,V4HI_type_node
,
20027 tree v2si_ftype_v2si_v4hi_v4hi
20028 = build_function_type_list (V2SI_type_node
,
20029 V2SI_type_node
, V4HI_type_node
,
20030 V4HI_type_node
, NULL_TREE
);
20032 tree v2si_ftype_v2si_v8qi_v8qi
20033 = build_function_type_list (V2SI_type_node
,
20034 V2SI_type_node
, V8QI_type_node
,
20035 V8QI_type_node
, NULL_TREE
);
20037 tree di_ftype_di_v2si_v2si
20038 = build_function_type_list (long_long_unsigned_type_node
,
20039 long_long_unsigned_type_node
,
20040 V2SI_type_node
, V2SI_type_node
,
20043 tree di_ftype_di_di_int
20044 = build_function_type_list (long_long_unsigned_type_node
,
20045 long_long_unsigned_type_node
,
20046 long_long_unsigned_type_node
,
20047 integer_type_node
, NULL_TREE
);
20049 tree void_ftype_int
20050 = build_function_type_list (void_type_node
,
20051 integer_type_node
, NULL_TREE
);
20053 tree v8qi_ftype_char
20054 = build_function_type_list (V8QI_type_node
,
20055 signed_char_type_node
, NULL_TREE
);
20057 tree v4hi_ftype_short
20058 = build_function_type_list (V4HI_type_node
,
20059 short_integer_type_node
, NULL_TREE
);
20061 tree v2si_ftype_int
20062 = build_function_type_list (V2SI_type_node
,
20063 integer_type_node
, NULL_TREE
);
20065 /* Normal vector binops. */
20066 tree v8qi_ftype_v8qi_v8qi
20067 = build_function_type_list (V8QI_type_node
,
20068 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
20069 tree v4hi_ftype_v4hi_v4hi
20070 = build_function_type_list (V4HI_type_node
,
20071 V4HI_type_node
,V4HI_type_node
, NULL_TREE
);
20072 tree v2si_ftype_v2si_v2si
20073 = build_function_type_list (V2SI_type_node
,
20074 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
20075 tree di_ftype_di_di
20076 = build_function_type_list (long_long_unsigned_type_node
,
20077 long_long_unsigned_type_node
,
20078 long_long_unsigned_type_node
,
20081 /* Add all builtins that are more or less simple operations on two
20083 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
20085 /* Use one of the operands; the target can have a different mode for
20086 mask-generating compares. */
20087 enum machine_mode mode
;
20093 mode
= insn_data
[d
->icode
].operand
[1].mode
;
20098 type
= v8qi_ftype_v8qi_v8qi
;
20101 type
= v4hi_ftype_v4hi_v4hi
;
20104 type
= v2si_ftype_v2si_v2si
;
20107 type
= di_ftype_di_di
;
20111 gcc_unreachable ();
20114 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
20117 /* Add the remaining MMX insns with somewhat more complicated types. */
20118 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
20119 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
20120 ARM_BUILTIN_ ## CODE)
20122 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
20123 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
20124 ARM_BUILTIN_ ## CODE)
20126 iwmmx_mbuiltin ("wzero", di_ftype_void
, WZERO
);
20127 iwmmx_mbuiltin ("setwcgr0", void_ftype_int
, SETWCGR0
);
20128 iwmmx_mbuiltin ("setwcgr1", void_ftype_int
, SETWCGR1
);
20129 iwmmx_mbuiltin ("setwcgr2", void_ftype_int
, SETWCGR2
);
20130 iwmmx_mbuiltin ("setwcgr3", void_ftype_int
, SETWCGR3
);
20131 iwmmx_mbuiltin ("getwcgr0", int_ftype_void
, GETWCGR0
);
20132 iwmmx_mbuiltin ("getwcgr1", int_ftype_void
, GETWCGR1
);
20133 iwmmx_mbuiltin ("getwcgr2", int_ftype_void
, GETWCGR2
);
20134 iwmmx_mbuiltin ("getwcgr3", int_ftype_void
, GETWCGR3
);
20136 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di
, WSLLH
);
20137 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di
, WSLLW
);
20138 iwmmx_mbuiltin ("wslld", di_ftype_di_di
, WSLLD
);
20139 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int
, WSLLHI
);
20140 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int
, WSLLWI
);
20141 iwmmx_mbuiltin ("wslldi", di_ftype_di_int
, WSLLDI
);
20143 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di
, WSRLH
);
20144 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di
, WSRLW
);
20145 iwmmx_mbuiltin ("wsrld", di_ftype_di_di
, WSRLD
);
20146 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int
, WSRLHI
);
20147 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int
, WSRLWI
);
20148 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int
, WSRLDI
);
20150 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di
, WSRAH
);
20151 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di
, WSRAW
);
20152 iwmmx_mbuiltin ("wsrad", di_ftype_di_di
, WSRAD
);
20153 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int
, WSRAHI
);
20154 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int
, WSRAWI
);
20155 iwmmx_mbuiltin ("wsradi", di_ftype_di_int
, WSRADI
);
20157 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di
, WRORH
);
20158 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di
, WRORW
);
20159 iwmmx_mbuiltin ("wrord", di_ftype_di_di
, WRORD
);
20160 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int
, WRORHI
);
20161 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int
, WRORWI
);
20162 iwmmx_mbuiltin ("wrordi", di_ftype_di_int
, WRORDI
);
20164 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int
, WSHUFH
);
20166 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi
, WSADB
);
20167 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi
, WSADH
);
20168 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi
, WMADDS
);
20169 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi
, WMADDSX
);
20170 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi
, WMADDSN
);
20171 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi
, WMADDU
);
20172 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi
, WMADDUX
);
20173 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi
, WMADDUN
);
20174 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi
, WSADBZ
);
20175 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi
, WSADHZ
);
20177 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int
, TEXTRMSB
);
20178 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int
, TEXTRMSH
);
20179 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int
, TEXTRMSW
);
20180 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int
, TEXTRMUB
);
20181 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int
, TEXTRMUH
);
20182 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int
, TEXTRMUW
);
20183 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int
, TINSRB
);
20184 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int
, TINSRH
);
20185 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int
, TINSRW
);
20187 iwmmx_mbuiltin ("waccb", di_ftype_v8qi
, WACCB
);
20188 iwmmx_mbuiltin ("wacch", di_ftype_v4hi
, WACCH
);
20189 iwmmx_mbuiltin ("waccw", di_ftype_v2si
, WACCW
);
20191 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi
, TMOVMSKB
);
20192 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi
, TMOVMSKH
);
20193 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si
, TMOVMSKW
);
20195 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi
, WADDBHUSM
);
20196 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi
, WADDBHUSL
);
20198 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi
, WPACKHSS
);
20199 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi
, WPACKHUS
);
20200 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si
, WPACKWUS
);
20201 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si
, WPACKWSS
);
20202 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di
, WPACKDUS
);
20203 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di
, WPACKDSS
);
20205 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi
, WUNPCKEHUB
);
20206 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi
, WUNPCKEHUH
);
20207 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si
, WUNPCKEHUW
);
20208 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi
, WUNPCKEHSB
);
20209 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi
, WUNPCKEHSH
);
20210 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si
, WUNPCKEHSW
);
20211 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi
, WUNPCKELUB
);
20212 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi
, WUNPCKELUH
);
20213 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si
, WUNPCKELUW
);
20214 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi
, WUNPCKELSB
);
20215 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi
, WUNPCKELSH
);
20216 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si
, WUNPCKELSW
);
20218 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi
, WMACS
);
20219 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi
, WMACSZ
);
20220 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi
, WMACU
);
20221 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi
, WMACUZ
);
20223 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int
, WALIGNI
);
20224 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int
, TMIA
);
20225 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int
, TMIAPH
);
20226 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int
, TMIABB
);
20227 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int
, TMIABT
);
20228 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int
, TMIATB
);
20229 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int
, TMIATT
);
20231 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi
, WABSB
);
20232 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi
, WABSH
);
20233 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si
, WABSW
);
20235 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi
, WQMIABB
);
20236 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi
, WQMIABT
);
20237 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi
, WQMIATB
);
20238 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi
, WQMIATT
);
20240 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABBN
);
20241 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABTN
);
20242 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATBN
);
20243 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATTN
);
20245 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi
, WMIABB
);
20246 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi
, WMIABT
);
20247 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi
, WMIATB
);
20248 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi
, WMIATT
);
20250 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi
, WMIABBN
);
20251 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi
, WMIABTN
);
20252 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi
, WMIATBN
);
20253 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi
, WMIATTN
);
20255 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si
, WMIAWBB
);
20256 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si
, WMIAWBT
);
20257 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si
, WMIAWTB
);
20258 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si
, WMIAWTT
);
20260 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si
, WMIAWBBN
);
20261 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si
, WMIAWBTN
);
20262 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si
, WMIAWTBN
);
20263 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si
, WMIAWTTN
);
20265 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int
, WMERGE
);
20267 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char
, TBCSTB
);
20268 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short
, TBCSTH
);
20269 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int
, TBCSTW
);
20271 #undef iwmmx_mbuiltin
20272 #undef iwmmx2_mbuiltin
20276 arm_init_fp16_builtins (void)
20278 tree fp16_type
= make_node (REAL_TYPE
);
20279 TYPE_PRECISION (fp16_type
) = 16;
20280 layout_type (fp16_type
);
20281 (*lang_hooks
.types
.register_builtin_type
) (fp16_type
, "__fp16");
20285 arm_init_builtins (void)
20287 if (TARGET_REALLY_IWMMXT
)
20288 arm_init_iwmmxt_builtins ();
20291 arm_init_neon_builtins ();
20293 if (arm_fp16_format
)
20294 arm_init_fp16_builtins ();
20297 /* Return the ARM builtin for CODE. */
20300 arm_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
20302 if (code
>= ARM_BUILTIN_MAX
)
20303 return error_mark_node
;
20305 return arm_builtin_decls
[code
];
20308 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20310 static const char *
20311 arm_invalid_parameter_type (const_tree t
)
20313 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
20314 return N_("function parameters cannot have __fp16 type");
20318 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20320 static const char *
20321 arm_invalid_return_type (const_tree t
)
20323 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
20324 return N_("functions cannot return __fp16 type");
20328 /* Implement TARGET_PROMOTED_TYPE. */
20331 arm_promoted_type (const_tree t
)
20333 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
20334 return float_type_node
;
20338 /* Implement TARGET_CONVERT_TO_TYPE.
20339 Specifically, this hook implements the peculiarity of the ARM
20340 half-precision floating-point C semantics that requires conversions between
20341 __fp16 to or from double to do an intermediate conversion to float. */
20344 arm_convert_to_type (tree type
, tree expr
)
20346 tree fromtype
= TREE_TYPE (expr
);
20347 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
20349 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
20350 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
20351 return convert (type
, convert (float_type_node
, expr
));
20355 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20356 This simply adds HFmode as a supported mode; even though we don't
20357 implement arithmetic on this type directly, it's supported by
20358 optabs conversions, much the way the double-word arithmetic is
20359 special-cased in the default hook. */
20362 arm_scalar_mode_supported_p (enum machine_mode mode
)
20364 if (mode
== HFmode
)
20365 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
20366 else if (ALL_FIXED_POINT_MODE_P (mode
))
20369 return default_scalar_mode_supported_p (mode
);
20372 /* Errors in the source file can cause expand_expr to return const0_rtx
20373 where we expect a vector. To avoid crashing, use one of the vector
20374 clear instructions. */
20377 safe_vector_operand (rtx x
, enum machine_mode mode
)
20379 if (x
!= const0_rtx
)
20381 x
= gen_reg_rtx (mode
);
20383 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
20384 : gen_rtx_SUBREG (DImode
, x
, 0)));
20388 /* Subroutine of arm_expand_builtin to take care of binop insns. */
20391 arm_expand_binop_builtin (enum insn_code icode
,
20392 tree exp
, rtx target
)
20395 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20396 tree arg1
= CALL_EXPR_ARG (exp
, 1);
20397 rtx op0
= expand_normal (arg0
);
20398 rtx op1
= expand_normal (arg1
);
20399 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
20400 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
20401 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
20403 if (VECTOR_MODE_P (mode0
))
20404 op0
= safe_vector_operand (op0
, mode0
);
20405 if (VECTOR_MODE_P (mode1
))
20406 op1
= safe_vector_operand (op1
, mode1
);
20409 || GET_MODE (target
) != tmode
20410 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20411 target
= gen_reg_rtx (tmode
);
20413 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
20414 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
20416 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20417 op0
= copy_to_mode_reg (mode0
, op0
);
20418 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
20419 op1
= copy_to_mode_reg (mode1
, op1
);
20421 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20428 /* Subroutine of arm_expand_builtin to take care of unop insns. */
20431 arm_expand_unop_builtin (enum insn_code icode
,
20432 tree exp
, rtx target
, int do_load
)
20435 tree arg0
= CALL_EXPR_ARG (exp
, 0);
20436 rtx op0
= expand_normal (arg0
);
20437 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
20438 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
20441 || GET_MODE (target
) != tmode
20442 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20443 target
= gen_reg_rtx (tmode
);
20445 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
20448 if (VECTOR_MODE_P (mode0
))
20449 op0
= safe_vector_operand (op0
, mode0
);
20451 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20452 op0
= copy_to_mode_reg (mode0
, op0
);
20455 pat
= GEN_FCN (icode
) (target
, op0
);
/* Kinds of argument that a Neon builtin expander expects; the vararg
   list given to arm_expand_neon_args is terminated by NEON_ARG_STOP.  */
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_MEMORY,
  NEON_ARG_STOP
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5
20471 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20472 and return an expression for the accessed memory.
20474 The intrinsic function operates on a block of registers that has
20475 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
20476 function references the memory at EXP of type TYPE and in mode
20477 MEM_MODE; this mode may be BLKmode if no more suitable mode is
20481 neon_dereference_pointer (tree exp
, tree type
, enum machine_mode mem_mode
,
20482 enum machine_mode reg_mode
,
20483 neon_builtin_type_mode type_mode
)
20485 HOST_WIDE_INT reg_size
, vector_size
, nvectors
, nelems
;
20486 tree elem_type
, upper_bound
, array_type
;
20488 /* Work out the size of the register block in bytes. */
20489 reg_size
= GET_MODE_SIZE (reg_mode
);
20491 /* Work out the size of each vector in bytes. */
20492 gcc_assert (TYPE_MODE_BIT (type_mode
) & (TB_DREG
| TB_QREG
));
20493 vector_size
= (TYPE_MODE_BIT (type_mode
) & TB_QREG
? 16 : 8);
20495 /* Work out how many vectors there are. */
20496 gcc_assert (reg_size
% vector_size
== 0);
20497 nvectors
= reg_size
/ vector_size
;
20499 /* Work out the type of each element. */
20500 gcc_assert (POINTER_TYPE_P (type
));
20501 elem_type
= TREE_TYPE (type
);
20503 /* Work out how many elements are being loaded or stored.
20504 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20505 and memory elements; anything else implies a lane load or store. */
20506 if (mem_mode
== reg_mode
)
20507 nelems
= vector_size
* nvectors
/ int_size_in_bytes (elem_type
);
20511 /* Create a type that describes the full access. */
20512 upper_bound
= build_int_cst (size_type_node
, nelems
- 1);
20513 array_type
= build_array_type (elem_type
, build_index_type (upper_bound
));
20515 /* Dereference EXP using that type. */
20516 return fold_build2 (MEM_REF
, array_type
, exp
,
20517 build_int_cst (build_pointer_type (array_type
), 0));
20520 /* Expand a Neon builtin. */
20522 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
20523 neon_builtin_type_mode type_mode
,
20524 tree exp
, int fcode
, ...)
20528 tree arg
[NEON_MAX_BUILTIN_ARGS
];
20529 rtx op
[NEON_MAX_BUILTIN_ARGS
];
20532 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
20533 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
20534 enum machine_mode other_mode
;
20540 || GET_MODE (target
) != tmode
20541 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
20542 target
= gen_reg_rtx (tmode
);
20544 va_start (ap
, fcode
);
20546 formals
= TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls
[fcode
]));
20550 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
20552 if (thisarg
== NEON_ARG_STOP
)
20556 opno
= argc
+ have_retval
;
20557 mode
[argc
] = insn_data
[icode
].operand
[opno
].mode
;
20558 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
20559 arg_type
= TREE_VALUE (formals
);
20560 if (thisarg
== NEON_ARG_MEMORY
)
20562 other_mode
= insn_data
[icode
].operand
[1 - opno
].mode
;
20563 arg
[argc
] = neon_dereference_pointer (arg
[argc
], arg_type
,
20564 mode
[argc
], other_mode
,
20568 op
[argc
] = expand_normal (arg
[argc
]);
20572 case NEON_ARG_COPY_TO_REG
:
20573 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20574 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
20575 (op
[argc
], mode
[argc
]))
20576 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
20579 case NEON_ARG_CONSTANT
:
20580 /* FIXME: This error message is somewhat unhelpful. */
20581 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
20582 (op
[argc
], mode
[argc
]))
20583 error ("argument must be a constant");
20586 case NEON_ARG_MEMORY
:
20587 gcc_assert (MEM_P (op
[argc
]));
20588 PUT_MODE (op
[argc
], mode
[argc
]);
20589 /* ??? arm_neon.h uses the same built-in functions for signed
20590 and unsigned accesses, casting where necessary. This isn't
20592 set_mem_alias_set (op
[argc
], 0);
20593 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
20594 (op
[argc
], mode
[argc
]))
20595 op
[argc
] = (replace_equiv_address
20596 (op
[argc
], force_reg (Pmode
, XEXP (op
[argc
], 0))));
20599 case NEON_ARG_STOP
:
20600 gcc_unreachable ();
20604 formals
= TREE_CHAIN (formals
);
20614 pat
= GEN_FCN (icode
) (target
, op
[0]);
20618 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
20622 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
20626 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
20630 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
20634 gcc_unreachable ();
20640 pat
= GEN_FCN (icode
) (op
[0]);
20644 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
20648 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
20652 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
20656 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
20660 gcc_unreachable ();
20671 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20672 constants defined per-instruction or per instruction-variant. Instead, the
20673 required info is looked up in the table neon_builtin_data. */
20675 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
20677 neon_builtin_datum
*d
= &neon_builtin_data
[fcode
- ARM_BUILTIN_NEON_BASE
];
20678 neon_itype itype
= d
->itype
;
20679 enum insn_code icode
= d
->code
;
20680 neon_builtin_type_mode type_mode
= d
->mode
;
20687 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20688 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
20692 case NEON_SCALARMUL
:
20693 case NEON_SCALARMULL
:
20694 case NEON_SCALARMULH
:
20695 case NEON_SHIFTINSERT
:
20696 case NEON_LOGICBINOP
:
20697 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20698 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
20702 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20703 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
20704 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
20708 case NEON_SHIFTIMM
:
20709 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20710 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
20714 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20715 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
20719 case NEON_REINTERP
:
20720 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20721 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
20725 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20726 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
20728 case NEON_RESULTPAIR
:
20729 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
20730 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
20734 case NEON_LANEMULL
:
20735 case NEON_LANEMULH
:
20736 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20737 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
20738 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
20741 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20742 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
20743 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
20745 case NEON_SHIFTACC
:
20746 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20747 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
20748 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
20750 case NEON_SCALARMAC
:
20751 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20752 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
20753 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
20757 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20758 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
20762 case NEON_LOADSTRUCT
:
20763 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20764 NEON_ARG_MEMORY
, NEON_ARG_STOP
);
20766 case NEON_LOAD1LANE
:
20767 case NEON_LOADSTRUCTLANE
:
20768 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
20769 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
20773 case NEON_STORESTRUCT
:
20774 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
20775 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
20777 case NEON_STORE1LANE
:
20778 case NEON_STORESTRUCTLANE
:
20779 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
20780 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
20784 gcc_unreachable ();
20787 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20789 neon_reinterpret (rtx dest
, rtx src
)
20791 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
20794 /* Emit code to place a Neon pair result in memory locations (with equal
20797 neon_emit_pair_result_insn (enum machine_mode mode
,
20798 rtx (*intfn
) (rtx
, rtx
, rtx
, rtx
), rtx destaddr
,
20801 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
20802 rtx tmp1
= gen_reg_rtx (mode
);
20803 rtx tmp2
= gen_reg_rtx (mode
);
20805 emit_insn (intfn (tmp1
, op1
, op2
, tmp2
));
20807 emit_move_insn (mem
, tmp1
);
20808 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
20809 emit_move_insn (mem
, tmp2
);
20812 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
20813 not to early-clobber SRC registers in the process.
20815 We assume that the operands described by SRC and DEST represent a
20816 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
20817 number of components into which the copy has been decomposed. */
20819 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
20823 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
20824 || REGNO (operands
[0]) < REGNO (operands
[1]))
20826 for (i
= 0; i
< count
; i
++)
20828 operands
[2 * i
] = dest
[i
];
20829 operands
[2 * i
+ 1] = src
[i
];
20834 for (i
= 0; i
< count
; i
++)
20836 operands
[2 * i
] = dest
[count
- i
- 1];
20837 operands
[2 * i
+ 1] = src
[count
- i
- 1];
20842 /* Split operands into moves from op[1] + op[2] into op[0]. */
20845 neon_split_vcombine (rtx operands
[3])
20847 unsigned int dest
= REGNO (operands
[0]);
20848 unsigned int src1
= REGNO (operands
[1]);
20849 unsigned int src2
= REGNO (operands
[2]);
20850 enum machine_mode halfmode
= GET_MODE (operands
[1]);
20851 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
20852 rtx destlo
, desthi
;
20854 if (src1
== dest
&& src2
== dest
+ halfregs
)
20856 /* No-op move. Can't split to nothing; emit something. */
20857 emit_note (NOTE_INSN_DELETED
);
20861 /* Preserve register attributes for variable tracking. */
20862 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
20863 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
20864 GET_MODE_SIZE (halfmode
));
20866 /* Special case of reversed high/low parts. Use VSWP. */
20867 if (src2
== dest
&& src1
== dest
+ halfregs
)
20869 rtx x
= gen_rtx_SET (VOIDmode
, destlo
, operands
[1]);
20870 rtx y
= gen_rtx_SET (VOIDmode
, desthi
, operands
[2]);
20871 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
20875 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
20877 /* Try to avoid unnecessary moves if part of the result
20878 is in the right place already. */
20880 emit_move_insn (destlo
, operands
[1]);
20881 if (src2
!= dest
+ halfregs
)
20882 emit_move_insn (desthi
, operands
[2]);
20886 if (src2
!= dest
+ halfregs
)
20887 emit_move_insn (desthi
, operands
[2]);
20889 emit_move_insn (destlo
, operands
[1]);
20893 /* Expand an expression EXP that calls a built-in function,
20894 with result going to TARGET if that's convenient
20895 (and in mode MODE if that's convenient).
20896 SUBTARGET may be used as the target for computing one of EXP's operands.
20897 IGNORE is nonzero if the value is to be ignored. */
20900 arm_expand_builtin (tree exp
,
20902 rtx subtarget ATTRIBUTE_UNUSED
,
20903 enum machine_mode mode ATTRIBUTE_UNUSED
,
20904 int ignore ATTRIBUTE_UNUSED
)
20906 const struct builtin_description
* d
;
20907 enum insn_code icode
;
20908 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
20916 int fcode
= DECL_FUNCTION_CODE (fndecl
);
20918 enum machine_mode tmode
;
20919 enum machine_mode mode0
;
20920 enum machine_mode mode1
;
20921 enum machine_mode mode2
;
20927 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
20928 return arm_expand_neon_builtin (fcode
, exp
, target
);
20932 case ARM_BUILTIN_TEXTRMSB
:
20933 case ARM_BUILTIN_TEXTRMUB
:
20934 case ARM_BUILTIN_TEXTRMSH
:
20935 case ARM_BUILTIN_TEXTRMUH
:
20936 case ARM_BUILTIN_TEXTRMSW
:
20937 case ARM_BUILTIN_TEXTRMUW
:
20938 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
20939 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
20940 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
20941 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
20942 : CODE_FOR_iwmmxt_textrmw
);
20944 arg0
= CALL_EXPR_ARG (exp
, 0);
20945 arg1
= CALL_EXPR_ARG (exp
, 1);
20946 op0
= expand_normal (arg0
);
20947 op1
= expand_normal (arg1
);
20948 tmode
= insn_data
[icode
].operand
[0].mode
;
20949 mode0
= insn_data
[icode
].operand
[1].mode
;
20950 mode1
= insn_data
[icode
].operand
[2].mode
;
20952 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20953 op0
= copy_to_mode_reg (mode0
, op0
);
20954 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
20956 /* @@@ better error message */
20957 error ("selector must be an immediate");
20958 return gen_reg_rtx (tmode
);
20961 opint
= INTVAL (op1
);
20962 if (fcode
== ARM_BUILTIN_TEXTRMSB
|| fcode
== ARM_BUILTIN_TEXTRMUB
)
20964 if (opint
> 7 || opint
< 0)
20965 error ("the range of selector should be in 0 to 7");
20967 else if (fcode
== ARM_BUILTIN_TEXTRMSH
|| fcode
== ARM_BUILTIN_TEXTRMUH
)
20969 if (opint
> 3 || opint
< 0)
20970 error ("the range of selector should be in 0 to 3");
20972 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
20974 if (opint
> 1 || opint
< 0)
20975 error ("the range of selector should be in 0 to 1");
20979 || GET_MODE (target
) != tmode
20980 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20981 target
= gen_reg_rtx (tmode
);
20982 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20988 case ARM_BUILTIN_WALIGNI
:
20989 /* If op2 is immediate, call walighi, else call walighr. */
20990 arg0
= CALL_EXPR_ARG (exp
, 0);
20991 arg1
= CALL_EXPR_ARG (exp
, 1);
20992 arg2
= CALL_EXPR_ARG (exp
, 2);
20993 op0
= expand_normal (arg0
);
20994 op1
= expand_normal (arg1
);
20995 op2
= expand_normal (arg2
);
20996 if (CONST_INT_P (op2
))
20998 icode
= CODE_FOR_iwmmxt_waligni
;
20999 tmode
= insn_data
[icode
].operand
[0].mode
;
21000 mode0
= insn_data
[icode
].operand
[1].mode
;
21001 mode1
= insn_data
[icode
].operand
[2].mode
;
21002 mode2
= insn_data
[icode
].operand
[3].mode
;
21003 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
21004 op0
= copy_to_mode_reg (mode0
, op0
);
21005 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
21006 op1
= copy_to_mode_reg (mode1
, op1
);
21007 gcc_assert ((*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
));
21008 selector
= INTVAL (op2
);
21009 if (selector
> 7 || selector
< 0)
21010 error ("the range of selector should be in 0 to 7");
21014 icode
= CODE_FOR_iwmmxt_walignr
;
21015 tmode
= insn_data
[icode
].operand
[0].mode
;
21016 mode0
= insn_data
[icode
].operand
[1].mode
;
21017 mode1
= insn_data
[icode
].operand
[2].mode
;
21018 mode2
= insn_data
[icode
].operand
[3].mode
;
21019 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
21020 op0
= copy_to_mode_reg (mode0
, op0
);
21021 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
21022 op1
= copy_to_mode_reg (mode1
, op1
);
21023 if (!(*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
21024 op2
= copy_to_mode_reg (mode2
, op2
);
21027 || GET_MODE (target
) != tmode
21028 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21029 target
= gen_reg_rtx (tmode
);
21030 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
21036 case ARM_BUILTIN_TINSRB
:
21037 case ARM_BUILTIN_TINSRH
:
21038 case ARM_BUILTIN_TINSRW
:
21039 case ARM_BUILTIN_WMERGE
:
21040 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
21041 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
21042 : fcode
== ARM_BUILTIN_WMERGE
? CODE_FOR_iwmmxt_wmerge
21043 : CODE_FOR_iwmmxt_tinsrw
);
21044 arg0
= CALL_EXPR_ARG (exp
, 0);
21045 arg1
= CALL_EXPR_ARG (exp
, 1);
21046 arg2
= CALL_EXPR_ARG (exp
, 2);
21047 op0
= expand_normal (arg0
);
21048 op1
= expand_normal (arg1
);
21049 op2
= expand_normal (arg2
);
21050 tmode
= insn_data
[icode
].operand
[0].mode
;
21051 mode0
= insn_data
[icode
].operand
[1].mode
;
21052 mode1
= insn_data
[icode
].operand
[2].mode
;
21053 mode2
= insn_data
[icode
].operand
[3].mode
;
21055 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
21056 op0
= copy_to_mode_reg (mode0
, op0
);
21057 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
21058 op1
= copy_to_mode_reg (mode1
, op1
);
21059 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
21061 error ("selector must be an immediate");
21064 if (icode
== CODE_FOR_iwmmxt_wmerge
)
21066 selector
= INTVAL (op2
);
21067 if (selector
> 7 || selector
< 0)
21068 error ("the range of selector should be in 0 to 7");
21070 if ((icode
== CODE_FOR_iwmmxt_tinsrb
)
21071 || (icode
== CODE_FOR_iwmmxt_tinsrh
)
21072 || (icode
== CODE_FOR_iwmmxt_tinsrw
))
21075 selector
= INTVAL (op2
);
21076 if (icode
== CODE_FOR_iwmmxt_tinsrb
&& (selector
< 0 || selector
> 7))
21077 error ("the range of selector should be in 0 to 7");
21078 else if (icode
== CODE_FOR_iwmmxt_tinsrh
&& (selector
< 0 ||selector
> 3))
21079 error ("the range of selector should be in 0 to 3");
21080 else if (icode
== CODE_FOR_iwmmxt_tinsrw
&& (selector
< 0 ||selector
> 1))
21081 error ("the range of selector should be in 0 to 1");
21083 op2
= GEN_INT (mask
);
21086 || GET_MODE (target
) != tmode
21087 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21088 target
= gen_reg_rtx (tmode
);
21089 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
21095 case ARM_BUILTIN_SETWCGR0
:
21096 case ARM_BUILTIN_SETWCGR1
:
21097 case ARM_BUILTIN_SETWCGR2
:
21098 case ARM_BUILTIN_SETWCGR3
:
21099 icode
= (fcode
== ARM_BUILTIN_SETWCGR0
? CODE_FOR_iwmmxt_setwcgr0
21100 : fcode
== ARM_BUILTIN_SETWCGR1
? CODE_FOR_iwmmxt_setwcgr1
21101 : fcode
== ARM_BUILTIN_SETWCGR2
? CODE_FOR_iwmmxt_setwcgr2
21102 : CODE_FOR_iwmmxt_setwcgr3
);
21103 arg0
= CALL_EXPR_ARG (exp
, 0);
21104 op0
= expand_normal (arg0
);
21105 mode0
= insn_data
[icode
].operand
[0].mode
;
21106 if (!(*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
21107 op0
= copy_to_mode_reg (mode0
, op0
);
21108 pat
= GEN_FCN (icode
) (op0
);
21114 case ARM_BUILTIN_GETWCGR0
:
21115 case ARM_BUILTIN_GETWCGR1
:
21116 case ARM_BUILTIN_GETWCGR2
:
21117 case ARM_BUILTIN_GETWCGR3
:
21118 icode
= (fcode
== ARM_BUILTIN_GETWCGR0
? CODE_FOR_iwmmxt_getwcgr0
21119 : fcode
== ARM_BUILTIN_GETWCGR1
? CODE_FOR_iwmmxt_getwcgr1
21120 : fcode
== ARM_BUILTIN_GETWCGR2
? CODE_FOR_iwmmxt_getwcgr2
21121 : CODE_FOR_iwmmxt_getwcgr3
);
21122 tmode
= insn_data
[icode
].operand
[0].mode
;
21124 || GET_MODE (target
) != tmode
21125 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21126 target
= gen_reg_rtx (tmode
);
21127 pat
= GEN_FCN (icode
) (target
);
21133 case ARM_BUILTIN_WSHUFH
:
21134 icode
= CODE_FOR_iwmmxt_wshufh
;
21135 arg0
= CALL_EXPR_ARG (exp
, 0);
21136 arg1
= CALL_EXPR_ARG (exp
, 1);
21137 op0
= expand_normal (arg0
);
21138 op1
= expand_normal (arg1
);
21139 tmode
= insn_data
[icode
].operand
[0].mode
;
21140 mode1
= insn_data
[icode
].operand
[1].mode
;
21141 mode2
= insn_data
[icode
].operand
[2].mode
;
21143 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
21144 op0
= copy_to_mode_reg (mode1
, op0
);
21145 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
21147 error ("mask must be an immediate");
21150 selector
= INTVAL (op1
);
21151 if (selector
< 0 || selector
> 255)
21152 error ("the range of mask should be in 0 to 255");
21154 || GET_MODE (target
) != tmode
21155 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21156 target
= gen_reg_rtx (tmode
);
21157 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
21163 case ARM_BUILTIN_WMADDS
:
21164 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds
, exp
, target
);
21165 case ARM_BUILTIN_WMADDSX
:
21166 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx
, exp
, target
);
21167 case ARM_BUILTIN_WMADDSN
:
21168 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn
, exp
, target
);
21169 case ARM_BUILTIN_WMADDU
:
21170 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu
, exp
, target
);
21171 case ARM_BUILTIN_WMADDUX
:
21172 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux
, exp
, target
);
21173 case ARM_BUILTIN_WMADDUN
:
21174 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun
, exp
, target
);
21175 case ARM_BUILTIN_WSADBZ
:
21176 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
21177 case ARM_BUILTIN_WSADHZ
:
21178 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
21180 /* Several three-argument builtins. */
21181 case ARM_BUILTIN_WMACS
:
21182 case ARM_BUILTIN_WMACU
:
21183 case ARM_BUILTIN_TMIA
:
21184 case ARM_BUILTIN_TMIAPH
:
21185 case ARM_BUILTIN_TMIATT
:
21186 case ARM_BUILTIN_TMIATB
:
21187 case ARM_BUILTIN_TMIABT
:
21188 case ARM_BUILTIN_TMIABB
:
21189 case ARM_BUILTIN_WQMIABB
:
21190 case ARM_BUILTIN_WQMIABT
:
21191 case ARM_BUILTIN_WQMIATB
:
21192 case ARM_BUILTIN_WQMIATT
:
21193 case ARM_BUILTIN_WQMIABBN
:
21194 case ARM_BUILTIN_WQMIABTN
:
21195 case ARM_BUILTIN_WQMIATBN
:
21196 case ARM_BUILTIN_WQMIATTN
:
21197 case ARM_BUILTIN_WMIABB
:
21198 case ARM_BUILTIN_WMIABT
:
21199 case ARM_BUILTIN_WMIATB
:
21200 case ARM_BUILTIN_WMIATT
:
21201 case ARM_BUILTIN_WMIABBN
:
21202 case ARM_BUILTIN_WMIABTN
:
21203 case ARM_BUILTIN_WMIATBN
:
21204 case ARM_BUILTIN_WMIATTN
:
21205 case ARM_BUILTIN_WMIAWBB
:
21206 case ARM_BUILTIN_WMIAWBT
:
21207 case ARM_BUILTIN_WMIAWTB
:
21208 case ARM_BUILTIN_WMIAWTT
:
21209 case ARM_BUILTIN_WMIAWBBN
:
21210 case ARM_BUILTIN_WMIAWBTN
:
21211 case ARM_BUILTIN_WMIAWTBN
:
21212 case ARM_BUILTIN_WMIAWTTN
:
21213 case ARM_BUILTIN_WSADB
:
21214 case ARM_BUILTIN_WSADH
:
21215 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
21216 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
21217 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
21218 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
21219 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
21220 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
21221 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
21222 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
21223 : fcode
== ARM_BUILTIN_WQMIABB
? CODE_FOR_iwmmxt_wqmiabb
21224 : fcode
== ARM_BUILTIN_WQMIABT
? CODE_FOR_iwmmxt_wqmiabt
21225 : fcode
== ARM_BUILTIN_WQMIATB
? CODE_FOR_iwmmxt_wqmiatb
21226 : fcode
== ARM_BUILTIN_WQMIATT
? CODE_FOR_iwmmxt_wqmiatt
21227 : fcode
== ARM_BUILTIN_WQMIABBN
? CODE_FOR_iwmmxt_wqmiabbn
21228 : fcode
== ARM_BUILTIN_WQMIABTN
? CODE_FOR_iwmmxt_wqmiabtn
21229 : fcode
== ARM_BUILTIN_WQMIATBN
? CODE_FOR_iwmmxt_wqmiatbn
21230 : fcode
== ARM_BUILTIN_WQMIATTN
? CODE_FOR_iwmmxt_wqmiattn
21231 : fcode
== ARM_BUILTIN_WMIABB
? CODE_FOR_iwmmxt_wmiabb
21232 : fcode
== ARM_BUILTIN_WMIABT
? CODE_FOR_iwmmxt_wmiabt
21233 : fcode
== ARM_BUILTIN_WMIATB
? CODE_FOR_iwmmxt_wmiatb
21234 : fcode
== ARM_BUILTIN_WMIATT
? CODE_FOR_iwmmxt_wmiatt
21235 : fcode
== ARM_BUILTIN_WMIABBN
? CODE_FOR_iwmmxt_wmiabbn
21236 : fcode
== ARM_BUILTIN_WMIABTN
? CODE_FOR_iwmmxt_wmiabtn
21237 : fcode
== ARM_BUILTIN_WMIATBN
? CODE_FOR_iwmmxt_wmiatbn
21238 : fcode
== ARM_BUILTIN_WMIATTN
? CODE_FOR_iwmmxt_wmiattn
21239 : fcode
== ARM_BUILTIN_WMIAWBB
? CODE_FOR_iwmmxt_wmiawbb
21240 : fcode
== ARM_BUILTIN_WMIAWBT
? CODE_FOR_iwmmxt_wmiawbt
21241 : fcode
== ARM_BUILTIN_WMIAWTB
? CODE_FOR_iwmmxt_wmiawtb
21242 : fcode
== ARM_BUILTIN_WMIAWTT
? CODE_FOR_iwmmxt_wmiawtt
21243 : fcode
== ARM_BUILTIN_WMIAWBBN
? CODE_FOR_iwmmxt_wmiawbbn
21244 : fcode
== ARM_BUILTIN_WMIAWBTN
? CODE_FOR_iwmmxt_wmiawbtn
21245 : fcode
== ARM_BUILTIN_WMIAWTBN
? CODE_FOR_iwmmxt_wmiawtbn
21246 : fcode
== ARM_BUILTIN_WMIAWTTN
? CODE_FOR_iwmmxt_wmiawttn
21247 : fcode
== ARM_BUILTIN_WSADB
? CODE_FOR_iwmmxt_wsadb
21248 : CODE_FOR_iwmmxt_wsadh
);
21249 arg0
= CALL_EXPR_ARG (exp
, 0);
21250 arg1
= CALL_EXPR_ARG (exp
, 1);
21251 arg2
= CALL_EXPR_ARG (exp
, 2);
21252 op0
= expand_normal (arg0
);
21253 op1
= expand_normal (arg1
);
21254 op2
= expand_normal (arg2
);
21255 tmode
= insn_data
[icode
].operand
[0].mode
;
21256 mode0
= insn_data
[icode
].operand
[1].mode
;
21257 mode1
= insn_data
[icode
].operand
[2].mode
;
21258 mode2
= insn_data
[icode
].operand
[3].mode
;
21260 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
21261 op0
= copy_to_mode_reg (mode0
, op0
);
21262 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
21263 op1
= copy_to_mode_reg (mode1
, op1
);
21264 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
21265 op2
= copy_to_mode_reg (mode2
, op2
);
21267 || GET_MODE (target
) != tmode
21268 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21269 target
= gen_reg_rtx (tmode
);
21270 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
21276 case ARM_BUILTIN_WZERO
:
21277 target
= gen_reg_rtx (DImode
);
21278 emit_insn (gen_iwmmxt_clrdi (target
));
21281 case ARM_BUILTIN_WSRLHI
:
21282 case ARM_BUILTIN_WSRLWI
:
21283 case ARM_BUILTIN_WSRLDI
:
21284 case ARM_BUILTIN_WSLLHI
:
21285 case ARM_BUILTIN_WSLLWI
:
21286 case ARM_BUILTIN_WSLLDI
:
21287 case ARM_BUILTIN_WSRAHI
:
21288 case ARM_BUILTIN_WSRAWI
:
21289 case ARM_BUILTIN_WSRADI
:
21290 case ARM_BUILTIN_WRORHI
:
21291 case ARM_BUILTIN_WRORWI
:
21292 case ARM_BUILTIN_WRORDI
:
21293 case ARM_BUILTIN_WSRLH
:
21294 case ARM_BUILTIN_WSRLW
:
21295 case ARM_BUILTIN_WSRLD
:
21296 case ARM_BUILTIN_WSLLH
:
21297 case ARM_BUILTIN_WSLLW
:
21298 case ARM_BUILTIN_WSLLD
:
21299 case ARM_BUILTIN_WSRAH
:
21300 case ARM_BUILTIN_WSRAW
:
21301 case ARM_BUILTIN_WSRAD
:
21302 case ARM_BUILTIN_WRORH
:
21303 case ARM_BUILTIN_WRORW
:
21304 case ARM_BUILTIN_WRORD
:
21305 icode
= (fcode
== ARM_BUILTIN_WSRLHI
? CODE_FOR_lshrv4hi3_iwmmxt
21306 : fcode
== ARM_BUILTIN_WSRLWI
? CODE_FOR_lshrv2si3_iwmmxt
21307 : fcode
== ARM_BUILTIN_WSRLDI
? CODE_FOR_lshrdi3_iwmmxt
21308 : fcode
== ARM_BUILTIN_WSLLHI
? CODE_FOR_ashlv4hi3_iwmmxt
21309 : fcode
== ARM_BUILTIN_WSLLWI
? CODE_FOR_ashlv2si3_iwmmxt
21310 : fcode
== ARM_BUILTIN_WSLLDI
? CODE_FOR_ashldi3_iwmmxt
21311 : fcode
== ARM_BUILTIN_WSRAHI
? CODE_FOR_ashrv4hi3_iwmmxt
21312 : fcode
== ARM_BUILTIN_WSRAWI
? CODE_FOR_ashrv2si3_iwmmxt
21313 : fcode
== ARM_BUILTIN_WSRADI
? CODE_FOR_ashrdi3_iwmmxt
21314 : fcode
== ARM_BUILTIN_WRORHI
? CODE_FOR_rorv4hi3
21315 : fcode
== ARM_BUILTIN_WRORWI
? CODE_FOR_rorv2si3
21316 : fcode
== ARM_BUILTIN_WRORDI
? CODE_FOR_rordi3
21317 : fcode
== ARM_BUILTIN_WSRLH
? CODE_FOR_lshrv4hi3_di
21318 : fcode
== ARM_BUILTIN_WSRLW
? CODE_FOR_lshrv2si3_di
21319 : fcode
== ARM_BUILTIN_WSRLD
? CODE_FOR_lshrdi3_di
21320 : fcode
== ARM_BUILTIN_WSLLH
? CODE_FOR_ashlv4hi3_di
21321 : fcode
== ARM_BUILTIN_WSLLW
? CODE_FOR_ashlv2si3_di
21322 : fcode
== ARM_BUILTIN_WSLLD
? CODE_FOR_ashldi3_di
21323 : fcode
== ARM_BUILTIN_WSRAH
? CODE_FOR_ashrv4hi3_di
21324 : fcode
== ARM_BUILTIN_WSRAW
? CODE_FOR_ashrv2si3_di
21325 : fcode
== ARM_BUILTIN_WSRAD
? CODE_FOR_ashrdi3_di
21326 : fcode
== ARM_BUILTIN_WRORH
? CODE_FOR_rorv4hi3_di
21327 : fcode
== ARM_BUILTIN_WRORW
? CODE_FOR_rorv2si3_di
21328 : fcode
== ARM_BUILTIN_WRORD
? CODE_FOR_rordi3_di
21329 : CODE_FOR_nothing
);
21330 arg1
= CALL_EXPR_ARG (exp
, 1);
21331 op1
= expand_normal (arg1
);
21332 if (GET_MODE (op1
) == VOIDmode
)
21334 imm
= INTVAL (op1
);
21335 if ((fcode
== ARM_BUILTIN_WRORHI
|| fcode
== ARM_BUILTIN_WRORWI
21336 || fcode
== ARM_BUILTIN_WRORH
|| fcode
== ARM_BUILTIN_WRORW
)
21337 && (imm
< 0 || imm
> 32))
21339 if (fcode
== ARM_BUILTIN_WRORHI
)
21340 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
21341 else if (fcode
== ARM_BUILTIN_WRORWI
)
21342 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
21343 else if (fcode
== ARM_BUILTIN_WRORH
)
21344 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
21346 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
21348 else if ((fcode
== ARM_BUILTIN_WRORDI
|| fcode
== ARM_BUILTIN_WRORD
)
21349 && (imm
< 0 || imm
> 64))
21351 if (fcode
== ARM_BUILTIN_WRORDI
)
21352 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
21354 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
21358 if (fcode
== ARM_BUILTIN_WSRLHI
)
21359 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
21360 else if (fcode
== ARM_BUILTIN_WSRLWI
)
21361 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
21362 else if (fcode
== ARM_BUILTIN_WSRLDI
)
21363 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
21364 else if (fcode
== ARM_BUILTIN_WSLLHI
)
21365 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
21366 else if (fcode
== ARM_BUILTIN_WSLLWI
)
21367 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
21368 else if (fcode
== ARM_BUILTIN_WSLLDI
)
21369 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
21370 else if (fcode
== ARM_BUILTIN_WSRAHI
)
21371 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
21372 else if (fcode
== ARM_BUILTIN_WSRAWI
)
21373 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
21374 else if (fcode
== ARM_BUILTIN_WSRADI
)
21375 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
21376 else if (fcode
== ARM_BUILTIN_WSRLH
)
21377 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
21378 else if (fcode
== ARM_BUILTIN_WSRLW
)
21379 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
21380 else if (fcode
== ARM_BUILTIN_WSRLD
)
21381 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
21382 else if (fcode
== ARM_BUILTIN_WSLLH
)
21383 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
21384 else if (fcode
== ARM_BUILTIN_WSLLW
)
21385 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
21386 else if (fcode
== ARM_BUILTIN_WSLLD
)
21387 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
21388 else if (fcode
== ARM_BUILTIN_WSRAH
)
21389 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
21390 else if (fcode
== ARM_BUILTIN_WSRAW
)
21391 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
21393 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
21396 return arm_expand_binop_builtin (icode
, exp
, target
);
21402 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
21403 if (d
->code
== (const enum arm_builtins
) fcode
)
21404 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
21406 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
21407 if (d
->code
== (const enum arm_builtins
) fcode
)
21408 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
21410 /* @@@ Should really do something sensible here. */
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  MASK must be nonzero
   (ctz_hwi's result for zero is host-dependent).  */
static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
21423 /* Like emit_multi_reg_push, but allowing for a different set of
21424 registers to be described as saved. MASK is the set of registers
21425 to be saved; REAL_REGS is the set of registers to be described as
21426 saved. If REAL_REGS is 0, only describe the stack adjustment. */
21429 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
21431 unsigned long regno
;
21432 rtx par
[10], tmp
, reg
, insn
;
21435 /* Build the parallel of the registers actually being stored. */
21436 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
21438 regno
= ctz_hwi (mask
);
21439 reg
= gen_rtx_REG (SImode
, regno
);
21442 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
21444 tmp
= gen_rtx_USE (VOIDmode
, reg
);
21449 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
21450 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
21451 tmp
= gen_frame_mem (BLKmode
, tmp
);
21452 tmp
= gen_rtx_SET (VOIDmode
, tmp
, par
[0]);
21455 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
21456 insn
= emit_insn (tmp
);
21458 /* Always build the stack adjustment note for unwind info. */
21459 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
21460 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
);
21463 /* Build the parallel of the registers recorded as saved for unwind. */
21464 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
21466 regno
= ctz_hwi (real_regs
);
21467 reg
= gen_rtx_REG (SImode
, regno
);
21469 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
21470 tmp
= gen_frame_mem (SImode
, tmp
);
21471 tmp
= gen_rtx_SET (VOIDmode
, tmp
, reg
);
21472 RTX_FRAME_RELATED_P (tmp
) = 1;
21480 RTX_FRAME_RELATED_P (par
[0]) = 1;
21481 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
21484 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
21489 /* Emit code to push or pop registers to or from the stack. F is the
21490 assembly file. MASK is the registers to pop. */
21492 thumb_pop (FILE *f
, unsigned long mask
)
21495 int lo_mask
= mask
& 0xFF;
21496 int pushed_words
= 0;
21500 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
21502 /* Special case. Do not generate a POP PC statement here, do it in
21504 thumb_exit (f
, -1);
21508 fprintf (f
, "\tpop\t{");
21510 /* Look at the low registers first. */
21511 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
21515 asm_fprintf (f
, "%r", regno
);
21517 if ((lo_mask
& ~1) != 0)
21524 if (mask
& (1 << PC_REGNUM
))
21526 /* Catch popping the PC. */
21527 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
21528 || crtl
->calls_eh_return
)
21530 /* The PC is never poped directly, instead
21531 it is popped into r3 and then BX is used. */
21532 fprintf (f
, "}\n");
21534 thumb_exit (f
, -1);
21543 asm_fprintf (f
, "%r", PC_REGNUM
);
21547 fprintf (f
, "}\n");
21550 /* Generate code to return from a thumb function.
21551 If 'reg_containing_return_addr' is -1, then the return address is
21552 actually on the stack, at the stack pointer. */
21554 thumb_exit (FILE *f
, int reg_containing_return_addr
)
21556 unsigned regs_available_for_popping
;
21557 unsigned regs_to_pop
;
21559 unsigned available
;
21563 int restore_a4
= FALSE
;
21565 /* Compute the registers we need to pop. */
21569 if (reg_containing_return_addr
== -1)
21571 regs_to_pop
|= 1 << LR_REGNUM
;
21575 if (TARGET_BACKTRACE
)
21577 /* Restore the (ARM) frame pointer and stack pointer. */
21578 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
21582 /* If there is nothing to pop then just emit the BX instruction and
21584 if (pops_needed
== 0)
21586 if (crtl
->calls_eh_return
)
21587 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
21589 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
21592 /* Otherwise if we are not supporting interworking and we have not created
21593 a backtrace structure and the function was not entered in ARM mode then
21594 just pop the return address straight into the PC. */
21595 else if (!TARGET_INTERWORK
21596 && !TARGET_BACKTRACE
21597 && !is_called_in_ARM_mode (current_function_decl
)
21598 && !crtl
->calls_eh_return
)
21600 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
21604 /* Find out how many of the (return) argument registers we can corrupt. */
21605 regs_available_for_popping
= 0;
21607 /* If returning via __builtin_eh_return, the bottom three registers
21608 all contain information needed for the return. */
21609 if (crtl
->calls_eh_return
)
21613 /* If we can deduce the registers used from the function's
21614 return value. This is more reliable that examining
21615 df_regs_ever_live_p () because that will be set if the register is
21616 ever used in the function, not just if the register is used
21617 to hold a return value. */
21619 if (crtl
->return_rtx
!= 0)
21620 mode
= GET_MODE (crtl
->return_rtx
);
21622 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
21624 size
= GET_MODE_SIZE (mode
);
21628 /* In a void function we can use any argument register.
21629 In a function that returns a structure on the stack
21630 we can use the second and third argument registers. */
21631 if (mode
== VOIDmode
)
21632 regs_available_for_popping
=
21633 (1 << ARG_REGISTER (1))
21634 | (1 << ARG_REGISTER (2))
21635 | (1 << ARG_REGISTER (3));
21637 regs_available_for_popping
=
21638 (1 << ARG_REGISTER (2))
21639 | (1 << ARG_REGISTER (3));
21641 else if (size
<= 4)
21642 regs_available_for_popping
=
21643 (1 << ARG_REGISTER (2))
21644 | (1 << ARG_REGISTER (3));
21645 else if (size
<= 8)
21646 regs_available_for_popping
=
21647 (1 << ARG_REGISTER (3));
21650 /* Match registers to be popped with registers into which we pop them. */
21651 for (available
= regs_available_for_popping
,
21652 required
= regs_to_pop
;
21653 required
!= 0 && available
!= 0;
21654 available
&= ~(available
& - available
),
21655 required
&= ~(required
& - required
))
21658 /* If we have any popping registers left over, remove them. */
21660 regs_available_for_popping
&= ~available
;
21662 /* Otherwise if we need another popping register we can use
21663 the fourth argument register. */
21664 else if (pops_needed
)
21666 /* If we have not found any free argument registers and
21667 reg a4 contains the return address, we must move it. */
21668 if (regs_available_for_popping
== 0
21669 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
21671 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
21672 reg_containing_return_addr
= LR_REGNUM
;
21674 else if (size
> 12)
21676 /* Register a4 is being used to hold part of the return value,
21677 but we have dire need of a free, low register. */
21680 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
21683 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
21685 /* The fourth argument register is available. */
21686 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
21692 /* Pop as many registers as we can. */
21693 thumb_pop (f
, regs_available_for_popping
);
21695 /* Process the registers we popped. */
21696 if (reg_containing_return_addr
== -1)
21698 /* The return address was popped into the lowest numbered register. */
21699 regs_to_pop
&= ~(1 << LR_REGNUM
);
21701 reg_containing_return_addr
=
21702 number_of_first_bit_set (regs_available_for_popping
);
21704 /* Remove this register for the mask of available registers, so that
21705 the return address will not be corrupted by further pops. */
21706 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
21709 /* If we popped other registers then handle them here. */
21710 if (regs_available_for_popping
)
21714 /* Work out which register currently contains the frame pointer. */
21715 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
21717 /* Move it into the correct place. */
21718 asm_fprintf (f
, "\tmov\t%r, %r\n",
21719 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
21721 /* (Temporarily) remove it from the mask of popped registers. */
21722 regs_available_for_popping
&= ~(1 << frame_pointer
);
21723 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
21725 if (regs_available_for_popping
)
21729 /* We popped the stack pointer as well,
21730 find the register that contains it. */
21731 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
21733 /* Move it into the stack register. */
21734 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
21736 /* At this point we have popped all necessary registers, so
21737 do not worry about restoring regs_available_for_popping
21738 to its correct value:
21740 assert (pops_needed == 0)
21741 assert (regs_available_for_popping == (1 << frame_pointer))
21742 assert (regs_to_pop == (1 << STACK_POINTER)) */
21746 /* Since we have just move the popped value into the frame
21747 pointer, the popping register is available for reuse, and
21748 we know that we still have the stack pointer left to pop. */
21749 regs_available_for_popping
|= (1 << frame_pointer
);
21753 /* If we still have registers left on the stack, but we no longer have
21754 any registers into which we can pop them, then we must move the return
21755 address into the link register and make available the register that
21757 if (regs_available_for_popping
== 0 && pops_needed
> 0)
21759 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
21761 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
21762 reg_containing_return_addr
);
21764 reg_containing_return_addr
= LR_REGNUM
;
21767 /* If we have registers left on the stack then pop some more.
21768 We know that at most we will want to pop FP and SP. */
21769 if (pops_needed
> 0)
21774 thumb_pop (f
, regs_available_for_popping
);
21776 /* We have popped either FP or SP.
21777 Move whichever one it is into the correct register. */
21778 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
21779 move_to
= number_of_first_bit_set (regs_to_pop
);
21781 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
21783 regs_to_pop
&= ~(1 << move_to
);
21788 /* If we still have not popped everything then we must have only
21789 had one register available to us and we are now popping the SP. */
21790 if (pops_needed
> 0)
21794 thumb_pop (f
, regs_available_for_popping
);
21796 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
21798 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
21800 assert (regs_to_pop == (1 << STACK_POINTER))
21801 assert (pops_needed == 1)
21805 /* If necessary restore the a4 register. */
21808 if (reg_containing_return_addr
!= LR_REGNUM
)
21810 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
21811 reg_containing_return_addr
= LR_REGNUM
;
21814 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
21817 if (crtl
->calls_eh_return
)
21818 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
21820 /* Return to caller. */
21821 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
21824 /* Scan INSN just before assembler is output for it.
21825 For Thumb-1, we track the status of the condition codes; this
21826 information is used in the cbranchsi4_insn pattern. */
21828 thumb1_final_prescan_insn (rtx insn
)
21830 if (flag_print_asm_name
)
21831 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
21832 INSN_ADDRESSES (INSN_UID (insn
)));
21833 /* Don't overwrite the previous setter when we get to a cbranch. */
21834 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
21836 enum attr_conds conds
;
21838 if (cfun
->machine
->thumb1_cc_insn
)
21840 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
21841 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
21844 conds
= get_attr_conds (insn
);
21845 if (conds
== CONDS_SET
)
21847 rtx set
= single_set (insn
);
21848 cfun
->machine
->thumb1_cc_insn
= insn
;
21849 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
21850 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
21851 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
21852 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
21854 rtx src1
= XEXP (SET_SRC (set
), 1);
21855 if (src1
== const0_rtx
)
21856 cfun
->machine
->thumb1_cc_mode
= CCmode
;
21859 else if (conds
!= CONDS_NOCOND
)
21860 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
21865 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
21867 unsigned HOST_WIDE_INT mask
= 0xff;
21870 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
21871 if (val
== 0) /* XXX */
21874 for (i
= 0; i
< 25; i
++)
21875 if ((val
& (mask
<< i
)) == val
)
21881 /* Returns nonzero if the current function contains,
21882 or might contain a far jump. */
21884 thumb_far_jump_used_p (void)
21888 /* This test is only important for leaf functions. */
21889 /* assert (!leaf_function_p ()); */
21891 /* If we have already decided that far jumps may be used,
21892 do not bother checking again, and always return true even if
21893 it turns out that they are not being used. Once we have made
21894 the decision that far jumps are present (and that hence the link
21895 register will be pushed onto the stack) we cannot go back on it. */
21896 if (cfun
->machine
->far_jump_used
)
21899 /* If this function is not being called from the prologue/epilogue
21900 generation code then it must be being called from the
21901 INITIAL_ELIMINATION_OFFSET macro. */
21902 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
21904 /* In this case we know that we are being asked about the elimination
21905 of the arg pointer register. If that register is not being used,
21906 then there are no arguments on the stack, and we do not have to
21907 worry that a far jump might force the prologue to push the link
21908 register, changing the stack offsets. In this case we can just
21909 return false, since the presence of far jumps in the function will
21910 not affect stack offsets.
21912 If the arg pointer is live (or if it was live, but has now been
21913 eliminated and so set to dead) then we do have to test to see if
21914 the function might contain a far jump. This test can lead to some
21915 false negatives, since before reload is completed, then length of
21916 branch instructions is not known, so gcc defaults to returning their
21917 longest length, which in turn sets the far jump attribute to true.
21919 A false negative will not result in bad code being generated, but it
21920 will result in a needless push and pop of the link register. We
21921 hope that this does not occur too often.
21923 If we need doubleword stack alignment this could affect the other
21924 elimination offsets so we can't risk getting it wrong. */
21925 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
21926 cfun
->machine
->arg_pointer_live
= 1;
21927 else if (!cfun
->machine
->arg_pointer_live
)
21931 /* Check to see if the function contains a branch
21932 insn with the far jump attribute set. */
21933 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
21936 /* Ignore tablejump patterns. */
21937 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
21938 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
21939 && get_attr_far_jump (insn
) == FAR_JUMP_YES
21942 /* Record the fact that we have decided that
21943 the function does use far jumps. */
21944 cfun
->machine
->far_jump_used
= 1;
21952 /* Return nonzero if FUNC must be entered in ARM mode. */
21954 is_called_in_ARM_mode (tree func
)
21956 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
21958 /* Ignore the problem about functions whose address is taken. */
21959 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
21963 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
21969 /* Given the stack offsets and register mask in OFFSETS, decide how
21970 many additional registers to push instead of subtracting a constant
21971 from SP. For epilogues the principle is the same except we use pop.
21972 FOR_PROLOGUE indicates which we're generating. */
21974 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
21976 HOST_WIDE_INT amount
;
21977 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
21978 /* Extract a mask of the ones we can give to the Thumb's push/pop
21980 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
21981 /* Then count how many other high registers will need to be pushed. */
21982 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
21983 int n_free
, reg_base
, size
;
21985 if (!for_prologue
&& frame_pointer_needed
)
21986 amount
= offsets
->locals_base
- offsets
->saved_regs
;
21988 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
21990 /* If the stack frame size is 512 exactly, we can save one load
21991 instruction, which should make this a win even when optimizing
21993 if (!optimize_size
&& amount
!= 512)
21996 /* Can't do this if there are high registers to push. */
21997 if (high_regs_pushed
!= 0)
22000 /* Shouldn't do it in the prologue if no registers would normally
22001 be pushed at all. In the epilogue, also allow it if we'll have
22002 a pop insn for the PC. */
22005 || TARGET_BACKTRACE
22006 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
22007 || TARGET_INTERWORK
22008 || crtl
->args
.pretend_args_size
!= 0))
22011 /* Don't do this if thumb_expand_prologue wants to emit instructions
22012 between the push and the stack frame allocation. */
22014 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
22015 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
22022 size
= arm_size_return_regs ();
22023 reg_base
= ARM_NUM_INTS (size
);
22024 live_regs_mask
>>= reg_base
;
22027 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
22028 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
22030 live_regs_mask
>>= 1;
22036 gcc_assert (amount
/ 4 * 4 == amount
);
22038 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
22039 return (amount
- 508) / 4;
22040 if (amount
<= n_free
* 4)
22045 /* The bits which aren't usefully expanded as rtl. */
22047 thumb1_unexpanded_epilogue (void)
22049 arm_stack_offsets
*offsets
;
22051 unsigned long live_regs_mask
= 0;
22052 int high_regs_pushed
= 0;
22054 int had_to_push_lr
;
22057 if (cfun
->machine
->return_used_this_function
!= 0)
22060 if (IS_NAKED (arm_current_func_type ()))
22063 offsets
= arm_get_frame_offsets ();
22064 live_regs_mask
= offsets
->saved_regs_mask
;
22065 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
22067 /* If we can deduce the registers used from the function's return value.
22068 This is more reliable that examining df_regs_ever_live_p () because that
22069 will be set if the register is ever used in the function, not just if
22070 the register is used to hold a return value. */
22071 size
= arm_size_return_regs ();
22073 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
22076 unsigned long extra_mask
= (1 << extra_pop
) - 1;
22077 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
22080 /* The prolog may have pushed some high registers to use as
22081 work registers. e.g. the testsuite file:
22082 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
22083 compiles to produce:
22084 push {r4, r5, r6, r7, lr}
22088 as part of the prolog. We have to undo that pushing here. */
22090 if (high_regs_pushed
)
22092 unsigned long mask
= live_regs_mask
& 0xff;
22095 /* The available low registers depend on the size of the value we are
22103 /* Oh dear! We have no low registers into which we can pop
22106 ("no low registers available for popping high registers");
22108 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
22109 if (live_regs_mask
& (1 << next_hi_reg
))
22112 while (high_regs_pushed
)
22114 /* Find lo register(s) into which the high register(s) can
22116 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
22118 if (mask
& (1 << regno
))
22119 high_regs_pushed
--;
22120 if (high_regs_pushed
== 0)
22124 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
22126 /* Pop the values into the low register(s). */
22127 thumb_pop (asm_out_file
, mask
);
22129 /* Move the value(s) into the high registers. */
22130 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
22132 if (mask
& (1 << regno
))
22134 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
22137 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
22138 if (live_regs_mask
& (1 << next_hi_reg
))
22143 live_regs_mask
&= ~0x0f00;
22146 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
22147 live_regs_mask
&= 0xff;
22149 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
22151 /* Pop the return address into the PC. */
22152 if (had_to_push_lr
)
22153 live_regs_mask
|= 1 << PC_REGNUM
;
22155 /* Either no argument registers were pushed or a backtrace
22156 structure was created which includes an adjusted stack
22157 pointer, so just pop everything. */
22158 if (live_regs_mask
)
22159 thumb_pop (asm_out_file
, live_regs_mask
);
22161 /* We have either just popped the return address into the
22162 PC or it is was kept in LR for the entire function.
22163 Note that thumb_pop has already called thumb_exit if the
22164 PC was in the list. */
22165 if (!had_to_push_lr
)
22166 thumb_exit (asm_out_file
, LR_REGNUM
);
22170 /* Pop everything but the return address. */
22171 if (live_regs_mask
)
22172 thumb_pop (asm_out_file
, live_regs_mask
);
22174 if (had_to_push_lr
)
22178 /* We have no free low regs, so save one. */
22179 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
22183 /* Get the return address into a temporary register. */
22184 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
22188 /* Move the return address to lr. */
22189 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
22191 /* Restore the low register. */
22192 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
22197 regno
= LAST_ARG_REGNUM
;
22202 /* Remove the argument registers that were pushed onto the stack. */
22203 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
22204 SP_REGNUM
, SP_REGNUM
,
22205 crtl
->args
.pretend_args_size
);
22207 thumb_exit (asm_out_file
, regno
);
/* Functions to save and restore machine-specific function data.
   Allocate and zero-initialize the per-function machine state; the
   function type starts out as ARM_FT_UNKNOWN when that is nonzero
   (zero-initialization already covers the zero case).  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
22226 /* Return an RTX indicating where the return address to the
22227 calling function can be found. */
22229 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
22234 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
22237 /* Do anything needed before RTL is emitted for each function. */
22239 arm_init_expanders (void)
22241 /* Arrange to initialize and mark the machine per-function status. */
22242 init_machine_status
= arm_init_machine_status
;
22244 /* This is to stop the combine pass optimizing away the alignment
22245 adjustment of va_arg. */
22246 /* ??? It is claimed that this should not be necessary. */
22248 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
22252 /* Like arm_compute_initial_elimination offset. Simpler because there
22253 isn't an ABI specified frame pointer for Thumb. Instead, we set it
22254 to point at the base of the local variables after static stack
22255 space for a function has been allocated. */
22258 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
22260 arm_stack_offsets
*offsets
;
22262 offsets
= arm_get_frame_offsets ();
22266 case ARG_POINTER_REGNUM
:
22269 case STACK_POINTER_REGNUM
:
22270 return offsets
->outgoing_args
- offsets
->saved_args
;
22272 case FRAME_POINTER_REGNUM
:
22273 return offsets
->soft_frame
- offsets
->saved_args
;
22275 case ARM_HARD_FRAME_POINTER_REGNUM
:
22276 return offsets
->saved_regs
- offsets
->saved_args
;
22278 case THUMB_HARD_FRAME_POINTER_REGNUM
:
22279 return offsets
->locals_base
- offsets
->saved_args
;
22282 gcc_unreachable ();
22286 case FRAME_POINTER_REGNUM
:
22289 case STACK_POINTER_REGNUM
:
22290 return offsets
->outgoing_args
- offsets
->soft_frame
;
22292 case ARM_HARD_FRAME_POINTER_REGNUM
:
22293 return offsets
->saved_regs
- offsets
->soft_frame
;
22295 case THUMB_HARD_FRAME_POINTER_REGNUM
:
22296 return offsets
->locals_base
- offsets
->soft_frame
;
22299 gcc_unreachable ();
22304 gcc_unreachable ();
22308 /* Generate the function's prologue. */
22311 thumb1_expand_prologue (void)
22315 HOST_WIDE_INT amount
;
22316 arm_stack_offsets
*offsets
;
22317 unsigned long func_type
;
22319 unsigned long live_regs_mask
;
22320 unsigned long l_mask
;
22321 unsigned high_regs_pushed
= 0;
22323 func_type
= arm_current_func_type ();
22325 /* Naked functions don't have prologues. */
22326 if (IS_NAKED (func_type
))
22329 if (IS_INTERRUPT (func_type
))
22331 error ("interrupt Service Routines cannot be coded in Thumb mode");
22335 if (is_called_in_ARM_mode (current_function_decl
))
22336 emit_insn (gen_prologue_thumb1_interwork ());
22338 offsets
= arm_get_frame_offsets ();
22339 live_regs_mask
= offsets
->saved_regs_mask
;
22341 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
22342 l_mask
= live_regs_mask
& 0x40ff;
22343 /* Then count how many other high registers will need to be pushed. */
22344 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
22346 if (crtl
->args
.pretend_args_size
)
22348 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
22350 if (cfun
->machine
->uses_anonymous_args
)
22352 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
22353 unsigned long mask
;
22355 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
22356 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
22358 insn
= thumb1_emit_multi_reg_push (mask
, 0);
22362 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
22363 stack_pointer_rtx
, x
));
22365 RTX_FRAME_RELATED_P (insn
) = 1;
22368 if (TARGET_BACKTRACE
)
22370 HOST_WIDE_INT offset
= 0;
22371 unsigned work_register
;
22372 rtx work_reg
, x
, arm_hfp_rtx
;
22374 /* We have been asked to create a stack backtrace structure.
22375 The code looks like this:
22379 0 sub SP, #16 Reserve space for 4 registers.
22380 2 push {R7} Push low registers.
22381 4 add R7, SP, #20 Get the stack pointer before the push.
22382 6 str R7, [SP, #8] Store the stack pointer
22383 (before reserving the space).
22384 8 mov R7, PC Get hold of the start of this code + 12.
22385 10 str R7, [SP, #16] Store it.
22386 12 mov R7, FP Get hold of the current frame pointer.
22387 14 str R7, [SP, #4] Store it.
22388 16 mov R7, LR Get hold of the current return address.
22389 18 str R7, [SP, #12] Store it.
22390 20 add R7, SP, #16 Point at the start of the
22391 backtrace structure.
22392 22 mov FP, R7 Put this value into the frame pointer. */
22394 work_register
= thumb_find_work_register (live_regs_mask
);
22395 work_reg
= gen_rtx_REG (SImode
, work_register
);
22396 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
22398 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
22399 stack_pointer_rtx
, GEN_INT (-16)));
22400 RTX_FRAME_RELATED_P (insn
) = 1;
22404 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
22405 RTX_FRAME_RELATED_P (insn
) = 1;
22407 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
22410 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
22411 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
22413 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
22414 x
= gen_frame_mem (SImode
, x
);
22415 emit_move_insn (x
, work_reg
);
22417 /* Make sure that the instruction fetching the PC is in the right place
22418 to calculate "start of backtrace creation code + 12". */
22419 /* ??? The stores using the common WORK_REG ought to be enough to
22420 prevent the scheduler from doing anything weird. Failing that
22421 we could always move all of the following into an UNSPEC_VOLATILE. */
22424 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
22425 emit_move_insn (work_reg
, x
);
22427 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
22428 x
= gen_frame_mem (SImode
, x
);
22429 emit_move_insn (x
, work_reg
);
22431 emit_move_insn (work_reg
, arm_hfp_rtx
);
22433 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
22434 x
= gen_frame_mem (SImode
, x
);
22435 emit_move_insn (x
, work_reg
);
22439 emit_move_insn (work_reg
, arm_hfp_rtx
);
22441 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
22442 x
= gen_frame_mem (SImode
, x
);
22443 emit_move_insn (x
, work_reg
);
22445 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
22446 emit_move_insn (work_reg
, x
);
22448 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
22449 x
= gen_frame_mem (SImode
, x
);
22450 emit_move_insn (x
, work_reg
);
22453 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
22454 emit_move_insn (work_reg
, x
);
22456 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
22457 x
= gen_frame_mem (SImode
, x
);
22458 emit_move_insn (x
, work_reg
);
22460 x
= GEN_INT (offset
+ 12);
22461 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
22463 emit_move_insn (arm_hfp_rtx
, work_reg
);
22465 /* Optimization: If we are not pushing any low registers but we are going
22466 to push some high registers then delay our first push. This will just
22467 be a push of LR and we can combine it with the push of the first high
22469 else if ((l_mask
& 0xff) != 0
22470 || (high_regs_pushed
== 0 && l_mask
))
22472 unsigned long mask
= l_mask
;
22473 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
22474 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
22475 RTX_FRAME_RELATED_P (insn
) = 1;
22478 if (high_regs_pushed
)
22480 unsigned pushable_regs
;
22481 unsigned next_hi_reg
;
22483 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
22484 if (live_regs_mask
& (1 << next_hi_reg
))
22487 pushable_regs
= l_mask
& 0xff;
22489 if (pushable_regs
== 0)
22490 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
22492 while (high_regs_pushed
> 0)
22494 unsigned long real_regs_mask
= 0;
22496 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
22498 if (pushable_regs
& (1 << regno
))
22500 emit_move_insn (gen_rtx_REG (SImode
, regno
),
22501 gen_rtx_REG (SImode
, next_hi_reg
));
22503 high_regs_pushed
--;
22504 real_regs_mask
|= (1 << next_hi_reg
);
22506 if (high_regs_pushed
)
22508 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
22510 if (live_regs_mask
& (1 << next_hi_reg
))
22515 pushable_regs
&= ~((1 << regno
) - 1);
22521 /* If we had to find a work register and we have not yet
22522 saved the LR then add it to the list of regs to push. */
22523 if (l_mask
== (1 << LR_REGNUM
))
22525 pushable_regs
|= l_mask
;
22526 real_regs_mask
|= l_mask
;
22530 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
22531 RTX_FRAME_RELATED_P (insn
) = 1;
22535 /* Load the pic register before setting the frame pointer,
22536 so we can use r7 as a temporary work register. */
22537 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
22538 arm_load_pic_register (live_regs_mask
);
22540 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
22541 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
22542 stack_pointer_rtx
);
22544 if (flag_stack_usage_info
)
22545 current_function_static_stack_size
22546 = offsets
->outgoing_args
- offsets
->saved_args
;
22548 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
22549 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
22554 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
22555 GEN_INT (- amount
)));
22556 RTX_FRAME_RELATED_P (insn
) = 1;
22562 /* The stack decrement is too big for an immediate value in a single
22563 insn. In theory we could issue multiple subtracts, but after
22564 three of them it becomes more space efficient to place the full
22565 value in the constant pool and load into a register. (Also the
22566 ARM debugger really likes to see only one stack decrement per
22567 function). So instead we look for a scratch register into which
22568 we can load the decrement, and then we subtract this from the
22569 stack pointer. Unfortunately on the thumb the only available
22570 scratch registers are the argument registers, and we cannot use
22571 these as they may hold arguments to the function. Instead we
22572 attempt to locate a call preserved register which is used by this
22573 function. If we can find one, then we know that it will have
22574 been pushed at the start of the prologue and so we can corrupt
22576 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
22577 if (live_regs_mask
& (1 << regno
))
22580 gcc_assert(regno
<= LAST_LO_REGNUM
);
22582 reg
= gen_rtx_REG (SImode
, regno
);
22584 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
22586 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
22587 stack_pointer_rtx
, reg
));
22589 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
22590 plus_constant (Pmode
, stack_pointer_rtx
,
22592 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
22593 RTX_FRAME_RELATED_P (insn
) = 1;
22597 if (frame_pointer_needed
)
22598 thumb_set_frame_pointer (offsets
);
22600 /* If we are profiling, make sure no instructions are scheduled before
22601 the call to mcount. Similarly if the user has requested no
22602 scheduling in the prolog. Similarly if we want non-call exceptions
22603 using the EABI unwinder, to prevent faulting instructions from being
22604 swapped with a stack adjustment. */
22605 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
22606 || (arm_except_unwind_info (&global_options
) == UI_TARGET
22607 && cfun
->can_throw_non_call_exceptions
))
22608 emit_insn (gen_blockage ());
22610 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
22611 if (live_regs_mask
& 0xff)
22612 cfun
->machine
->lr_save_eliminated
= 0;
22615 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
22616 POP instruction can be generated. LR should be replaced by PC. All
22617 the checks required are already done by USE_RETURN_INSN (). Hence,
22618 all we really need to check here is if single register is to be
22619 returned, or multiple register return. */
/* NOTE(review): garbled extraction -- the return type, the declarations of
   `i' and `num_regs', and the brace/if structure were dropped; the remaining
   tokens are kept verbatim.  Reconcile against the full source before use.  */
22621 thumb2_expand_return (void)
22624 unsigned long saved_regs_mask
;
22625 arm_stack_offsets
*offsets
;
/* Fetch the frame layout and the mask of core registers the prologue saved.  */
22627 offsets
= arm_get_frame_offsets ();
22628 saved_regs_mask
= offsets
->saved_regs_mask
;
/* Count the saved registers (loop body elided by the extraction --
   presumably it increments num_regs; TODO confirm).  */
22630 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
22631 if (saved_regs_mask
& (1 << i
))
22634 if (saved_regs_mask
)
/* Single-pop path: build a PARALLEL combining a return with a
   post-increment pop of the saved LR slot directly into the PC.  */
22638 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
22639 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
22640 rtx addr
= gen_rtx_MEM (SImode
,
22641 gen_rtx_POST_INC (SImode
,
22642 stack_pointer_rtx
));
22643 set_mem_alias_set (addr
, get_frame_alias_set ());
22644 XVECEXP (par
, 0, 0) = ret_rtx
;
22645 XVECEXP (par
, 0, 1) = gen_rtx_SET (SImode
, reg
, addr
);
22646 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
22647 emit_jump_insn (par
);
/* Multi-register path: pop LR's stack slot into PC as part of one
   multi-register pop.  */
22651 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
22652 saved_regs_mask
|= (1 << PC_REGNUM
);
22653 arm_emit_multi_reg_pop (saved_regs_mask
);
/* Nothing was saved: a plain return suffices.  */
22658 emit_jump_insn (simple_return_rtx
);
/* Generate RTL for the Thumb-1 epilogue: deallocate the local frame and
   emit the bookkeeping uses/clobbers the dataflow passes need.
   NOTE(review): garbled extraction -- return type, some declarations
   (e.g. `regno'), braces and a few statements are missing; the tokens
   below are kept verbatim.  */
22663 thumb1_expand_epilogue (void)
22665 HOST_WIDE_INT amount
;
22666 arm_stack_offsets
*offsets
;
22669 /* Naked functions don't have prologues. */
22670 if (IS_NAKED (arm_current_func_type ()))
22673 offsets
= arm_get_frame_offsets ();
22674 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
/* With a frame pointer, restore SP from it first, then only the
   locals above the saved registers remain to be popped.  */
22676 if (frame_pointer_needed
)
22678 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
22679 amount
= offsets
->locals_base
- offsets
->saved_regs
;
22681 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
22683 gcc_assert (amount
>= 0);
22686 emit_insn (gen_blockage ());
22689 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
22690 GEN_INT (amount
)));
/* Large adjustment path: the immediate does not fit, so stage it
   through a register.  */
22693 /* r3 is always free in the epilogue. */
22694 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
22696 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
22697 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
22701 /* Emit a USE (stack_pointer_rtx), so that
22702 the stack adjustment will not be deleted. */
22703 emit_insn (gen_force_register_use (stack_pointer_rtx
));
/* Keep the scheduler from moving instructions across the adjustment
   when profiling or when prologue scheduling is disabled.  */
22705 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
22706 emit_insn (gen_blockage ());
22708 /* Emit a clobber for each insn that will be restored in the epilogue,
22709 so that flow2 will get register lifetimes correct. */
22710 for (regno
= 0; regno
< 13; regno
++)
22711 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
22712 emit_clobber (gen_rtx_REG (SImode
, regno
))
22714 if (! df_regs_ever_live_p (LR_REGNUM
))
22715 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
))
22718 /* Epilogue code for APCS frame. */
/* NOTE(review): garbled extraction -- return type, several declarations
   (i, num_regs, start_reg, insn), braces and a number of statements were
   dropped by the extractor; the surviving tokens are kept verbatim.  */
22720 arm_expand_epilogue_apcs_frame (bool really_return
)
22722 unsigned long func_type
;
22723 unsigned long saved_regs_mask
;
22726 int floats_from_frame
= 0;
22727 arm_stack_offsets
*offsets
;
/* This path is only valid for an ARM-mode APCS frame with a frame pointer.  */
22729 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
22730 func_type
= arm_current_func_type ();
22732 /* Get frame offsets for ARM. */
22733 offsets
= arm_get_frame_offsets ();
22734 saved_regs_mask
= offsets
->saved_regs_mask
;
22736 /* Find the offset of the floating-point save area in the frame. */
22737 floats_from_frame
= offsets
->saved_args
- offsets
->frame
;
22739 /* Compute how many core registers saved and how far away the floats are. */
22740 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
22741 if (saved_regs_mask
& (1 << i
))
22744 floats_from_frame
+= 4;
/* Restore VFP registers: point IP at the FP save area, then pop runs of
   consecutively-saved D registers with vldm.  */
22747 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
22751 /* The offset is from IP_REGNUM. */
22752 int saved_size
= arm_get_vfp_saved_size ();
22753 if (saved_size
> 0)
22755 floats_from_frame
+= saved_size
;
22756 emit_insn (gen_addsi3 (gen_rtx_REG (SImode
, IP_REGNUM
),
22757 hard_frame_pointer_rtx
,
22758 GEN_INT (-floats_from_frame
)));
22761 /* Generate VFP register multi-pop. */
22762 start_reg
= FIRST_VFP_REGNUM
;
22764 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
22765 /* Look for a case where a reg does not need restoring. */
22766 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
22767 && (!df_regs_ever_live_p (i
+ 1)
22768 || call_used_regs
[i
+ 1]))
22770 if (start_reg
!= i
)
22771 arm_emit_vfp_multi_reg_pop (start_reg
,
22772 (i
- start_reg
) / 2,
22773 gen_rtx_REG (SImode
,
22778 /* Restore the remaining regs that we have discovered (or possibly
22779 even all of them, if the conditional in the for loop never
22781 if (start_reg
!= i
)
22782 arm_emit_vfp_multi_reg_pop (start_reg
,
22783 (i
- start_reg
) / 2,
22784 gen_rtx_REG (SImode
, IP_REGNUM
));
/* Restore iWMMXt registers frame-pointer-relatively, emitting CFA
   restore notes for the unwinder.  */
22789 /* The frame pointer is guaranteed to be non-double-word aligned, as
22790 it is set to double-word-aligned old_stack_pointer - 4. */
22792 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
22794 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
22795 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
22797 rtx addr
= gen_frame_mem (V2SImode
,
22798 plus_constant (Pmode
, hard_frame_pointer_rtx
,
22800 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
22801 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
22802 gen_rtx_REG (V2SImode
, i
),
22808 /* saved_regs_mask should contain IP which contains old stack pointer
22809 at the time of activation creation. Since SP and IP are adjacent registers,
22810 we can restore the value directly into SP. */
22811 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
22812 saved_regs_mask
&= ~(1 << IP_REGNUM
);
22813 saved_regs_mask
|= (1 << SP_REGNUM
);
22815 /* There are two registers left in saved_regs_mask - LR and PC. We
22816 only need to restore LR (the return address), but to
22817 save time we can load it directly into PC, unless we need a
22818 special function exit sequence, or we are not really returning. */
/* NOTE(review): the leading condition of this if (line 22819) was dropped
   by the extraction -- presumably a really_return test; TODO confirm.  */
22820 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
22821 && !crtl
->calls_eh_return
)
22822 /* Delete LR from the register mask, so that LR on
22823 the stack is loaded into the PC in the register mask. */
22824 saved_regs_mask
&= ~(1 << LR_REGNUM
);
22826 saved_regs_mask
&= ~(1 << PC_REGNUM
);
22828 num_regs
= bit_count (saved_regs_mask
);
22829 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
22831 /* Unwind the stack to just below the saved registers. */
22832 emit_insn (gen_addsi3 (stack_pointer_rtx
,
22833 hard_frame_pointer_rtx
,
22834 GEN_INT (- 4 * num_regs
)));
22837 arm_emit_multi_reg_pop (saved_regs_mask
);
22839 if (IS_INTERRUPT (func_type
))
22841 /* Interrupt handlers will have pushed the
22842 IP onto the stack, so restore it now. */
22844 rtx addr
= gen_rtx_MEM (SImode
,
22845 gen_rtx_POST_INC (SImode
,
22846 stack_pointer_rtx
));
22847 set_mem_alias_set (addr
, get_frame_alias_set ());
22848 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
22849 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
22850 gen_rtx_REG (SImode
, IP_REGNUM
),
/* If we already returned via a pop into PC (or are not really
   returning), stop here.  */
22854 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
22857 if (crtl
->calls_eh_return
)
22858 emit_insn (gen_addsi3 (stack_pointer_rtx
,
22860 GEN_INT (ARM_EH_STACKADJ_REGNUM
)));
22862 if (IS_STACKALIGN (func_type
))
22863 /* Restore the original stack pointer. Before prologue, the stack was
22864 realigned and the original stack pointer saved in r0. For details,
22865 see comment in arm_expand_prologue. */
22866 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
22868 emit_jump_insn (simple_return_rtx
);
22871 /* Generate RTL to represent ARM epilogue. Really_return is true if the
22872 function is not a sibcall. */
/* NOTE(review): garbled extraction -- return type, declarations (i,
   num_regs, amount, insn), braces and some statements were dropped; the
   surviving tokens are kept verbatim.  Reconcile before modifying.  */
22874 arm_expand_epilogue (bool really_return
)
22876 unsigned long func_type
;
22877 unsigned long saved_regs_mask
;
22881 arm_stack_offsets
*offsets
;
22883 func_type
= arm_current_func_type ();
22885 /* Naked functions don't have epilogue. Hence, generate return pattern, and
22886 let output_return_instruction take care of instruction emission if any. */
22887 if (IS_NAKED (func_type
)
22888 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
22890 emit_jump_insn (simple_return_rtx
);
22894 /* If we are throwing an exception, then we really must be doing a
22895 return, so we can't tail-call. */
22896 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
/* APCS frames have their own dedicated epilogue generator.  */
22898 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
22900 arm_expand_epilogue_apcs_frame (really_return
);
22904 /* Get frame offsets for ARM. */
22905 offsets
= arm_get_frame_offsets ();
22906 saved_regs_mask
= offsets
->saved_regs_mask
;
22907 num_regs
= bit_count (saved_regs_mask
);
/* Step 1: point SP at the last saved register, using either the frame
   pointer (ARM vs Thumb-2 layouts differ) or a direct SP adjustment.  */
22909 if (frame_pointer_needed
)
22911 /* Restore stack pointer if necessary. */
22914 /* In ARM mode, frame pointer points to first saved register.
22915 Restore stack pointer to last saved register. */
22916 amount
= offsets
->frame
- offsets
->saved_regs
;
22918 /* Force out any pending memory operations that reference stacked data
22919 before stack de-allocation occurs. */
22920 emit_insn (gen_blockage ());
22921 emit_insn (gen_addsi3 (stack_pointer_rtx
,
22922 hard_frame_pointer_rtx
,
22923 GEN_INT (amount
)));
22925 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
22927 emit_insn (gen_force_register_use (stack_pointer_rtx
));
22931 /* In Thumb-2 mode, the frame pointer points to the last saved
22933 amount
= offsets
->locals_base
- offsets
->saved_regs
;
22935 emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
22936 hard_frame_pointer_rtx
,
22937 GEN_INT (amount
)));
22939 /* Force out any pending memory operations that reference stacked data
22940 before stack de-allocation occurs. */
22941 emit_insn (gen_blockage ());
22942 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
22943 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
22945 emit_insn (gen_force_register_use (stack_pointer_rtx
));
22950 /* Pop off outgoing args and local frame to adjust stack pointer to
22951 last saved register. */
22952 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
22955 /* Force out any pending memory operations that reference stacked data
22956 before stack de-allocation occurs. */
22957 emit_insn (gen_blockage ());
22958 emit_insn (gen_addsi3 (stack_pointer_rtx
,
22960 GEN_INT (amount
)));
22961 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
22963 emit_insn (gen_force_register_use (stack_pointer_rtx
));
/* Step 2: pop VFP registers, matching the consecutive-register groupings
   the prologue created (vldm can only handle consecutive regs).  */
22967 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
22969 /* Generate VFP register multi-pop. */
22970 int end_reg
= LAST_VFP_REGNUM
+ 1;
22972 /* Scan the registers in reverse order. We need to match
22973 any groupings made in the prologue and generate matching
22974 vldm operations. The need to match groups is because,
22975 unlike pop, vldm can only do consecutive regs. */
22976 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
22977 /* Look for a case where a reg does not need restoring. */
22978 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
22979 && (!df_regs_ever_live_p (i
+ 1)
22980 || call_used_regs
[i
+ 1]))
22982 /* Restore the regs discovered so far (from reg+2 to
22984 if (end_reg
> i
+ 2)
22985 arm_emit_vfp_multi_reg_pop (i
+ 2,
22986 (end_reg
- (i
+ 2)) / 2,
22987 stack_pointer_rtx
);
22991 /* Restore the remaining regs that we have discovered (or possibly
22992 even all of them, if the conditional in the for loop never
22994 if (end_reg
> i
+ 2)
22995 arm_emit_vfp_multi_reg_pop (i
+ 2,
22996 (end_reg
- (i
+ 2)) / 2,
22997 stack_pointer_rtx
);
/* Step 3: pop iWMMXt registers, with CFA restore notes for unwinding.  */
23001 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
23002 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
23005 rtx addr
= gen_rtx_MEM (V2SImode
,
23006 gen_rtx_POST_INC (SImode
,
23007 stack_pointer_rtx
));
23008 set_mem_alias_set (addr
, get_frame_alias_set ());
23009 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
23010 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
23011 gen_rtx_REG (V2SImode
, i
),
/* Step 4: pop the saved core registers, returning via a pop into PC
   whenever the function shape permits it.  */
23015 if (saved_regs_mask
)
23018 bool return_in_pc
= false;
23020 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
23021 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
23022 && !IS_STACKALIGN (func_type
)
23024 && crtl
->args
.pretend_args_size
== 0
23025 && saved_regs_mask
& (1 << LR_REGNUM
)
23026 && !crtl
->calls_eh_return
)
23028 saved_regs_mask
&= ~(1 << LR_REGNUM
);
23029 saved_regs_mask
|= (1 << PC_REGNUM
);
23030 return_in_pc
= true;
23033 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
23035 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
23036 if (saved_regs_mask
& (1 << i
))
23038 rtx addr
= gen_rtx_MEM (SImode
,
23039 gen_rtx_POST_INC (SImode
,
23040 stack_pointer_rtx
));
23041 set_mem_alias_set (addr
, get_frame_alias_set ());
23043 if (i
== PC_REGNUM
)
23045 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
23046 XVECEXP (insn
, 0, 0) = ret_rtx
;
23047 XVECEXP (insn
, 0, 1) = gen_rtx_SET (SImode
,
23048 gen_rtx_REG (SImode
, i
),
23050 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
23051 insn
= emit_jump_insn (insn
);
23055 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
23057 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
23058 gen_rtx_REG (SImode
, i
),
23065 arm_emit_multi_reg_pop (saved_regs_mask
);
23068 if (return_in_pc
== true)
/* Step 5: drop any pretend-args area, apply the EH stack adjustment,
   undo stack realignment, and finally return.  */
23072 if (crtl
->args
.pretend_args_size
)
23073 emit_insn (gen_addsi3 (stack_pointer_rtx
,
23075 GEN_INT (crtl
->args
.pretend_args_size
)));
23077 if (!really_return
)
23080 if (crtl
->calls_eh_return
)
23081 emit_insn (gen_addsi3 (stack_pointer_rtx
,
23083 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
23085 if (IS_STACKALIGN (func_type
))
23086 /* Restore the original stack pointer. Before prologue, the stack was
23087 realigned and the original stack pointer saved in r0. For details,
23088 see comment in arm_expand_prologue. */
23089 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
23091 emit_jump_insn (simple_return_rtx
);
23094 /* Implementation of insn prologue_thumb1_interwork. This is the first
23095 "instruction" of a function called in ARM mode. Swap to thumb mode. */
/* NOTE(review): garbled extraction -- return type, the `name' declaration
   and the trailing lines of this function were dropped; tokens are kept
   verbatim.  */
23098 thumb1_output_interwork (void)
23101 FILE *f
= asm_out_file
;
23103 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
23104 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
23106 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
23108 /* Generate code sequence to switch us into Thumb mode. */
23109 /* The .code 32 directive has already been emitted by
23110 ASM_DECLARE_FUNCTION_NAME. */
/* Set the Thumb bit in IP (taken from PC) and branch-exchange to it.  */
23111 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
23112 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
23114 /* Generate a label, so that the debugger will notice the
23115 change in instruction sets. This label is also used by
23116 the assembler to bypass the ARM code when this function
23117 is called from a Thumb encoded function elsewhere in the
23118 same file. Hence the definition of STUB_NAME here must
23119 agree with the definition in gas/config/tc-arm.c. */
23121 #define STUB_NAME ".real_start_of"
23123 fprintf (f
, "\t.code\t16\n");
23125 if (arm_dllexport_name_p (name
))
23126 name
= arm_strip_name_encoding (name
);
23128 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
23129 fprintf (f
, "\t.thumb_func\n");
23130 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
23135 /* Handle the case of a double word load into a low register from
23136 a computed memory address. The computed address may involve a
23137 register which is overwritten by the load. */
/* NOTE(review): garbled extraction -- return type, local declarations
   (addr, base, offset, arg1, arg2), the switch case labels and the final
   return were dropped; tokens are kept verbatim.  The ordering trick below
   loads the high word first whenever the address register would otherwise
   be clobbered by the low-word load.  */
23139 thumb_load_double_from_address (rtx
*operands
)
23147 gcc_assert (REG_P (operands
[0]));
23148 gcc_assert (MEM_P (operands
[1]));
23150 /* Get the memory address. */
23151 addr
= XEXP (operands
[1], 0);
23153 /* Work out how the memory address is computed. */
23154 switch (GET_CODE (addr
))
/* Plain register address case (label elided by extraction).  */
23157 operands
[2] = adjust_address (operands
[1], SImode
, 4);
23159 if (REGNO (operands
[0]) == REGNO (addr
))
23161 output_asm_insn ("ldr\t%H0, %2", operands
);
23162 output_asm_insn ("ldr\t%0, %1", operands
);
23166 output_asm_insn ("ldr\t%0, %1", operands
);
23167 output_asm_insn ("ldr\t%H0, %2", operands
);
/* Constant-address case (label elided by extraction).  */
23172 /* Compute <address> + 4 for the high order load. */
23173 operands
[2] = adjust_address (operands
[1], SImode
, 4);
23175 output_asm_insn ("ldr\t%0, %1", operands
);
23176 output_asm_insn ("ldr\t%H0, %2", operands
);
/* PLUS case: decompose into base register and offset.  */
23180 arg1
= XEXP (addr
, 0);
23181 arg2
= XEXP (addr
, 1);
23183 if (CONSTANT_P (arg1
))
23184 base
= arg2
, offset
= arg1
;
23186 base
= arg1
, offset
= arg2
;
23188 gcc_assert (REG_P (base
));
23190 /* Catch the case of <address> = <reg> + <reg> */
23191 if (REG_P (offset
))
23193 int reg_offset
= REGNO (offset
);
23194 int reg_base
= REGNO (base
);
23195 int reg_dest
= REGNO (operands
[0]);
23197 /* Add the base and offset registers together into the
23198 higher destination register. */
23199 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
23200 reg_dest
+ 1, reg_base
, reg_offset
);
23202 /* Load the lower destination register from the address in
23203 the higher destination register. */
23204 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
23205 reg_dest
, reg_dest
+ 1);
23207 /* Load the higher destination register from its own address
23209 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
23210 reg_dest
+ 1, reg_dest
+ 1);
23214 /* Compute <address> + 4 for the high order load. */
23215 operands
[2] = adjust_address (operands
[1], SImode
, 4);
23217 /* If the computed address is held in the low order register
23218 then load the high order register first, otherwise always
23219 load the low order register first. */
23220 if (REGNO (operands
[0]) == REGNO (base
))
23222 output_asm_insn ("ldr\t%H0, %2", operands
);
23223 output_asm_insn ("ldr\t%0, %1", operands
);
23227 output_asm_insn ("ldr\t%0, %1", operands
);
23228 output_asm_insn ("ldr\t%H0, %2", operands
);
/* Label-address case (label elided by extraction).  */
23234 /* With no registers to worry about we can just load the value
23236 operands
[2] = adjust_address (operands
[1], SImode
, 4);
23238 output_asm_insn ("ldr\t%H0, %2", operands
);
23239 output_asm_insn ("ldr\t%0, %1", operands
);
23243 gcc_unreachable ();
/* Output a block move of N words (2 or 3) using ldmia/stmia pairs.
   The scratch registers in operands[4..6] are sorted into ascending
   order first, since ldm/stm register lists must be ascending.
   NOTE(review): garbled extraction -- return type, the `rtx tmp'
   declaration, the second half of each swap (`operands[5] = tmp;' etc.),
   the switch labels and the final return were dropped; tokens are kept
   verbatim.  */
23250 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
/* n == 2 case: sort the two scratch regs, then move two words.  */
23257 if (REGNO (operands
[4]) > REGNO (operands
[5]))
23260 operands
[4] = operands
[5];
23263 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
23264 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
/* n == 3 case: bubble-sort the three scratch regs, then move three words.  */
23268 if (REGNO (operands
[4]) > REGNO (operands
[5]))
23271 operands
[4] = operands
[5];
23274 if (REGNO (operands
[5]) > REGNO (operands
[6]))
23277 operands
[5] = operands
[6];
23280 if (REGNO (operands
[4]) > REGNO (operands
[5]))
23283 operands
[4] = operands
[5];
23287 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
23288 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
23292 gcc_unreachable ();
23298 /* Output a call-via instruction for thumb state. */
/* NOTE(review): garbled extraction -- return type and the `rtx *labelp'
   declaration were dropped; tokens are kept verbatim.  The function emits
   a `bl' to a per-register trampoline label, creating the label lazily
   either per compilation unit or per section.  */
23300 thumb_call_via_reg (rtx reg
)
23302 int regno
= REGNO (reg
);
23305 gcc_assert (regno
< LR_REGNUM
);
23307 /* If we are in the normal text section we can use a single instance
23308 per compilation unit. If we are doing function sections, then we need
23309 an entry per section, since we can't rely on reachability. */
23310 if (in_section
== text_section
)
23312 thumb_call_reg_needed
= 1;
23314 if (thumb_call_via_label
[regno
] == NULL
)
23315 thumb_call_via_label
[regno
] = gen_label_rtx ();
23316 labelp
= thumb_call_via_label
+ regno
;
/* Function-sections path: keep the per-register labels in cfun->machine
   so each section gets its own trampolines.  */
23320 if (cfun
->machine
->call_via
[regno
] == NULL
)
23321 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
23322 labelp
= cfun
->machine
->call_via
+ regno
;
23325 output_asm_insn ("bl\t%a0", labelp
);
23329 /* Routines for generating rtl. */
/* Expand a memory-to-memory block copy for Thumb: peel off 12- and 8-byte
   chunks with the movmem patterns, then copy the remaining word, halfword
   and byte individually.
   NOTE(review): garbled extraction -- return type, the while-loop headers
   and the `len'/`offset' update statements were dropped; tokens are kept
   verbatim.  */
23331 thumb_expand_movmemqi (rtx
*operands
)
23333 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
23334 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
23335 HOST_WIDE_INT len
= INTVAL (operands
[2]);
23336 HOST_WIDE_INT offset
= 0;
/* 12-byte chunks (loop header elided by extraction).  */
23340 emit_insn (gen_movmem12b (out
, in
, out
, in
));
/* 8-byte chunks (loop header elided by extraction).  */
23346 emit_insn (gen_movmem8b (out
, in
, out
, in
));
/* Remaining word, if any.  */
23352 rtx reg
= gen_reg_rtx (SImode
);
23353 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
23354 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
/* Remaining halfword, if any.  */
23361 rtx reg
= gen_reg_rtx (HImode
);
23362 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
23363 plus_constant (Pmode
, in
,
23365 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
/* Remaining byte, if any.  */
23374 rtx reg
= gen_reg_rtx (QImode
);
23375 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
23376 plus_constant (Pmode
, in
,
23378 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
23385 thumb_reload_out_hi (rtx
*operands
)
23387 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
23390 /* Handle reading a half-word from memory during reload. */
23392 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
23394 gcc_unreachable ();
23397 /* Return the length of a function name prefix
23398 that starts with the character 'c'. */
/* NOTE(review): garbled extraction -- the return type and the switch
   wrapper around ARM_NAME_ENCODING_LENGTHS (including the default return)
   were dropped; tokens are kept verbatim.  ARM_NAME_ENCODING_LENGTHS is a
   target macro expanding to case labels that map prefix characters to
   their lengths.  */
23400 arm_get_strip_length (int c
)
23404 ARM_NAME_ENCODING_LENGTHS
23409 /* Return a pointer to a function's name with any
23410 and all prefix encodings stripped from it. */
/* NOTE(review): garbled extraction -- return type, the `skip' declaration,
   the loop body advancing `name' and the final return were dropped; tokens
   are kept verbatim.  Presumably the loop advances `name' by `skip' until
   no encoded prefix remains -- TODO confirm.  */
23412 arm_strip_name_encoding (const char *name
)
23416 while ((skip
= arm_get_strip_length (* name
)))
23422 /* If there is a '*' anywhere in the name's prefix, then
23423 emit the stripped name verbatim, otherwise prepend an
23424 underscore if leading underscores are being used. */
/* NOTE(review): garbled extraction -- return type, the `skip'/`verbatim'
   declarations, the loop body and the if/else around the two output calls
   were dropped; tokens are kept verbatim.  */
23426 arm_asm_output_labelref (FILE *stream
, const char *name
)
/* Walk the encoded prefixes, remembering whether any was a '*'.  */
23431 while ((skip
= arm_get_strip_length (* name
)))
23433 verbatim
|= (*name
== '*');
/* Verbatim path: emit the stripped name as-is.  */
23438 fputs (name
, stream
);
/* Normal path: let %U prepend the user-label prefix if configured.  */
23440 asm_fprintf (stream
, "%U%s", name
);
23443 /* This function is used to emit an EABI tag and its associated value.
23444 We emit the numerical value of the tag in case the assembler does not
23445 support textual tags. (Eg gas prior to 2.20). If requested we include
23446 the tag name in a comment so that anyone reading the assembler output
23447 will know which tag is being set.
23449 This function is not static because arm-c.c needs it too. */
23452 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
23454 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
23455 if (flag_verbose_asm
|| flag_debug_asm
)
23456 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
23457 asm_fprintf (asm_out_file
, "\n");
/* Emit the per-file assembly preamble: syntax/arch/cpu/fpu directives and
   the EABI build attributes describing the FP model, alignment and enum
   size assumptions of this compilation.
   NOTE(review): garbled extraction -- return type, the `val' declaration,
   braces and several statements/arguments were dropped; tokens are kept
   verbatim.  */
23461 arm_file_start (void)
23465 if (TARGET_UNIFIED_ASM
)
23466 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
/* Select .arch or .cpu: an explicit -march wins; a "generic" CPU name is
   stripped of its "generic " prefix and emitted as an .arch.  */
23470 const char *fpu_name
;
23471 if (arm_selected_arch
)
23472 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
23473 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
23474 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
23476 asm_fprintf (asm_out_file
, "\t.cpu %s\n", arm_selected_cpu
->name
);
/* Pick the .fpu name and, for VFP, record the hard-float attributes.  */
23478 if (TARGET_SOFT_FLOAT
)
23480 fpu_name
= "softvfp";
23484 fpu_name
= arm_fpu_desc
->name
;
23485 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
23487 if (TARGET_HARD_FLOAT
)
23488 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
23489 if (TARGET_HARD_FLOAT_ABI
)
23490 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
23493 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
23495 /* Some of these attributes only apply when the corresponding features
23496 are used. However we don't have any easy way of figuring this out.
23497 Conservatively record the setting that would have been used. */
23499 if (flag_rounding_math
)
23500 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
23502 if (!flag_unsafe_math_optimizations
)
23504 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
23505 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
23507 if (flag_signaling_nans
)
23508 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
23510 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
23511 flag_finite_math_only
? 1 : 3);
23513 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
23514 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
23515 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
23516 flag_short_enums
? 1 : 2);
23518 /* Tag_ABI_optimization_goals. */
/* NOTE(review): the optimize-for-size branches and the computation of
   `val' were elided by the extraction.  */
23521 else if (optimize
>= 2)
23527 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
23529 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
23532 if (arm_fp16_format
)
23533 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
23534 (int) arm_fp16_format
);
/* Let the language front end contribute its own attributes.  */
23536 if (arm_lang_output_object_attributes_hook
)
23537 arm_lang_output_object_attributes_hook();
23540 default_file_start ();
/* Emit the per-file assembly postamble: the GNU-stack note if required,
   and the shared Thumb call-via-register trampolines (one `bx rN' stub
   per register actually used) in the text section.
   NOTE(review): garbled extraction -- return type, the `regno'
   declaration, braces and the NULL-label continue were dropped; tokens
   are kept verbatim.  */
23544 arm_file_end (void)
23548 if (NEED_INDICATE_EXEC_STACK
)
23549 /* Add .note.GNU-stack. */
23550 file_end_indicate_exec_stack ();
/* Nothing more to do unless thumb_call_via_reg recorded a need.  */
23552 if (! thumb_call_reg_needed
)
23555 switch_to_section (text_section
);
23556 asm_fprintf (asm_out_file
, "\t.code 16\n");
23557 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
23559 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
23561 rtx label
= thumb_call_via_label
[regno
];
/* Emit the label and the `bx' stub for each register that needed one
   (the skip for a NULL label was elided by the extraction).  */
23565 targetm
.asm_out
.internal_label (asm_out_file
, "L",
23566 CODE_LABEL_NUMBER (label
));
23567 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
23573 /* Symbols in the text segment can be accessed without indirecting via the
23574 constant pool; it may take an extra binary operation, but this is still
23575 faster than indirecting via memory. Don't do this when not optimizing,
23576 since we won't be calculating all of the offsets necessary to do this
23580 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
23582 if (optimize
> 0 && TREE_CONSTANT (decl
))
23583 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
23585 default_encode_section_info (decl
, rtl
, first
);
23587 #endif /* !ARM_PE */
23590 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
23592 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
23593 && !strcmp (prefix
, "L"))
23595 arm_ccfsm_state
= 0;
23596 arm_target_insn
= NULL
;
23598 default_internal_label (stream
, prefix
, labelno
);
23601 /* Output code to add DELTA to the first argument, and then jump
23602 to FUNCTION. Used for C++ multiple inheritance. */
/* NOTE(review): garbled extraction -- return type, the `function'
   parameter line, the `label'/`labelpc'/`shift' declarations, braces and
   several statements were dropped; tokens are kept verbatim.  */
23604 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
23605 HOST_WIDE_INT delta
,
23606 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
23609 static int thunk_label
= 0;
23612 int mi_delta
= delta
;
23613 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
/* `this' lives in r1 when the return value is passed by reference,
   otherwise r0 (remainder of the conditional elided by extraction).  */
23615 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
23618 mi_delta
= - mi_delta
;
/* PIC/Thumb path: load the target address from a literal word.  */
23622 int labelno
= thunk_label
++;
23623 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
23624 /* Thunks are entered in arm mode when available. */
23625 if (TARGET_THUMB1_ONLY
)
23627 /* push r3 so we can use it as a temporary. */
23628 /* TODO: Omit this save if r3 is not used. */
23629 fputs ("\tpush {r3}\n", file
);
23630 fputs ("\tldr\tr3, ", file
);
23634 fputs ("\tldr\tr12, ", file
);
23636 assemble_name (file
, label
);
23637 fputc ('\n', file
);
23640 /* If we are generating PIC, the ldr instruction below loads
23641 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
23642 the address of the add + 8, so we have:
23644 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
23647 Note that we have "+ 1" because some versions of GNU ld
23648 don't set the low bit of the result for R_ARM_REL32
23649 relocations against thumb function symbols.
23650 On ARMv6M this is +4, not +8. */
23651 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
23652 assemble_name (file
, labelpc
);
23653 fputs (":\n", file
);
23654 if (TARGET_THUMB1_ONLY
)
23656 /* This is 2 insns after the start of the thunk, so we know it
23657 is 4-byte aligned. */
23658 fputs ("\tadd\tr3, pc, r3\n", file
);
23659 fputs ("\tmov r12, r3\n", file
);
23662 fputs ("\tadd\tr12, pc, r12\n", file
);
23664 else if (TARGET_THUMB1_ONLY
)
23665 fputs ("\tmov r12, r3\n", file
);
/* Apply the `this' adjustment.  Thumb-1 immediates are limited to 255,
   so larger deltas come from a literal word; ARM/Thumb-2 peel the delta
   into 8-bit chunks.  */
23667 if (TARGET_THUMB1_ONLY
)
23669 if (mi_delta
> 255)
23671 fputs ("\tldr\tr3, ", file
);
23672 assemble_name (file
, label
);
23673 fputs ("+4\n", file
);
23674 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
23675 mi_op
, this_regno
, this_regno
);
23677 else if (mi_delta
!= 0)
23679 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
23680 mi_op
, this_regno
, this_regno
,
23686 /* TODO: Use movw/movt for large constants when available. */
23687 while (mi_delta
!= 0)
23689 if ((mi_delta
& (3 << shift
)) == 0)
23693 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
23694 mi_op
, this_regno
, this_regno
,
23695 mi_delta
& (0xff << shift
));
23696 mi_delta
&= ~(0xff << shift
);
/* Tail: jump to the target, then emit the literal words used above.  */
23703 if (TARGET_THUMB1_ONLY
)
23704 fputs ("\tpop\t{r3}\n", file
);
23706 fprintf (file
, "\tbx\tr12\n");
23707 ASM_OUTPUT_ALIGN (file
, 2);
23708 assemble_name (file
, label
);
23709 fputs (":\n", file
);
23712 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
23713 rtx tem
= XEXP (DECL_RTL (function
), 0);
23714 tem
= gen_rtx_PLUS (GET_MODE (tem
), tem
, GEN_INT (-7));
23715 tem
= gen_rtx_MINUS (GET_MODE (tem
),
23717 gen_rtx_SYMBOL_REF (Pmode
,
23718 ggc_strdup (labelpc
)));
23719 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
23722 /* Output ".word .LTHUNKn". */
23723 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
23725 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
23726 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
/* Non-PIC/non-Thumb path: a direct branch to the target.  */
23730 fputs ("\tb\t", file
);
23731 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
23732 if (NEED_PLT_RELOC
)
23733 fputs ("(PLT)", file
);
23734 fputc ('\n', file
);
23739 arm_emit_vector_const (FILE *file
, rtx x
)
23742 const char * pattern
;
23744 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
23746 switch (GET_MODE (x
))
23748 case V2SImode
: pattern
= "%08x"; break;
23749 case V4HImode
: pattern
= "%04x"; break;
23750 case V8QImode
: pattern
= "%02x"; break;
23751 default: gcc_unreachable ();
23754 fprintf (file
, "0x");
23755 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
23759 element
= CONST_VECTOR_ELT (x
, i
);
23760 fprintf (file
, pattern
, INTVAL (element
));
23766 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
23767 HFmode constant pool entries are actually loaded with ldr. */
23769 arm_emit_fp16_const (rtx c
)
23774 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
23775 bits
= real_to_target (NULL
, &r
, HFmode
);
23776 if (WORDS_BIG_ENDIAN
)
23777 assemble_zeros (2);
23778 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
23779 if (!WORDS_BIG_ENDIAN
)
23780 assemble_zeros (2);
23784 arm_output_load_gr (rtx
*operands
)
23791 if (!MEM_P (operands
[1])
23792 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
23793 || !REG_P (reg
= XEXP (sum
, 0))
23794 || !CONST_INT_P (offset
= XEXP (sum
, 1))
23795 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
23796 return "wldrw%?\t%0, %1";
23798 /* Fix up an out-of-range load of a GR register. */
23799 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
23800 wcgr
= operands
[0];
23802 output_asm_insn ("ldr%?\t%0, %1", operands
);
23804 operands
[0] = wcgr
;
23806 output_asm_insn ("tmcr%?\t%0, %1", operands
);
23807 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
23812 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
23814 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
23815 named arg and all anonymous args onto the stack.
23816 XXX I know the prologue shouldn't be pushing registers, but it is faster
23820 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
23821 enum machine_mode mode
,
23824 int second_time ATTRIBUTE_UNUSED
)
23826 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
23829 cfun
->machine
->uses_anonymous_args
= 1;
23830 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
23832 nregs
= pcum
->aapcs_ncrn
;
23833 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
23837 nregs
= pcum
->nregs
;
23839 if (nregs
< NUM_ARG_REGS
)
23840 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
23843 /* Return nonzero if the CONSUMER instruction (a store) does not need
23844 PRODUCER's value to calculate the address. */
23847 arm_no_early_store_addr_dep (rtx producer
, rtx consumer
)
23849 rtx value
= PATTERN (producer
);
23850 rtx addr
= PATTERN (consumer
);
23852 if (GET_CODE (value
) == COND_EXEC
)
23853 value
= COND_EXEC_CODE (value
);
23854 if (GET_CODE (value
) == PARALLEL
)
23855 value
= XVECEXP (value
, 0, 0);
23856 value
= XEXP (value
, 0);
23857 if (GET_CODE (addr
) == COND_EXEC
)
23858 addr
= COND_EXEC_CODE (addr
);
23859 if (GET_CODE (addr
) == PARALLEL
)
23860 addr
= XVECEXP (addr
, 0, 0);
23861 addr
= XEXP (addr
, 0);
23863 return !reg_overlap_mentioned_p (value
, addr
);
23866 /* Return nonzero if the CONSUMER instruction (a store) does need
23867 PRODUCER's value to calculate the address. */
23870 arm_early_store_addr_dep (rtx producer
, rtx consumer
)
23872 return !arm_no_early_store_addr_dep (producer
, consumer
);
23875 /* Return nonzero if the CONSUMER instruction (a load) does need
23876 PRODUCER's value to calculate the address. */
23879 arm_early_load_addr_dep (rtx producer
, rtx consumer
)
23881 rtx value
= PATTERN (producer
);
23882 rtx addr
= PATTERN (consumer
);
23884 if (GET_CODE (value
) == COND_EXEC
)
23885 value
= COND_EXEC_CODE (value
);
23886 if (GET_CODE (value
) == PARALLEL
)
23887 value
= XVECEXP (value
, 0, 0);
23888 value
= XEXP (value
, 0);
23889 if (GET_CODE (addr
) == COND_EXEC
)
23890 addr
= COND_EXEC_CODE (addr
);
23891 if (GET_CODE (addr
) == PARALLEL
)
23893 if (GET_CODE (XVECEXP (addr
, 0, 0)) == RETURN
)
23894 addr
= XVECEXP (addr
, 0, 1);
23896 addr
= XVECEXP (addr
, 0, 0);
23898 addr
= XEXP (addr
, 1);
23900 return reg_overlap_mentioned_p (value
, addr
);
23903 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23904 have an early register shift value or amount dependency on the
23905 result of PRODUCER. */
23908 arm_no_early_alu_shift_dep (rtx producer
, rtx consumer
)
23910 rtx value
= PATTERN (producer
);
23911 rtx op
= PATTERN (consumer
);
23914 if (GET_CODE (value
) == COND_EXEC
)
23915 value
= COND_EXEC_CODE (value
);
23916 if (GET_CODE (value
) == PARALLEL
)
23917 value
= XVECEXP (value
, 0, 0);
23918 value
= XEXP (value
, 0);
23919 if (GET_CODE (op
) == COND_EXEC
)
23920 op
= COND_EXEC_CODE (op
);
23921 if (GET_CODE (op
) == PARALLEL
)
23922 op
= XVECEXP (op
, 0, 0);
23925 early_op
= XEXP (op
, 0);
23926 /* This is either an actual independent shift, or a shift applied to
23927 the first operand of another operation. We want the whole shift
23929 if (REG_P (early_op
))
23932 return !reg_overlap_mentioned_p (value
, early_op
);
23935 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23936 have an early register shift value dependency on the result of
23940 arm_no_early_alu_shift_value_dep (rtx producer
, rtx consumer
)
23942 rtx value
= PATTERN (producer
);
23943 rtx op
= PATTERN (consumer
);
23946 if (GET_CODE (value
) == COND_EXEC
)
23947 value
= COND_EXEC_CODE (value
);
23948 if (GET_CODE (value
) == PARALLEL
)
23949 value
= XVECEXP (value
, 0, 0);
23950 value
= XEXP (value
, 0);
23951 if (GET_CODE (op
) == COND_EXEC
)
23952 op
= COND_EXEC_CODE (op
);
23953 if (GET_CODE (op
) == PARALLEL
)
23954 op
= XVECEXP (op
, 0, 0);
23957 early_op
= XEXP (op
, 0);
23959 /* This is either an actual independent shift, or a shift applied to
23960 the first operand of another operation. We want the value being
23961 shifted, in either case. */
23962 if (!REG_P (early_op
))
23963 early_op
= XEXP (early_op
, 0);
23965 return !reg_overlap_mentioned_p (value
, early_op
);
23968 /* Return nonzero if the CONSUMER (a mul or mac op) does not
23969 have an early register mult dependency on the result of
23973 arm_no_early_mul_dep (rtx producer
, rtx consumer
)
23975 rtx value
= PATTERN (producer
);
23976 rtx op
= PATTERN (consumer
);
23978 if (GET_CODE (value
) == COND_EXEC
)
23979 value
= COND_EXEC_CODE (value
);
23980 if (GET_CODE (value
) == PARALLEL
)
23981 value
= XVECEXP (value
, 0, 0);
23982 value
= XEXP (value
, 0);
23983 if (GET_CODE (op
) == COND_EXEC
)
23984 op
= COND_EXEC_CODE (op
);
23985 if (GET_CODE (op
) == PARALLEL
)
23986 op
= XVECEXP (op
, 0, 0);
23989 if (GET_CODE (op
) == PLUS
|| GET_CODE (op
) == MINUS
)
23991 if (GET_CODE (XEXP (op
, 0)) == MULT
)
23992 return !reg_overlap_mentioned_p (value
, XEXP (op
, 0));
23994 return !reg_overlap_mentioned_p (value
, XEXP (op
, 1));
24000 /* We can't rely on the caller doing the proper promotion when
24001 using APCS or ATPCS. */
24004 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
24006 return !TARGET_AAPCS_BASED
;
24009 static enum machine_mode
24010 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
24011 enum machine_mode mode
,
24012 int *punsignedp ATTRIBUTE_UNUSED
,
24013 const_tree fntype ATTRIBUTE_UNUSED
,
24014 int for_return ATTRIBUTE_UNUSED
)
24016 if (GET_MODE_CLASS (mode
) == MODE_INT
24017 && GET_MODE_SIZE (mode
) < 4)
24023 /* AAPCS based ABIs use short enums by default. */
24026 arm_default_short_enums (void)
24028 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
24032 /* AAPCS requires that anonymous bitfields affect structure alignment. */
24035 arm_align_anon_bitfield (void)
24037 return TARGET_AAPCS_BASED
;
24041 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
24044 arm_cxx_guard_type (void)
24046 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
24049 /* Return non-zero if the consumer (a multiply-accumulate instruction)
24050 has an accumulator dependency on the result of the producer (a
24051 multiplication instruction) and no other dependency on that result. */
24053 arm_mac_accumulator_is_mul_result (rtx producer
, rtx consumer
)
24055 rtx mul
= PATTERN (producer
);
24056 rtx mac
= PATTERN (consumer
);
24058 rtx mac_op0
, mac_op1
, mac_acc
;
24060 if (GET_CODE (mul
) == COND_EXEC
)
24061 mul
= COND_EXEC_CODE (mul
);
24062 if (GET_CODE (mac
) == COND_EXEC
)
24063 mac
= COND_EXEC_CODE (mac
);
24065 /* Check that mul is of the form (set (...) (mult ...))
24066 and mla is of the form (set (...) (plus (mult ...) (...))). */
24067 if ((GET_CODE (mul
) != SET
|| GET_CODE (XEXP (mul
, 1)) != MULT
)
24068 || (GET_CODE (mac
) != SET
|| GET_CODE (XEXP (mac
, 1)) != PLUS
24069 || GET_CODE (XEXP (XEXP (mac
, 1), 0)) != MULT
))
24072 mul_result
= XEXP (mul
, 0);
24073 mac_op0
= XEXP (XEXP (XEXP (mac
, 1), 0), 0);
24074 mac_op1
= XEXP (XEXP (XEXP (mac
, 1), 0), 1);
24075 mac_acc
= XEXP (XEXP (mac
, 1), 1);
24077 return (reg_overlap_mentioned_p (mul_result
, mac_acc
)
24078 && !reg_overlap_mentioned_p (mul_result
, mac_op0
)
24079 && !reg_overlap_mentioned_p (mul_result
, mac_op1
));
24083 /* The EABI says test the least significant bit of a guard variable. */
24086 arm_cxx_guard_mask_bit (void)
24088 return TARGET_AAPCS_BASED
;
24092 /* The EABI specifies that all array cookies are 8 bytes long. */
24095 arm_get_cookie_size (tree type
)
24099 if (!TARGET_AAPCS_BASED
)
24100 return default_cxx_get_cookie_size (type
);
24102 size
= build_int_cst (sizetype
, 8);
24107 /* The EABI says that array cookies should also contain the element size. */
24110 arm_cookie_has_size (void)
24112 return TARGET_AAPCS_BASED
;
24116 /* The EABI says constructors and destructors should return a pointer to
24117 the object constructed/destroyed. */
24120 arm_cxx_cdtor_returns_this (void)
24122 return TARGET_AAPCS_BASED
;
24125 /* The EABI says that an inline function may never be the key
24129 arm_cxx_key_method_may_be_inline (void)
24131 return !TARGET_AAPCS_BASED
;
24135 arm_cxx_determine_class_data_visibility (tree decl
)
24137 if (!TARGET_AAPCS_BASED
24138 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
24141 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
24142 is exported. However, on systems without dynamic vague linkage,
24143 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
24144 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
24145 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
24147 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
24148 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
24152 arm_cxx_class_data_always_comdat (void)
24154 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
24155 vague linkage if the class has no key function. */
24156 return !TARGET_AAPCS_BASED
;
24160 /* The EABI says __aeabi_atexit should be used to register static
24164 arm_cxx_use_aeabi_atexit (void)
24166 return TARGET_AAPCS_BASED
;
24171 arm_set_return_address (rtx source
, rtx scratch
)
24173 arm_stack_offsets
*offsets
;
24174 HOST_WIDE_INT delta
;
24176 unsigned long saved_regs
;
24178 offsets
= arm_get_frame_offsets ();
24179 saved_regs
= offsets
->saved_regs_mask
;
24181 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
24182 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
24185 if (frame_pointer_needed
)
24186 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
24189 /* LR will be the first saved register. */
24190 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
24195 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
24196 GEN_INT (delta
& ~4095)));
24201 addr
= stack_pointer_rtx
;
24203 addr
= plus_constant (Pmode
, addr
, delta
);
24205 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
24211 thumb_set_return_address (rtx source
, rtx scratch
)
24213 arm_stack_offsets
*offsets
;
24214 HOST_WIDE_INT delta
;
24215 HOST_WIDE_INT limit
;
24218 unsigned long mask
;
24222 offsets
= arm_get_frame_offsets ();
24223 mask
= offsets
->saved_regs_mask
;
24224 if (mask
& (1 << LR_REGNUM
))
24227 /* Find the saved regs. */
24228 if (frame_pointer_needed
)
24230 delta
= offsets
->soft_frame
- offsets
->saved_args
;
24231 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
24237 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
24240 /* Allow for the stack frame. */
24241 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
24243 /* The link register is always the first saved register. */
24246 /* Construct the address. */
24247 addr
= gen_rtx_REG (SImode
, reg
);
24250 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
24251 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
24255 addr
= plus_constant (Pmode
, addr
, delta
);
24257 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
24260 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
24263 /* Implements target hook vector_mode_supported_p. */
24265 arm_vector_mode_supported_p (enum machine_mode mode
)
24267 /* Neon also supports V2SImode, etc. listed in the clause below. */
24268 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
24269 || mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
24272 if ((TARGET_NEON
|| TARGET_IWMMXT
)
24273 && ((mode
== V2SImode
)
24274 || (mode
== V4HImode
)
24275 || (mode
== V8QImode
)))
24278 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
24279 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
24280 || mode
== V2HAmode
))
24286 /* Implements target hook array_mode_supported_p. */
24289 arm_array_mode_supported_p (enum machine_mode mode
,
24290 unsigned HOST_WIDE_INT nelems
)
24293 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
24294 && (nelems
>= 2 && nelems
<= 4))
24300 /* Use the option -mvectorize-with-neon-double to override the use of quardword
24301 registers when autovectorizing for Neon, at least until multiple vector
24302 widths are supported properly by the middle-end. */
24304 static enum machine_mode
24305 arm_preferred_simd_mode (enum machine_mode mode
)
24311 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
24313 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
24315 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
24317 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
24319 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
24326 if (TARGET_REALLY_IWMMXT
)
24342 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
24344 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
24345 using r0-r4 for function arguments, r7 for the stack frame and don't have
24346 enough left over to do doubleword arithmetic. For Thumb-2 all the
24347 potentially problematic instructions accept high registers so this is not
24348 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
24349 that require many low registers. */
24351 arm_class_likely_spilled_p (reg_class_t rclass
)
24353 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
24354 || rclass
== CC_REG
)
24360 /* Implements target hook small_register_classes_for_mode_p. */
24362 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED
)
24364 return TARGET_THUMB1
;
24367 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
24368 ARM insns and therefore guarantee that the shift count is modulo 256.
24369 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
24370 guarantee no particular behavior for out-of-range counts. */
24372 static unsigned HOST_WIDE_INT
24373 arm_shift_truncation_mask (enum machine_mode mode
)
24375 return mode
== SImode
? 255 : 0;
24379 /* Map internal gcc register numbers to DWARF2 register numbers. */
24382 arm_dbx_register_number (unsigned int regno
)
24387 if (IS_VFP_REGNUM (regno
))
24389 /* See comment in arm_dwarf_register_span. */
24390 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
24391 return 64 + regno
- FIRST_VFP_REGNUM
;
24393 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
24396 if (IS_IWMMXT_GR_REGNUM (regno
))
24397 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
24399 if (IS_IWMMXT_REGNUM (regno
))
24400 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
24402 gcc_unreachable ();
24405 /* Dwarf models VFPv3 registers as 32 64-bit registers.
24406 GCC models tham as 64 32-bit registers, so we need to describe this to
24407 the DWARF generation code. Other registers can use the default. */
24409 arm_dwarf_register_span (rtx rtl
)
24416 regno
= REGNO (rtl
);
24417 if (!IS_VFP_REGNUM (regno
))
24420 /* XXX FIXME: The EABI defines two VFP register ranges:
24421 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
24423 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
24424 corresponding D register. Until GDB supports this, we shall use the
24425 legacy encodings. We also use these encodings for D0-D15 for
24426 compatibility with older debuggers. */
24427 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
24430 nregs
= GET_MODE_SIZE (GET_MODE (rtl
)) / 8;
24431 p
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nregs
));
24432 regno
= (regno
- FIRST_VFP_REGNUM
) / 2;
24433 for (i
= 0; i
< nregs
; i
++)
24434 XVECEXP (p
, 0, i
) = gen_rtx_REG (DImode
, 256 + regno
+ i
);
24439 #if ARM_UNWIND_INFO
24440 /* Emit unwind directives for a store-multiple instruction or stack pointer
24441 push during alignment.
24442 These should only ever be generated by the function prologue code, so
24443 expect them to have a particular form. */
24446 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
24449 HOST_WIDE_INT offset
;
24450 HOST_WIDE_INT nregs
;
24456 e
= XVECEXP (p
, 0, 0);
24457 if (GET_CODE (e
) != SET
)
24460 /* First insn will adjust the stack pointer. */
24461 if (GET_CODE (e
) != SET
24462 || !REG_P (XEXP (e
, 0))
24463 || REGNO (XEXP (e
, 0)) != SP_REGNUM
24464 || GET_CODE (XEXP (e
, 1)) != PLUS
)
24467 offset
= -INTVAL (XEXP (XEXP (e
, 1), 1));
24468 nregs
= XVECLEN (p
, 0) - 1;
24470 reg
= REGNO (XEXP (XVECEXP (p
, 0, 1), 1));
24473 /* The function prologue may also push pc, but not annotate it as it is
24474 never restored. We turn this into a stack pointer adjustment. */
24475 if (nregs
* 4 == offset
- 4)
24477 fprintf (asm_out_file
, "\t.pad #4\n");
24481 fprintf (asm_out_file
, "\t.save {");
24483 else if (IS_VFP_REGNUM (reg
))
24486 fprintf (asm_out_file
, "\t.vsave {");
24489 /* Unknown register type. */
24492 /* If the stack increment doesn't match the size of the saved registers,
24493 something has gone horribly wrong. */
24494 if (offset
!= nregs
* reg_size
)
24499 /* The remaining insns will describe the stores. */
24500 for (i
= 1; i
<= nregs
; i
++)
24502 /* Expect (set (mem <addr>) (reg)).
24503 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
24504 e
= XVECEXP (p
, 0, i
);
24505 if (GET_CODE (e
) != SET
24506 || !MEM_P (XEXP (e
, 0))
24507 || !REG_P (XEXP (e
, 1)))
24510 reg
= REGNO (XEXP (e
, 1));
24515 fprintf (asm_out_file
, ", ");
24516 /* We can't use %r for vfp because we need to use the
24517 double precision register names. */
24518 if (IS_VFP_REGNUM (reg
))
24519 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
24521 asm_fprintf (asm_out_file
, "%r", reg
);
24523 #ifdef ENABLE_CHECKING
24524 /* Check that the addresses are consecutive. */
24525 e
= XEXP (XEXP (e
, 0), 0);
24526 if (GET_CODE (e
) == PLUS
)
24528 offset
+= reg_size
;
24529 if (!REG_P (XEXP (e
, 0))
24530 || REGNO (XEXP (e
, 0)) != SP_REGNUM
24531 || !CONST_INT_P (XEXP (e
, 1))
24532 || offset
!= INTVAL (XEXP (e
, 1)))
24537 || REGNO (e
) != SP_REGNUM
)
24541 fprintf (asm_out_file
, "}\n");
24544 /* Emit unwind directives for a SET. */
24547 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
24555 switch (GET_CODE (e0
))
24558 /* Pushing a single register. */
24559 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
24560 || !REG_P (XEXP (XEXP (e0
, 0), 0))
24561 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
24564 asm_fprintf (asm_out_file
, "\t.save ");
24565 if (IS_VFP_REGNUM (REGNO (e1
)))
24566 asm_fprintf(asm_out_file
, "{d%d}\n",
24567 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
24569 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
24573 if (REGNO (e0
) == SP_REGNUM
)
24575 /* A stack increment. */
24576 if (GET_CODE (e1
) != PLUS
24577 || !REG_P (XEXP (e1
, 0))
24578 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
24579 || !CONST_INT_P (XEXP (e1
, 1)))
24582 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
24583 -INTVAL (XEXP (e1
, 1)));
24585 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
24587 HOST_WIDE_INT offset
;
24589 if (GET_CODE (e1
) == PLUS
)
24591 if (!REG_P (XEXP (e1
, 0))
24592 || !CONST_INT_P (XEXP (e1
, 1)))
24594 reg
= REGNO (XEXP (e1
, 0));
24595 offset
= INTVAL (XEXP (e1
, 1));
24596 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
24597 HARD_FRAME_POINTER_REGNUM
, reg
,
24600 else if (REG_P (e1
))
24603 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
24604 HARD_FRAME_POINTER_REGNUM
, reg
);
24609 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
24611 /* Move from sp to reg. */
24612 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
24614 else if (GET_CODE (e1
) == PLUS
24615 && REG_P (XEXP (e1
, 0))
24616 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
24617 && CONST_INT_P (XEXP (e1
, 1)))
24619 /* Set reg to offset from sp. */
24620 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
24621 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
24633 /* Emit unwind directives for the given insn. */
24636 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
24639 bool handled_one
= false;
24641 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
24644 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
24645 && (TREE_NOTHROW (current_function_decl
)
24646 || crtl
->all_throwers_are_sibcalls
))
24649 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
24652 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
24654 pat
= XEXP (note
, 0);
24655 switch (REG_NOTE_KIND (note
))
24657 case REG_FRAME_RELATED_EXPR
:
24660 case REG_CFA_REGISTER
:
24663 pat
= PATTERN (insn
);
24664 if (GET_CODE (pat
) == PARALLEL
)
24665 pat
= XVECEXP (pat
, 0, 0);
24668 /* Only emitted for IS_STACKALIGN re-alignment. */
24673 src
= SET_SRC (pat
);
24674 dest
= SET_DEST (pat
);
24676 gcc_assert (src
== stack_pointer_rtx
);
24677 reg
= REGNO (dest
);
24678 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
24681 handled_one
= true;
24684 case REG_CFA_DEF_CFA
:
24685 case REG_CFA_EXPRESSION
:
24686 case REG_CFA_ADJUST_CFA
:
24687 case REG_CFA_OFFSET
:
24688 /* ??? Only handling here what we actually emit. */
24689 gcc_unreachable ();
24697 pat
= PATTERN (insn
);
24700 switch (GET_CODE (pat
))
24703 arm_unwind_emit_set (asm_out_file
, pat
);
24707 /* Store multiple. */
24708 arm_unwind_emit_sequence (asm_out_file
, pat
);
24717 /* Output a reference from a function exception table to the type_info
24718 object X. The EABI specifies that the symbol should be relocated by
24719 an R_ARM_TARGET2 relocation. */
24722 arm_output_ttype (rtx x
)
24724 fputs ("\t.word\t", asm_out_file
);
24725 output_addr_const (asm_out_file
, x
);
24726 /* Use special relocations for symbol references. */
24727 if (!CONST_INT_P (x
))
24728 fputs ("(TARGET2)", asm_out_file
);
24729 fputc ('\n', asm_out_file
);
24734 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
24737 arm_asm_emit_except_personality (rtx personality
)
24739 fputs ("\t.personality\t", asm_out_file
);
24740 output_addr_const (asm_out_file
, personality
);
24741 fputc ('\n', asm_out_file
);
24744 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
24747 arm_asm_init_sections (void)
24749 exception_section
= get_unnamed_section (0, output_section_asm_op
,
24752 #endif /* ARM_UNWIND_INFO */
24754 /* Output unwind directives for the start/end of a function. */
24757 arm_output_fn_unwind (FILE * f
, bool prologue
)
24759 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
24763 fputs ("\t.fnstart\n", f
);
24766 /* If this function will never be unwound, then mark it as such.
24767 The came condition is used in arm_unwind_emit to suppress
24768 the frame annotations. */
24769 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
24770 && (TREE_NOTHROW (current_function_decl
)
24771 || crtl
->all_throwers_are_sibcalls
))
24772 fputs("\t.cantunwind\n", f
);
24774 fputs ("\t.fnend\n", f
);
24779 arm_emit_tls_decoration (FILE *fp
, rtx x
)
24781 enum tls_reloc reloc
;
24784 val
= XVECEXP (x
, 0, 0);
24785 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
24787 output_addr_const (fp
, val
);
24792 fputs ("(tlsgd)", fp
);
24795 fputs ("(tlsldm)", fp
);
24798 fputs ("(tlsldo)", fp
);
24801 fputs ("(gottpoff)", fp
);
24804 fputs ("(tpoff)", fp
);
24807 fputs ("(tlsdesc)", fp
);
24810 gcc_unreachable ();
24819 fputs (" + (. - ", fp
);
24820 output_addr_const (fp
, XVECEXP (x
, 0, 2));
24821 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
24822 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
24823 output_addr_const (fp
, XVECEXP (x
, 0, 3));
24833 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
24836 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
24838 gcc_assert (size
== 4);
24839 fputs ("\t.word\t", file
);
24840 output_addr_const (file
, x
);
24841 fputs ("(tlsldo)", file
);
24844 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
24847 arm_output_addr_const_extra (FILE *fp
, rtx x
)
24849 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
24850 return arm_emit_tls_decoration (fp
, x
);
24851 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
24854 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
24856 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
24857 assemble_name_raw (fp
, label
);
24861 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
24863 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
24867 output_addr_const (fp
, XVECEXP (x
, 0, 0));
24871 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
24873 output_addr_const (fp
, XVECEXP (x
, 0, 0));
24877 output_addr_const (fp
, XVECEXP (x
, 0, 1));
24881 else if (GET_CODE (x
) == CONST_VECTOR
)
24882 return arm_emit_vector_const (fp
, x
);
24887 /* Output assembly for a shift instruction.
24888 SET_FLAGS determines how the instruction modifies the condition codes.
24889 0 - Do not set condition codes.
24890 1 - Set condition codes.
24891 2 - Use smallest instruction. */
24893 arm_output_shift(rtx
* operands
, int set_flags
)
24896 static const char flag_chars
[3] = {'?', '.', '!'};
24901 c
= flag_chars
[set_flags
];
24902 if (TARGET_UNIFIED_ASM
)
24904 shift
= shift_op(operands
[3], &val
);
24908 operands
[2] = GEN_INT(val
);
24909 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
24912 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
24915 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
24916 output_asm_insn (pattern
, operands
);
24920 /* Output assembly for a WMMX immediate shift instruction. */
24922 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
24924 int shift
= INTVAL (operands
[2]);
24926 enum machine_mode opmode
= GET_MODE (operands
[0]);
24928 gcc_assert (shift
>= 0);
24930 /* If the shift value in the register versions is > 63 (for D qualifier),
24931 31 (for W qualifier) or 15 (for H qualifier). */
24932 if (((opmode
== V4HImode
) && (shift
> 15))
24933 || ((opmode
== V2SImode
) && (shift
> 31))
24934 || ((opmode
== DImode
) && (shift
> 63)))
24938 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
24939 output_asm_insn (templ
, operands
);
24940 if (opmode
== DImode
)
24942 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
24943 output_asm_insn (templ
, operands
);
24948 /* The destination register will contain all zeros. */
24949 sprintf (templ
, "wzero\t%%0");
24950 output_asm_insn (templ
, operands
);
24955 if ((opmode
== DImode
) && (shift
> 32))
24957 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
24958 output_asm_insn (templ
, operands
);
24959 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
24960 output_asm_insn (templ
, operands
);
24964 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
24965 output_asm_insn (templ
, operands
);
24970 /* Output assembly for a WMMX tinsr instruction. */
24972 arm_output_iwmmxt_tinsr (rtx
*operands
)
24974 int mask
= INTVAL (operands
[3]);
24977 int units
= mode_nunits
[GET_MODE (operands
[0])];
24978 gcc_assert ((mask
& (mask
- 1)) == 0);
24979 for (i
= 0; i
< units
; ++i
)
24981 if ((mask
& 0x01) == 1)
24987 gcc_assert (i
< units
);
24989 switch (GET_MODE (operands
[0]))
24992 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
24995 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
24998 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
25001 gcc_unreachable ();
25004 output_asm_insn (templ
, operands
);
25009 /* Output a Thumb-1 casesi dispatch sequence. */
25011 thumb1_output_casesi (rtx
*operands
)
25013 rtx diff_vec
= PATTERN (next_real_insn (operands
[0]));
25015 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
25017 switch (GET_MODE(diff_vec
))
25020 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
25021 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
25023 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
25024 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
25026 return "bl\t%___gnu_thumb1_case_si";
25028 gcc_unreachable ();
25032 /* Output a Thumb-2 casesi instruction. */
25034 thumb2_output_casesi (rtx
*operands
)
25036 rtx diff_vec
= PATTERN (next_real_insn (operands
[2]));
25038 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
25040 output_asm_insn ("cmp\t%0, %1", operands
);
25041 output_asm_insn ("bhi\t%l3", operands
);
25042 switch (GET_MODE(diff_vec
))
25045 return "tbb\t[%|pc, %0]";
25047 return "tbh\t[%|pc, %0, lsl #1]";
25051 output_asm_insn ("adr\t%4, %l2", operands
);
25052 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
25053 output_asm_insn ("add\t%4, %4, %5", operands
);
25058 output_asm_insn ("adr\t%4, %l2", operands
);
25059 return "ldr\t%|pc, [%4, %0, lsl #2]";
25062 gcc_unreachable ();
25066 /* Most ARM cores are single issue, but some newer ones can dual issue.
25067 The scheduler descriptions rely on this being correct. */
25069 arm_issue_rate (void)
25091 /* A table and a function to perform ARM-specific name mangling for
25092 NEON vector types in order to conform to the AAPCS (see "Procedure
25093 Call Standard for the ARM Architecture", Appendix A). To qualify
25094 for emission with the mangled names defined in that document, a
25095 vector type must not only be of the correct mode but also be
25096 composed of NEON vector element types (e.g. __builtin_neon_qi). */
25099 enum machine_mode mode
;
25100 const char *element_type_name
;
25101 const char *aapcs_name
;
25102 } arm_mangle_map_entry
;
/* Lookup table used by arm_mangle_type; terminated by the VOIDmode row.
   The leading digits in each aapcs_name are the Itanium-ABI name length.  */
25104 static arm_mangle_map_entry arm_mangle_map
[] = {
25105 /* 64-bit containerized types. */
25106 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
25107 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
25108 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
25109 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
25110 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
25111 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
25112 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
25113 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
25114 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
25115 /* 128-bit containerized types. */
25116 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
25117 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
25118 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
25119 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
25120 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
25121 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
25122 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
25123 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
25124 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
/* Sentinel terminator row.  */
25125 { VOIDmode
, NULL
, NULL
}
/* Implement TARGET_MANGLE_TYPE: return the AAPCS mangled name for TYPE,
   or NULL (presumably — the fall-through return is missing from this
   extract) to use the default mangling.  */
25129 arm_mangle_type (const_tree type
)
25131 arm_mangle_map_entry
*pos
= arm_mangle_map
;
25133 /* The ARM ABI documents (10th October 2008) say that "__va_list"
25134 has to be managled as if it is in the "std" namespace. */
25135 if (TARGET_AAPCS_BASED
25136 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
25137 return "St9__va_list";
25139 /* Half-precision float. */
/* NOTE(review): the return for the 16-bit REAL_TYPE case (presumably
   "Dh") is missing from this extract.  */
25140 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
25143 if (TREE_CODE (type
) != VECTOR_TYPE
)
25146 /* Check the mode of the vector type, and the name of the vector
25147 element type, against the table. */
25148 while (pos
->mode
!= VOIDmode
)
25150 tree elt_type
= TREE_TYPE (type
);
25152 if (pos
->mode
== TYPE_MODE (type
)
25153 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
25154 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
25155 pos
->element_type_name
))
25156 return pos
->aapcs_name
;
25161 /* Use the default mangling for unrecognized (possibly user-defined)
25166 /* Order of allocation of core registers for Thumb: this allocation is
25167 written over the corresponding initial entries of the array
25168 initialized with REG_ALLOC_ORDER. We allocate all low registers
25169 first. Saving and restoring a low register is usually cheaper than
25170 using a call-clobbered high register. */
/* r3..r0 first (argument regs, dead after the call), then r4-r7,
   then lr/ip and the remaining high registers.  */
25172 static const int thumb_core_reg_alloc_order
[] =
25174 3, 2, 1, 0, 4, 5, 6, 7,
25175 14, 12, 8, 9, 10, 11
25178 /* Adjust register allocation order when compiling for Thumb. */
/* NOTE(review): the guard between the two memcpys (original line 25185,
   presumably `if (TARGET_THUMB)`) is missing from this extract — without
   it the Thumb order would be applied unconditionally; verify upstream.  */
25181 arm_order_regs_for_local_alloc (void)
25183 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
25184 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
/* Overwrite only the leading entries with the Thumb-preferred order.  */
25186 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
25187 sizeof (thumb_core_reg_alloc_order
));
25190 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
/* A frame pointer is forced for nonlocal labels, when the subtarget
   demands one, or for ARM-state APCS-frame non-leaf functions.  */
25193 arm_frame_pointer_required (void)
25195 return (cfun
->has_nonlocal_label
25196 || SUBTARGET_FRAME_POINTER_REQUIRED
25197 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
25200 /* Only thumb1 can't support conditional execution, so return true if
25201 the target is not thumb1. */
25203 arm_have_conditional_execution (void)
25205 return !TARGET_THUMB1
;
25208 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
/* Natural alignment is the vector's size in bits, capped at 64 for
   AAPCS-based targets.  The final `return align;` is outside this
   extract's visible lines.  */
25209 static HOST_WIDE_INT
25210 arm_vector_alignment (const_tree type
)
25212 HOST_WIDE_INT align
= tree_low_cst (TYPE_SIZE (type
), 0);
25214 if (TARGET_AAPCS_BASED
)
25215 align
= MIN (align
, 64);
/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES: offer both
   quad (16-byte) and double (8-byte) vectors unless the user restricted
   NEON vectorization to double-words only (then 0 = default size only).  */
25220 static unsigned int
25221 arm_autovectorize_vector_sizes (void)
25223 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
/* Implement TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  On
   little-endian NEON the answer for unpacked vectors is trivially yes
   (the `return true;` for that branch is missing from this extract).  */
25227 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
25229 /* Vectors which aren't in packed structures will not be less aligned than
25230 the natural alignment of their element type, so this is safe. */
25231 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
25234 return default_builtin_vector_alignment_reachable (type
, is_packed
);
/* Implement TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT for NEON.
   NOTE(review): the is_packed parameter and several early returns are
   missing from this extract; the visible logic handles little-endian
   NEON specially and defers everything else to the default hook.  */
25238 arm_builtin_support_vector_misalignment (enum machine_mode mode
,
25239 const_tree type
, int misalignment
,
25242 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
/* Element-size alignment in bytes of the vector's element type.  */
25244 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
25249 /* If the misalignment is unknown, we should be able to handle the access
25250 so long as it is not to a member of a packed data structure. */
25251 if (misalignment
== -1)
25254 /* Return true if the misalignment is a multiple of the natural alignment
25255 of the vector's element type. This is probably always going to be
25256 true in practice, since we've already established that this isn't a
25258 return ((misalignment
% align
) == 0);
25261 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
/* Implement TARGET_CONDITIONAL_REGISTER_USAGE: adjust fixed_regs,
   call_used_regs and global_regs for the selected ARM variant.
   NOTE(review): braces and the local `int regno;` declaration are
   missing from this extract — verify block structure upstream.  */
25266 arm_conditional_register_usage (void)
25270 if (TARGET_THUMB1
&& optimize_size
)
25272 /* When optimizing for size on Thumb-1, it's better not
25273 to use the HI regs, because of the overhead of
25275 for (regno
= FIRST_HI_REGNUM
;
25276 regno
<= LAST_HI_REGNUM
; ++regno
)
25277 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
25280 /* The link register can be clobbered by any branch insn,
25281 but we have no way to track that at present, so mark
25282 it as unavailable. */
25284 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
25286 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
25288 /* VFPv3 registers are disabled when earlier VFP
25289 versions are selected due to the definition of
25290 LAST_VFP_REGNUM. */
25291 for (regno
= FIRST_VFP_REGNUM
;
25292 regno
<= LAST_VFP_REGNUM
; ++ regno
)
25294 fixed_regs
[regno
] = 0;
/* d0-d7 and d16-d31 are call-clobbered; d8-d15 are callee-saved.  */
25295 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
25296 || regno
>= FIRST_VFP_REGNUM
+ 32;
25300 if (TARGET_REALLY_IWMMXT
)
25302 regno
= FIRST_IWMMXT_GR_REGNUM
;
25303 /* The 2002/10/09 revision of the XScale ABI has wCG0
25304 and wCG1 as call-preserved registers. The 2002/11/21
25305 revision changed this so that all wCG registers are
25306 scratch registers. */
25307 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
25308 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
25309 fixed_regs
[regno
] = 0;
25310 /* The XScale ABI has wR0 - wR9 as scratch registers,
25311 the rest as call-preserved registers. */
25312 for (regno
= FIRST_IWMMXT_REGNUM
;
25313 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
25315 fixed_regs
[regno
] = 0;
25316 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
25320 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
25322 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
25323 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
25325 else if (TARGET_APCS_STACK
)
/* APCS stack-checking reserves r10 (sl).  */
25327 fixed_regs
[10] = 1;
25328 call_used_regs
[10] = 1;
25330 /* -mcaller-super-interworking reserves r11 for calls to
25331 _interwork_r11_call_via_rN(). Making the register global
25332 is an easy way of ensuring that it remains valid for all
25334 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
25335 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
25337 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
25338 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
25339 if (TARGET_CALLER_INTERWORKING
)
25340 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
/* Give the subtarget a final chance to adjust register usage.  */
25342 SUBTARGET_CONDITIONAL_REGISTER_USAGE
/* Implement TARGET_PREFERRED_RENAME_CLASS.  The `return LO_REGS;` /
   `return NO_REGS;` tails are outside this extract's visible lines.  */
25346 arm_preferred_rename_class (reg_class_t rclass
)
25348 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
25349 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
25350 and code size can be reduced. */
25351 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
25357 /* Compute the atrribute "length" of insn "*push_multi".
25358 So this function MUST be kept in sync with that insn pattern. */
/* NOTE(review): the early returns for ARM/Thumb-2 and the final length
   computation (original lines 25364-25372 and after 25378) are missing
   from this extract.  The visible loop scans the PARALLEL for any
   high register other than LR, which forces the wider encoding.  */
25360 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
25362 int i
, regno
, hi_reg
;
25363 int num_saves
= XVECLEN (parallel_op
, 0);
25373 regno
= REGNO (first_op
);
25374 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
25375 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
25377 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
25378 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
25386 /* Compute the number of instructions emitted by output_move_double. */
/* NOTE(review): the declarations of the local `count` and `ops[2]` and
   the final `return count;` are missing from this extract.  */
25388 arm_count_output_move_double_insns (rtx
*operands
)
25392 /* output_move_double may modify the operands array, so call it
25393 here on a copy of the array. */
25394 ops
[0] = operands
[0];
25395 ops
[1] = operands
[1];
/* emit_p == false: only count, do not emit assembly.  */
25396 output_move_double (ops
, false, &count
);
/* If OPERAND is a CONST_DOUBLE whose reciprocal is an exact power of
   two that fits in 32 bits, return log2 of that reciprocal (the number
   of fractional bits for a VCVT fixed-point conversion); the `return 0;`
   failure paths are missing from this extract.  */
25401 vfp3_const_double_for_fract_bits (rtx operand
)
25403 REAL_VALUE_TYPE r0
;
25405 if (!CONST_DOUBLE_P (operand
))
25408 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
/* r0 := 1/operand, only if that inverse is exact.  */
25409 if (exact_real_inverse (DFmode
, &r0
))
25411 if (exact_real_truncate (DFmode
, &r0
))
25413 HOST_WIDE_INT value
= real_to_integer (&r0
);
25414 value
= value
& 0xffffffff;
/* Accept only non-zero powers of two.  */
25415 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
25416 return int_log2 (value
);
25422 /* Emit a memory barrier around an atomic sequence according to MODEL. */
/* Barrier before the atomic op (pre = true).  */
25425 arm_pre_atomic_barrier (enum memmodel model
)
25427 if (need_atomic_barrier_p (model
, true))
25428 emit_insn (gen_memory_barrier ());
/* Barrier after the atomic op (pre = false).  */
25432 arm_post_atomic_barrier (enum memmodel model
)
25434 if (need_atomic_barrier_p (model
, false))
25435 emit_insn (gen_memory_barrier ());
25438 /* Emit the load-exclusive and store-exclusive instructions. */
/* Emit LDREX{B,H,,D} of MEM into RVAL according to MODE.  */
25441 arm_emit_load_exclusive (enum machine_mode mode
, rtx rval
, rtx mem
)
25443 rtx (*gen
) (rtx
, rtx
);
25447 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
25448 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
25449 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
25450 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
25452 gcc_unreachable ();
25455 emit_insn (gen (rval
, mem
));
/* Emit STREX{B,H,,D} of RVAL into MEM; BVAL receives the 0/1
   success flag of the exclusive store.  */
25459 arm_emit_store_exclusive (enum machine_mode mode
, rtx bval
, rtx rval
, rtx mem
)
25461 rtx (*gen
) (rtx
, rtx
, rtx
);
25465 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
25466 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
25467 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
25468 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
25470 gcc_unreachable ();
25473 emit_insn (gen (bval
, rval
, mem
));
25476 /* Mark the previous jump instruction as unlikely. */
/* Emits INSN as a jump and attaches a REG_BR_PROB note of ~1% taken
   probability so the branch is laid out as cold.  */
25479 emit_unlikely_jump (rtx insn
)
25481 rtx very_unlikely
= GEN_INT (REG_BR_PROB_BASE
/ 100 - 1);
25483 insn
= emit_jump_insn (insn
);
25484 add_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
25487 /* Expand a compare and swap pattern. */
/* Operands: 0 = bool success out, 1 = old-value out, 2 = mem,
   3 = expected, 4 = desired, 5 = is_weak, 6 = success model,
   7 = failure model.  NOTE(review): several structural lines (mem
   assignment, mode switch labels, braces) are missing from this
   extract — verify against upstream arm.c.  */
25490 arm_expand_compare_and_swap (rtx operands
[])
25492 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
25493 enum machine_mode mode
;
25494 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
25496 bval
= operands
[0];
25497 rval
= operands
[1];
25499 oldval
= operands
[3];
25500 newval
= operands
[4];
25501 is_weak
= operands
[5];
25502 mod_s
= operands
[6];
25503 mod_f
= operands
[7];
25504 mode
= GET_MODE (mem
);
25510 /* For narrow modes, we're going to perform the comparison in SImode,
25511 so do the zero-extension now. */
25512 rval
= gen_reg_rtx (SImode
);
25513 oldval
= convert_modes (SImode
, mode
, oldval
, true);
25517 /* Force the value into a register if needed. We waited until after
25518 the zero-extension above to do this properly. */
25519 if (!arm_add_operand (oldval
, SImode
))
25520 oldval
= force_reg (SImode
, oldval
);
25524 if (!cmpdi_operand (oldval
, mode
))
25525 oldval
= force_reg (mode
, oldval
);
25529 gcc_unreachable ();
25534 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
25535 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
25536 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
25537 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
25539 gcc_unreachable ();
25542 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
/* Narrow modes computed in SImode; hand back only the low part.  */
25544 if (mode
== QImode
|| mode
== HImode
)
25545 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
25547 /* In all cases, we arrange for success to be signaled by Z set.
25548 This arrangement allows for the boolean result to be used directly
25549 in a subsequent branch, post optimization. */
25550 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
25551 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
25552 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
25555 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
25556 another memory store between the load-exclusive and store-exclusive can
25557 reset the monitor from Exclusive to Open state. This means we must wait
25558 until after reload to split the pattern, lest we get a register spill in
25559 the middle of the atomic sequence. */
/* NOTE(review): some lines (the `bool is_weak;` declaration, the mem
   assignment, and the strong-CAS retry guard around label2 creation /
   the backward branch) are missing from this extract.  */
25562 arm_split_compare_and_swap (rtx operands
[])
25564 rtx rval
, mem
, oldval
, newval
, scratch
;
25565 enum machine_mode mode
;
25566 enum memmodel mod_s
, mod_f
;
25568 rtx label1
, label2
, x
, cond
;
25570 rval
= operands
[0];
25572 oldval
= operands
[2];
25573 newval
= operands
[3];
25574 is_weak
= (operands
[4] != const0_rtx
);
25575 mod_s
= (enum memmodel
) INTVAL (operands
[5]);
25576 mod_f
= (enum memmodel
) INTVAL (operands
[6]);
25577 scratch
= operands
[7];
25578 mode
= GET_MODE (mem
);
25580 arm_pre_atomic_barrier (mod_s
);
/* label1: retry target for a strong CAS; label2: failure exit.  */
25585 label1
= gen_label_rtx ();
25586 emit_label (label1
);
25588 label2
= gen_label_rtx ();
25590 arm_emit_load_exclusive (mode
, rval
, mem
);
/* Compare loaded value with expected; on mismatch branch to label2.  */
25592 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
25593 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
25594 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
25595 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
25596 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
25598 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
);
25600 /* Weak or strong, we want EQ to be true for success, so that we
25601 match the flags that we got from the compare above. */
25602 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
25603 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
25604 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
/* Strong CAS: loop back to label1 if the exclusive store failed.  */
25608 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
25609 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
25610 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
25611 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
/* Relaxed failure ordering may skip the trailing barrier, so the
   failure label is placed after the barrier in that case.  */
25614 if (mod_f
!= MEMMODEL_RELAXED
)
25615 emit_label (label2
);
25617 arm_post_atomic_barrier (mod_s
);
25619 if (mod_f
== MEMMODEL_RELAXED
)
25620 emit_label (label2
);
/* Split an atomic read-modify-write (CODE applied to MEM with VALUE)
   into a LDREX/compute/STREX retry loop.  OLD_OUT/NEW_OUT receive the
   pre-/post-operation values (either may be NULL upstream); COND is the
   scratch that holds the STREX success flag.  NOTE(review): the switch
   statement heads, the `rtx label, x;` declaration and several braces
   are missing from this extract.  */
25624 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
25625 rtx value
, rtx model_rtx
, rtx cond
)
25627 enum memmodel model
= (enum memmodel
) INTVAL (model_rtx
);
25628 enum machine_mode mode
= GET_MODE (mem
);
/* Narrow modes do their arithmetic in SImode.  */
25629 enum machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
25632 arm_pre_atomic_barrier (model
);
25634 label
= gen_label_rtx ();
25635 emit_label (label
);
25638 new_out
= gen_lowpart (wmode
, new_out
);
25640 old_out
= gen_lowpart (wmode
, old_out
);
25643 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
25645 arm_emit_load_exclusive (mode
, old_out
, mem
);
/* NOT (a.k.a. NAND here): new = ~(old & value).  */
25654 x
= gen_rtx_AND (wmode
, old_out
, value
);
25655 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
25656 x
= gen_rtx_NOT (wmode
, new_out
);
25657 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
/* MINUS with a constant is turned into PLUS of the negation.  */
25661 if (CONST_INT_P (value
))
25663 value
= GEN_INT (-INTVAL (value
));
25669 if (mode
== DImode
)
25671 /* DImode plus/minus need to clobber flags. */
25672 /* The adddi3 and subdi3 patterns are incorrectly written so that
25673 they require matching operands, even when we could easily support
25674 three operands. Thankfully, this can be fixed up post-splitting,
25675 as the individual add+adc patterns do accept three operands and
25676 post-reload cprop can make these moves go away. */
25677 emit_move_insn (new_out
, old_out
);
25679 x
= gen_adddi3 (new_out
, new_out
, value
);
25681 x
= gen_subdi3 (new_out
, new_out
, value
);
/* Generic case: new = old CODE value.  */
25688 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
25689 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
25693 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
));
/* Retry if the exclusive store lost the reservation.  */
25695 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
25696 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
25698 arm_post_atomic_barrier (model
);
/* Maximum number of vector elements handled by the constant-permutation
   expanders below (16 x QImode in a 128-bit NEON register).  */
25701 #define MAX_VECT_LEN 16
/* Working description of one vec_perm_const expansion: destination,
   the two source operands, the literal permutation indices, the vector
   mode and element count.  (one_vector_p / testing_p fields appear to
   be outside this extract's visible lines.)  */
25703 struct expand_vec_perm_d
25705 rtx target
, op0
, op1
;
25706 unsigned char perm
[MAX_VECT_LEN
];
25707 enum machine_mode vmode
;
25708 unsigned char nelt
;
25713 /* Generate a variable permutation. */
/* Emit a NEON VTBL-based permutation of OP0/OP1 by SEL into TARGET.
   One input vector -> vtbl1; two inputs -> combine then vtbl2.  */
25716 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
25718 enum machine_mode vmode
= GET_MODE (target
);
25719 bool one_vector_p
= rtx_equal_p (op0
, op1
);
/* VTBL operates on byte vectors only.  */
25721 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
25722 gcc_checking_assert (GET_MODE (op0
) == vmode
);
25723 gcc_checking_assert (GET_MODE (op1
) == vmode
);
25724 gcc_checking_assert (GET_MODE (sel
) == vmode
);
25725 gcc_checking_assert (TARGET_NEON
);
25729 if (vmode
== V8QImode
)
25730 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
25732 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
/* Two distinct inputs: build the double-width table register pair.  */
25738 if (vmode
== V8QImode
)
25740 pair
= gen_reg_rtx (V16QImode
);
25741 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
25742 pair
= gen_lowpart (TImode
, pair
);
25743 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
25747 pair
= gen_reg_rtx (OImode
);
25748 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
25749 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
/* Expand a variable VEC_PERM_EXPR: mask SEL to the valid index range
   (VTBL has no modulo semantics) then defer to arm_expand_vec_perm_1.
   NOTE(review): the loop body filling rmask[] (original line 25770)
   is missing from this extract.  */
25755 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
25757 enum machine_mode vmode
= GET_MODE (target
);
25758 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
25759 bool one_vector_p
= rtx_equal_p (op0
, op1
);
25760 rtx rmask
[MAX_VECT_LEN
], mask
;
25762 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25763 numbering of elements for big-endian, we must reverse the order. */
25764 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
25766 /* The VTBL instruction does not use a modulo index, so we must take care
25767 of that ourselves. */
25768 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
25769 for (i
= 0; i
< nelt
; ++i
)
25771 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
25772 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
25774 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
25777 /* Generate or test for an insn that supports a constant permutation. */
25779 /* Recognize patterns for the VUZP insns. */
/* Match perm = {0,2,4,...} (even, odd==0) or {1,3,5,...} (odd==1) and
   emit a VUZP.  Returns false on mismatch, true once emitted; several
   early-return lines, the odd assignment and the testing_p shortcut are
   missing from this extract.  */
25782 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
25784 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
25785 rtx out0
, out1
, in0
, in1
, x
;
25786 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
/* VUZP does not exist for 64-bit elements.  */
25788 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
25791 /* Note that these are little-endian tests. Adjust for big-endian later. */
25792 if (d
->perm
[0] == 0)
25794 else if (d
->perm
[0] == 1)
25798 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
25800 for (i
= 0; i
< nelt
; i
++)
25802 unsigned elt
= (i
* 2 + odd
) & mask
;
25803 if (d
->perm
[i
] != elt
)
25813 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
25814 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
25815 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
25816 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
25817 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
25818 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
25819 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
25820 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
25822 gcc_unreachable ();
/* Big-endian: swap inputs/outputs to compensate for lane numbering.  */
25827 if (BYTES_BIG_ENDIAN
)
25829 x
= in0
, in0
= in1
, in1
= x
;
25834 out1
= gen_reg_rtx (d
->vmode
);
25836 x
= out0
, out0
= out1
, out1
= x
;
25838 emit_insn (gen (out0
, in0
, in1
, out1
));
25842 /* Recognize patterns for the VZIP insns. */
/* Match an interleave of the low (high==0) or high (high==nelt/2)
   halves of the two inputs and emit a VZIP.  Mirrors the structure of
   arm_evpc_neon_vuzp; the `high = nelt / 2;` assignment and early
   returns are missing from this extract.  */
25845 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
25847 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
25848 rtx out0
, out1
, in0
, in1
, x
;
25849 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
/* VZIP does not exist for 64-bit elements.  */
25851 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
25854 /* Note that these are little-endian tests. Adjust for big-endian later. */
25856 if (d
->perm
[0] == high
)
25858 else if (d
->perm
[0] == 0)
25862 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
/* Each output pair must be (i + high) from op0 and the matching
   element from op1.  */
25864 for (i
= 0; i
< nelt
/ 2; i
++)
25866 unsigned elt
= (i
+ high
) & mask
;
25867 if (d
->perm
[i
* 2] != elt
)
25869 elt
= (elt
+ nelt
) & mask
;
25870 if (d
->perm
[i
* 2 + 1] != elt
)
25880 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
25881 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
25882 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
25883 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
25884 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
25885 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
25886 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
25887 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
25889 gcc_unreachable ();
/* Big-endian lane-numbering compensation, as in vuzp.  */
25894 if (BYTES_BIG_ENDIAN
)
25896 x
= in0
, in0
= in1
, in1
= x
;
25901 out1
= gen_reg_rtx (d
->vmode
);
25903 x
= out0
, out0
= out1
, out1
= x
;
25905 emit_insn (gen (out0
, in0
, in1
, out1
));
25909 /* Recognize patterns for the VREV insns. */
/* Match element-reversal-within-groups permutations on a single input
   and emit VREV16/32/64.  `diff = d->perm[0]` selects the group size
   (7, 3 or 1); the assignment and the switch heads over (diff,
   element size) are missing from this extract.  */
25912 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
25914 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
25915 rtx (*gen
)(rtx
, rtx
, rtx
);
/* VREV is a one-input operation.  */
25917 if (!d
->one_vector_p
)
25926 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
25927 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
25935 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
25936 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
25937 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
25938 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
25946 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
25947 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
25948 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
25949 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
25950 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
25951 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
25952 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
25953 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
/* Verify every group of diff+1 elements is exactly reversed.  */
25962 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
25963 for (j
= 0; j
<= diff
; j
+= 1)
25965 /* This is guaranteed to be true as the value of diff
25966 is 7, 3, 1 and we should have enough elements in the
25967 queue to generate this. Getting a vector mask with a
25968 value of diff other than these values implies that
25969 something is wrong by the time we get here. */
25970 gcc_assert (i
+ j
< nelt
);
25971 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
25979 /* ??? The third operand is an artifact of the builtin infrastructure
25980 and is ignored by the actual instruction. */
25981 emit_insn (gen (d
->target
, d
->op0
, const0_rtx
));
25985 /* Recognize patterns for the VTRN insns. */
/* Match transpose-style permutations (pairs {i+odd, i+nelt+odd}) and
   emit a VTRN.  Same missing-line caveats as the vuzp/vzip matchers
   in this extract.  */
25988 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
25990 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
25991 rtx out0
, out1
, in0
, in1
, x
;
25992 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
/* VTRN does not exist for 64-bit elements.  */
25994 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
25997 /* Note that these are little-endian tests. Adjust for big-endian later. */
25998 if (d
->perm
[0] == 0)
26000 else if (d
->perm
[0] == 1)
26004 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
26006 for (i
= 0; i
< nelt
; i
+= 2)
26008 if (d
->perm
[i
] != i
+ odd
)
26010 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
26020 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
26021 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
26022 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
26023 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
26024 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
26025 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
26026 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
26027 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
26029 gcc_unreachable ();
/* Big-endian lane-numbering compensation.  */
26034 if (BYTES_BIG_ENDIAN
)
26036 x
= in0
, in0
= in1
, in1
= x
;
26041 out1
= gen_reg_rtx (d
->vmode
);
26043 x
= out0
, out0
= out1
, out1
= x
;
26045 emit_insn (gen (out0
, in0
, in1
, out1
));
26049 /* Recognize patterns for the VEXT insns. */
/* Match a permutation whose indices increase by one from perm[0]
   (i.e. an extraction window across the concatenated operands) and
   emit a VEXT with that start offset.  Missing from this extract:
   the `rtx offset;` declaration, some return statements and the
   switch head over d->vmode.  */
26052 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
26054 unsigned int i
, nelt
= d
->nelt
;
26055 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
26058 unsigned int location
;
26060 unsigned int next
= d
->perm
[0] + 1;
26062 /* TODO: Handle GCC's numbering of elements for big-endian. */
26063 if (BYTES_BIG_ENDIAN
)
26066 /* Check if the extracted indexes are increasing by one. */
26067 for (i
= 1; i
< nelt
; next
++, i
++)
26069 /* If we hit the most significant element of the 2nd vector in
26070 the previous iteration, no need to test further. */
26071 if (next
== 2 * nelt
)
26074 /* If we are operating on only one vector: it could be a
26075 rotation. If there are only two elements of size < 64, let
26076 arm_evpc_neon_vrev catch it. */
26077 if (d
->one_vector_p
&& (next
== nelt
))
26079 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
26085 if (d
->perm
[i
] != next
)
26089 location
= d
->perm
[0];
26093 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
26094 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
26095 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
26096 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
26097 case V2SImode
: gen
= gen_neon_vextv2si
; break;
26098 case V4SImode
: gen
= gen_neon_vextv4si
; break;
26099 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
26100 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
26101 case V2DImode
: gen
= gen_neon_vextv2di
; break;
26110 offset
= GEN_INT (location
);
26111 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
26115 /* The NEON VTBL instruction is a fully variable permuation that's even
26116 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
26117 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
26118 can do slightly better by expanding this as a constant where we don't
26119 have to apply a mask. */
/* Fallback matcher: always succeeds for byte modes by materializing
   the permutation as a constant selector.  The testing_p shortcut and
   final `return true;` are missing from this extract.  */
26122 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
26124 rtx rperm
[MAX_VECT_LEN
], sel
;
26125 enum machine_mode vmode
= d
->vmode
;
26126 unsigned int i
, nelt
= d
->nelt
;
26128 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
26129 numbering of elements for big-endian, we must reverse the order. */
26130 if (BYTES_BIG_ENDIAN
)
26136 /* Generic code will try constant permutation twice. Once with the
26137 original mode and again with the elements lowered to QImode.
26138 So wait and don't do the selector expansion ourselves. */
26139 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
26142 for (i
= 0; i
< nelt
; ++i
)
26143 rperm
[i
] = GEN_INT (d
->perm
[i
]);
26144 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
26145 sel
= force_reg (vmode
, sel
);
26147 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
/* Try each pattern matcher in turn for the permutation described by D;
   returns whether one of them handled it.  The TARGET_NEON guard and
   the op0/op1 swap inside the perm-normalization branch are missing
   from this extract.  */
26152 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
26154 /* Check if the input mask matches vext before reordering the
26157 if (arm_evpc_neon_vext (d
))
26160 /* The pattern matching functions above are written to look for a small
26161 number to begin the sequence (0, 1, N/2). If we begin with an index
26162 from the second operand, we can swap the operands. */
26163 if (d
->perm
[0] >= d
->nelt
)
26165 unsigned i
, nelt
= d
->nelt
;
26168 for (i
= 0; i
< nelt
; ++i
)
26169 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
26178 if (arm_evpc_neon_vuzp (d
))
26180 if (arm_evpc_neon_vzip (d
))
26182 if (arm_evpc_neon_vrev (d
))
26184 if (arm_evpc_neon_vtrn (d
))
26186 return arm_evpc_neon_vtbl (d
);
26191 /* Expand a vec_perm_const pattern. */
/* Build an expand_vec_perm_d from the CONST_VECTOR selector SEL and
   dispatch to arm_expand_vec_perm_const_1.  Missing from this extract:
   the d.target/op0/op1 assignments, the perm[] fill inside the selector
   loop, and the `which` switch that canonicalizes single-input cases.  */
26194 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
26196 struct expand_vec_perm_d d
;
26197 int i
, nelt
, which
;
26203 d
.vmode
= GET_MODE (target
);
26204 gcc_assert (VECTOR_MODE_P (d
.vmode
));
26205 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
26206 d
.testing_p
= false;
/* Classify each selector index: bit 1 = uses op0, bit 2 = uses op1.  */
26208 for (i
= which
= 0; i
< nelt
; ++i
)
26210 rtx e
= XVECEXP (sel
, 0, i
);
26211 int ei
= INTVAL (e
) & (2 * nelt
- 1);
26212 which
|= (ei
< nelt
? 1 : 2);
26222 d
.one_vector_p
= false;
26223 if (!rtx_equal_p (op0
, op1
))
26226 /* The elements of PERM do not suggest that only the first operand
26227 is used, but both operands are identical. Allow easier matching
26228 of the permutation by folding the permutation into the single
26232 for (i
= 0; i
< nelt
; ++i
)
26233 d
.perm
[i
] &= nelt
- 1;
26235 d
.one_vector_p
= true;
26240 d
.one_vector_p
= true;
26244 return arm_expand_vec_perm_const_1 (&d
);
26247 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
/* Dry-run version of arm_expand_vec_perm_const: builds D with
   testing_p = true over raw (virtual) registers and asks
   arm_expand_vec_perm_const_1 whether the permutation is supported,
   without emitting anything permanent.  Missing from this extract:
   d.vmode assignment, the start_sequence/end_sequence bracketing and
   the final `return ret;`.  */
26250 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
26251 const unsigned char *sel
)
26253 struct expand_vec_perm_d d
;
26254 unsigned int i
, nelt
, which
;
26258 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
26259 d
.testing_p
= true;
26260 memcpy (d
.perm
, sel
, nelt
);
26262 /* Categorize the set of elements in the selector. */
26263 for (i
= which
= 0; i
< nelt
; ++i
)
26265 unsigned char e
= d
.perm
[i
];
26266 gcc_assert (e
< 2 * nelt
);
26267 which
|= (e
< nelt
? 1 : 2);
26270 /* For all elements from second vector, fold the elements to first. */
26272 for (i
= 0; i
< nelt
; ++i
)
26275 /* Check whether the mask can be applied to the vector type. */
26276 d
.one_vector_p
= (which
!= 3);
26278 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
26279 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
26280 if (!d
.one_vector_p
)
26281 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
26284 ret
= arm_expand_vec_perm_const_1 (&d
);
/* Decide whether the auto-increment addressing form CODE is profitable
   for MODE.  The switch heads on CODE and several `return true/false;`
   lines are missing from this extract.  */
26291 arm_autoinc_modes_ok_p (enum machine_mode mode
, enum arm_auto_incmodes code
)
26293 /* If we are soft float and we do not have ldrd
26294 then all auto increment forms are ok. */
26295 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
26300 /* Post increment and Pre Decrement are supported for all
26301 instruction forms except for vector forms. */
26304 if (VECTOR_MODE_P (mode
))
26306 if (code
!= ARM_PRE_DEC
)
26316 /* Without LDRD and mode size greater than
26317 word size, there is no point in auto-incrementing
26318 because ldm and stm will not have these forms. */
26319 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
26322 /* Vector and floating point modes do not support
26323 these auto increment forms. */
26324 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
26337 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
26338 on ARM, since we know that shifts by negative amounts are no-ops.
26339 Additionally, the default expansion code is not available or suitable
26340 for post-reload insn splits (this can occur when the register allocator
26341 chooses not to do a shift in NEON).
26343 This function is used in both initial expand and post-reload splits, and
26344 handles all kinds of 64-bit shifts.
26346 Input requirements:
26347 - It is safe for the input and output to be the same register, but
26348 early-clobber rules apply for the shift amount and scratch registers.
26349 - Shift by register requires both scratch registers. Shift by a constant
26350 less than 32 in Thumb2 mode requires SCRATCH1 only. In all other cases
26351 the scratch registers may be NULL.
26352 - Ashiftrt by a register also clobbers the CC register. */
26354 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
26355 rtx amount
, rtx scratch1
, rtx scratch2
)
26357 rtx out_high
= gen_highpart (SImode
, out
);
26358 rtx out_low
= gen_lowpart (SImode
, out
);
26359 rtx in_high
= gen_highpart (SImode
, in
);
26360 rtx in_low
= gen_lowpart (SImode
, in
);
26363 in = the register pair containing the input value.
26364 out = the destination register pair.
26365 up = the high- or low-part of each pair.
26366 down = the opposite part to "up".
26367 In a shift, we can consider bits to shift from "up"-stream to
26368 "down"-stream, so in a left-shift "up" is the low-part and "down"
26369 is the high-part of each register pair. */
26371 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
26372 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
26373 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
26374 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
26376 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
26378 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
26379 && GET_MODE (out
) == DImode
);
26381 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
26382 && GET_MODE (in
) == DImode
);
26384 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
26385 && GET_MODE (amount
) == SImode
)
26386 || CONST_INT_P (amount
)));
26387 gcc_assert (scratch1
== NULL
26388 || (GET_CODE (scratch1
) == SCRATCH
)
26389 || (GET_MODE (scratch1
) == SImode
26390 && REG_P (scratch1
)));
26391 gcc_assert (scratch2
== NULL
26392 || (GET_CODE (scratch2
) == SCRATCH
)
26393 || (GET_MODE (scratch2
) == SImode
26394 && REG_P (scratch2
)));
26395 gcc_assert (!REG_P (out
) || !REG_P (amount
)
26396 || !HARD_REGISTER_P (out
)
26397 || (REGNO (out
) != REGNO (amount
)
26398 && REGNO (out
) + 1 != REGNO (amount
)));
26400 /* Macros to make following code more readable. */
26401 #define SUB_32(DEST,SRC) \
26402 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
26403 #define RSB_32(DEST,SRC) \
26404 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
26405 #define SUB_S_32(DEST,SRC) \
26406 gen_addsi3_compare0 ((DEST), (SRC), \
26408 #define SET(DEST,SRC) \
26409 gen_rtx_SET (SImode, (DEST), (SRC))
26410 #define SHIFT(CODE,SRC,AMOUNT) \
26411 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
26412 #define LSHIFT(CODE,SRC,AMOUNT) \
26413 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
26414 SImode, (SRC), (AMOUNT))
26415 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
26416 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
26417 SImode, (SRC), (AMOUNT))
26419 gen_rtx_IOR (SImode, (A), (B))
26420 #define BRANCH(COND,LABEL) \
26421 gen_arm_cond_branch ((LABEL), \
26422 gen_rtx_ ## COND (CCmode, cc_reg, \
26426 /* Shifts by register and shifts by constant are handled separately. */
26427 if (CONST_INT_P (amount
))
26429 /* We have a shift-by-constant. */
26431 /* First, handle out-of-range shift amounts.
26432 In both cases we try to match the result an ARM instruction in a
26433 shift-by-register would give. This helps reduce execution
26434 differences between optimization levels, but it won't stop other
26435 parts of the compiler doing different things. This is "undefined
26436 behaviour, in any case. */
26437 if (INTVAL (amount
) <= 0)
26438 emit_insn (gen_movdi (out
, in
));
26439 else if (INTVAL (amount
) >= 64)
26441 if (code
== ASHIFTRT
)
26443 rtx const31_rtx
= GEN_INT (31);
26444 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
26445 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
26448 emit_insn (gen_movdi (out
, const0_rtx
));
26451 /* Now handle valid shifts. */
26452 else if (INTVAL (amount
) < 32)
26454 /* Shifts by a constant less than 32. */
26455 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
26457 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
26458 emit_insn (SET (out_down
,
26459 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
26461 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
26465 /* Shifts by a constant greater than 31. */
26466 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
26468 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
26469 if (code
== ASHIFTRT
)
26470 emit_insn (gen_ashrsi3 (out_up
, in_up
,
26473 emit_insn (SET (out_up
, const0_rtx
));
26478 /* We have a shift-by-register. */
26479 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
26481 /* This alternative requires the scratch registers. */
26482 gcc_assert (scratch1
&& REG_P (scratch1
));
26483 gcc_assert (scratch2
&& REG_P (scratch2
));
26485 /* We will need the values "amount-32" and "32-amount" later.
26486 Swapping them around now allows the later code to be more general. */
26490 emit_insn (SUB_32 (scratch1
, amount
));
26491 emit_insn (RSB_32 (scratch2
, amount
));
26494 emit_insn (RSB_32 (scratch1
, amount
));
26495 /* Also set CC = amount > 32. */
26496 emit_insn (SUB_S_32 (scratch2
, amount
));
26499 emit_insn (RSB_32 (scratch1
, amount
));
26500 emit_insn (SUB_32 (scratch2
, amount
));
26503 gcc_unreachable ();
26506 /* Emit code like this:
26509 out_down = in_down << amount;
26510 out_down = (in_up << (amount - 32)) | out_down;
26511 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
26512 out_up = in_up << amount;
26515 out_down = in_down >> amount;
26516 out_down = (in_up << (32 - amount)) | out_down;
26518 out_down = ((signed)in_up >> (amount - 32)) | out_down;
26519 out_up = in_up << amount;
26522 out_down = in_down >> amount;
26523 out_down = (in_up << (32 - amount)) | out_down;
26525 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
26526 out_up = in_up << amount;
26528 The ARM and Thumb2 variants are the same but implemented slightly
26529 differently. If this were only called during expand we could just
26530 use the Thumb2 case and let combine do the right thing, but this
26531 can also be called from post-reload splitters. */
26533 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
26535 if (!TARGET_THUMB2
)
26537 /* Emit code for ARM mode. */
26538 emit_insn (SET (out_down
,
26539 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
26540 if (code
== ASHIFTRT
)
26542 rtx done_label
= gen_label_rtx ();
26543 emit_jump_insn (BRANCH (LT
, done_label
));
26544 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
26546 emit_label (done_label
);
26549 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
26554 /* Emit code for Thumb2 mode.
26555 Thumb2 can't do shift and or in one insn. */
26556 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
26557 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
26559 if (code
== ASHIFTRT
)
26561 rtx done_label
= gen_label_rtx ();
26562 emit_jump_insn (BRANCH (LT
, done_label
));
26563 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
26564 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
26565 emit_label (done_label
);
26569 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
26570 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
26574 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
26589 /* Returns true if a valid comparison operation and makes
26590 the operands in a form that is valid. */
26592 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
26594 enum rtx_code code
= GET_CODE (*comparison
);
26595 enum rtx_code canonical_code
;
26596 enum machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
26597 ? GET_MODE (*op2
) : GET_MODE (*op1
);
26599 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
26601 if (code
== UNEQ
|| code
== LTGT
)
26604 canonical_code
= arm_canonicalize_comparison (code
, op1
, op2
);
26605 PUT_CODE (*comparison
, canonical_code
);
26610 if (!arm_add_operand (*op1
, mode
))
26611 *op1
= force_reg (mode
, *op1
);
26612 if (!arm_add_operand (*op2
, mode
))
26613 *op2
= force_reg (mode
, *op2
);
26617 if (!cmpdi_operand (*op1
, mode
))
26618 *op1
= force_reg (mode
, *op1
);
26619 if (!cmpdi_operand (*op2
, mode
))
26620 *op2
= force_reg (mode
, *op2
);
26625 if (!arm_float_compare_operand (*op1
, mode
))
26626 *op1
= force_reg (mode
, *op1
);
26627 if (!arm_float_compare_operand (*op2
, mode
))
26628 *op2
= force_reg (mode
, *op2
);
26638 #include "gt-arm.h"