/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static rtx is_jump_table (rtx);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int optimal_immediate_sequence (enum rtx_code code,
                                       unsigned HOST_WIDE_INT val,
                                       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
                                         unsigned HOST_WIDE_INT val,
                                         struct four_ints *return_sequence,
                                         int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (enum machine_mode);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int *, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
                                  tree, bool);
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
                                      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static rtx aapcs_libcall_value (enum machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool arm_array_mode_supported_p (enum machine_mode,
                                        unsigned HOST_WIDE_INT);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                             const unsigned char *sel);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */
#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
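
/* Editor's note (not in the original source): the arithmetic in the
   comment above works out as 4095 - (-4088) + 1 = 8184 bytes per anchor
   block, and 8184 = 8 * 1023, so consecutive anchor blocks remain
   naturally 8-byte aligned.  */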
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
/* spare              (1 << 11) */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
#define FL_IWMMXT2    (1 << 30)       /* "Intel Wireless MMX2 technology".  */
/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE         (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                         | FL_CO_PROC)
#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
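
/* Editor's note (not in the original source): each FL_FOR_ARCHn macro
   accumulates everything implied by the previous architecture level, so
   for example

     FL_FOR_ARCH5TE = FL_FOR_ARCH5E | FL_THUMB
                    = FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4
                      | FL_ARCH5 | FL_ARCH5E | FL_THUMB

   i.e. an ARMv5TE core is assumed to provide all ARMv5E instructions
   plus Thumb.  */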
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;
/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM)    \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
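
/* Editor's note (not in the original source): THUMB2_WORK_REGS starts
   from the low registers r0-r7 (the 0xff mask) and removes the Thumb
   hard frame pointer and the PIC register when they fall within that
   range.  The SP_REGNUM and PC_REGNUM bits lie above bit 7, so they are
   already outside the 0xff mask; listing them simply makes the intent
   explicit.  */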
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  enum base_architecture base_arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,                                 /* Sched adj cost.  */
  3,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                 /* Prefer constant pool.  */
  arm_default_branch_cost,
  false                                 /* Prefer LDRD/STRD.  */
};
const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,                                 /* Sched adj cost.  */
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                 /* Prefer constant pool.  */
  arm_default_branch_cost,
  false                                 /* Prefer LDRD/STRD.  */
};
/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,                                 /* Sched adj cost.  */
  1,                                    /* Constant limit.  */
  3,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                 /* Prefer constant pool.  */
  arm_default_branch_cost,
  false                                 /* Prefer LDRD/STRD.  */
};
const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,                                    /* Constant limit.  */
  3,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                 /* Prefer constant pool.  */
  arm_default_branch_cost,
  false                                 /* Prefer LDRD/STRD.  */
};
const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,                                 /* Sched adj cost.  */
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                 /* Prefer constant pool.  */
  arm_default_branch_cost,
  false                                 /* Prefer LDRD/STRD.  */
};
const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,                                 /* Sched adj cost.  */
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                /* Prefer constant pool.  */
  arm_default_branch_cost,
  false                                 /* Prefer LDRD/STRD.  */
};
/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  NULL,                                 /* Sched adj cost.  */
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                /* Prefer constant pool.  */
  arm_default_branch_cost,
  false                                 /* Prefer LDRD/STRD.  */
};
const struct tune_params arm_cortex_a15_tune =
{
  arm_9e_rtx_costs,
  NULL,                                 /* Sched adj cost.  */
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                /* Prefer constant pool.  */
  arm_default_branch_cost,
  true                                  /* Prefer LDRD/STRD.  */
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  NULL,                                 /* Sched adj cost.  */
  1,                                    /* Constant limit.  */
  1,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                /* Prefer constant pool.  */
  arm_cortex_a5_branch_cost,
  false                                 /* Prefer LDRD/STRD.  */
};
const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,                                /* Prefer constant pool.  */
  arm_default_branch_cost,
  false                                 /* Prefer LDRD/STRD.  */
};
const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                 /* Prefer constant pool.  */
  arm_default_branch_cost,
  false                                 /* Prefer LDRD/STRD.  */
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
    FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE

  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
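
/* Editor's note (not in the original source): an entry in arm-cores.def
   has the shape ARM_CORE (NAME, IDENT, ARCH, FLAGS, COSTS); a
   representative (hypothetical here) entry such as

     ARM_CORE ("arm7tdmi", arm7tdmi, 4T, FL_CO_PROC, slowmul)

   would expand via the macro above to an all_cores element with flags
   FL_CO_PROC | FL_FOR_ARCH4T and tuning &arm_slowmul_tune.  */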
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH

  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;
/* The name of the preprocessor macro to define for this architecture.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
#include "arm-fpus.def"
#undef ARM_FPU
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32, TLS_LDM32, TLS_LDO32, TLS_IE32, TLS_LE32,
  TLS_DESCSEQ   /* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
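
/* Editor's note (not in the original source): when optimizing for size
   this caps constant synthesis at a single insn; otherwise the limit
   comes from the active tuning, e.g. 3 insns under arm_slowmul_tune
   above.  */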
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
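
/* Editor's note (not in the original source): for example,
   emit_set_insn (reg, GEN_INT (42)) emits the insn
   (set (reg) (const_int 42)).  No legitimization is performed, which is
   why both operands must already be valid.  */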
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
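
/* Editor's note (not in the original source): this is Kernighan's
   population-count trick; "value &= value - 1" clears exactly one set
   bit per iteration, e.g. 0b101100 -> 0b101000 -> 0b100000 -> 0, giving
   a count of 3 in three iterations rather than one iteration per bit
   position.  */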
typedef struct
{
  enum machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
                             const char *funcname, const char *modename,
                             int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
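
/* Editor's note (not in the original source): for instance, a call such
   as arm_set_fixed_optab_libfunc (ssadd_optab, SAmode, "ssadd", "sa", 3)
   would register the helper name "__gnu_ssaddsa3" for saturating SAmode
   addition, matching the __gnu_-prefixed names used by libgcc on ARM.  */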
static void
arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
                            enum machine_mode from, const char *funcname,
                            const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
           maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
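
/* Editor's note (not in the original source): a conversion between two
   signed fract modes, say from QQmode to HQmode, passes all three tests
   above and yields "__gnu_fractqqhq2"; a fixed-to-float conversion such
   as SAmode to DFmode fails the first test and drops the "2", giving
   "__gnu_fractsadf".  */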
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (2 * UNITS_PER_WORD);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */
  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     callee-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));
      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      { { QQmode, "qq" }, { UQQmode, "uqq" }, { HQmode, "hq" },
        { UHQmode, "uhq" }, { SQmode, "sq" }, { USQmode, "usq" },
        { DQmode, "dq" }, { UDQmode, "udq" }, { TQmode, "tq" },
        { UTQmode, "utq" }, { HAmode, "ha" }, { UHAmode, "uha" },
        { SAmode, "sa" }, { USAmode, "usa" }, { DAmode, "da" },
        { UDAmode, "uda" }, { TAmode, "ta" }, { UTAmode, "uta" } };
    const arm_fixed_mode_set fixed_conv_modes[] =
      { { QQmode, "qq" }, { UQQmode, "uqq" }, { HQmode, "hq" },
        { UHQmode, "uhq" }, { SQmode, "sq" }, { USQmode, "usq" },
        { DQmode, "dq" }, { UDQmode, "udq" }, { TQmode, "tq" },
        { UTQmode, "utq" }, { HAmode, "ha" }, { UHAmode, "uha" },
        { SAmode, "sa" }, { USAmode, "usa" }, { DAmode, "da" },
        { UDAmode, "uda" }, { TAmode, "ta" }, { UTAmode, "uta" },
        { QImode, "qi" }, { HImode, "hi" }, { SImode, "si" },
        { DImode, "di" }, { SFmode, "sf" }, { DFmode, "df" } };

    unsigned int i, j;
    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
        arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
                                     "add", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
                                     "ssadd", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
                                     "usadd", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
                                     "sub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
                                     "sssub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
                                     "ussub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
                                     "mul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
                                     "ssmul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
                                     "usmul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
                                     "div", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
                                     "udiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
                                     "ssdiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
                                     "usdiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
                                     "neg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
                                     "ssneg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
                                     "usneg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
                                     "ashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
                                     "ashr", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
                                     "lshr", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
                                     "ssashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
                                     "usashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
                                     "cmp", fixed_arith_modes[i].name, 2);
      }
    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
        {
          if (i == j
              || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
                  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
            continue;

          arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "fract",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (satfract_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "satfract",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (fractuns_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "fractuns",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (satfractuns_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "satfractuns",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
        }
  }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}
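
/* Editor's note (not in the original source): given a user-level
   "va_list ap;" on an AAPCS target, the function above rewrites the
   tree for "ap" into the tree for "ap.__ap", so the standard
   va_start/va_arg expanders below only ever see the inner "void *"
   cursor.  */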
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between mcpu and march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            /* -mcpu wins.  */
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors *sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
         switch that requires certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }
      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors *best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          arm_selected_cpu = sel;
        }
    }
  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;
  arm_base_arch = arm_selected_cpu->base_arch;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");
1668 /* BPABI targets use linker tricks to allow interworking on cores
1669 without thumb support. */
1670 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
1672 warning (0, "target CPU does not support interworking" );
1673 target_flags
&= ~MASK_INTERWORK
;
1676 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
1678 warning (0, "target CPU does not support THUMB instructions");
1679 target_flags
&= ~MASK_THUMB
;
1682 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
1684 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1685 target_flags
&= ~MASK_APCS_FRAME
;
1688 /* Callee super interworking implies thumb interworking. Adding
1689 this to the flags here simplifies the logic elsewhere. */
1690 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
1691 target_flags
|= MASK_INTERWORK
;
1693 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1694 from here where no function is being compiled currently. */
1695 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
1696 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1698 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
1699 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1701 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
1703 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1704 target_flags
|= MASK_APCS_FRAME
;
1707 if (TARGET_POKE_FUNCTION_NAME
)
1708 target_flags
|= MASK_APCS_FRAME
;
1710 if (TARGET_APCS_REENT
&& flag_pic
)
1711 error ("-fpic and -mapcs-reent are incompatible");
1713 if (TARGET_APCS_REENT
)
1714 warning (0, "APCS reentrant code not supported. Ignored");
1716 /* If this target is normally configured to use APCS frames, warn if they
1717 are turned off and debugging is turned on. */
1719 && write_symbols
!= NO_DEBUG
1720 && !TARGET_APCS_FRAME
1721 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
1722 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1724 if (TARGET_APCS_FLOAT
)
1725 warning (0, "passing floating point arguments in fp regs not yet supported");
1727 if (TARGET_LITTLE_WORDS
)
1728 warning (OPT_Wdeprecated
, "%<mwords-little-endian%> is deprecated and "
1729 "will be removed in a future release");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;

#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      target_fpu_name = "vfp";
#endif

      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
                                  CL_TARGET);
      gcc_assert (ok);
    }

  arm_fpu_desc = &all_fpus[arm_fpu_index];

  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable();
    }
  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
        error ("AAPCS does not support -mcaller-super-interworking");
      else
        if (TARGET_CALLEE_INTERWORKING)
          error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT && TARGET_NEON)
    error ("iWMMXt and NEON are incompatible");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
        arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
               && TARGET_HARD_FLOAT
               && TARGET_VFP)
        arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
        arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
        sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
        arm_pcs_default = ARM_PCS_APCS;
      else
        arm_pcs_default = ARM_PCS_ATPCS;
    }
  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
        target_thread_pointer = TP_CP15;
      else
        target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");
  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
        arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
          && arm_structure_size_boundary != 32
          && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
        {
          if (ARM_DOUBLEWORD_ALIGN)
            warning (0,
                     "structure size boundary can only be set to 8, 32 or 64");
          else
            warning (0, "structure size boundary can only be set to 8 or 32");
          arm_structure_size_boundary
            = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
        }
    }

  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }
  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
        warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
        warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
               || pic_register == HARD_FRAME_POINTER_REGNUM
               || pic_register == STACK_POINTER_REGNUM
               || pic_register >= PC_REGNUM
               || (TARGET_VXWORKS_RTP
                   && (unsigned int) pic_register != arm_pic_register))
        error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
        arm_pic_register = pic_register;
    }
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (arm_selected_cpu->core == cortexm3)
        fix_cm3_ldrd = 1;
      else
        fix_cm3_ldrd = 0;
    }

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors.  */

  if (unaligned_access == 2)
    {
      if (arm_arch6 && (arm_arch_notm || arm_arch7))
        unaligned_access = 1;
      else
        unaligned_access = 0;
    }
  else if (unaligned_access == 1
           && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      unaligned_access = 0;
    }

  if (TARGET_THUMB1 && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }
  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
         are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    max_insns_skipped = current_tune->max_insns_skipped;

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
                           global_options.x_param_values,
                           global_options_set.x_param_values);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;
  /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have
     deemed it beneficial (signified by setting num_prefetch_slots to 1 or
     more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->num_prefetch_slots > 0)
    flag_prefetch_loop_arrays = 1;

  /* Set up parameters to be used in prefetching algorithm.  Do not override
     the defaults unless we are tuning for a core we have researched values
     for.  */
  if (current_tune->num_prefetch_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
                           current_tune->num_prefetch_slots,
                           global_options.x_param_values,
                           global_options_set.x_param_values);
  if (current_tune->l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
                           current_tune->l1_cache_line_size,
                           global_options.x_param_values,
                           global_options_set.x_param_values);
  if (current_tune->l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
                           current_tune->l1_cache_size,
                           global_options.x_param_values,
                           global_options_set.x_param_values);

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
                         global_options.x_param_values,
                         global_options_set.x_param_values);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init(&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const  arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char * arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
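
/* Usage sketch, not part of GCC: the strings in the table above come from
   user declarations such as

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
     void und_handler (void) __attribute__ ((isr ("UNDEF")));
     void any_handler (void) __attribute__ ((interrupt));

   The last form passes no argument, so arm_isr_value above defaults it
   to ARM_FT_ISR, i.e. an IRQ handler.  */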
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
          || !(flag_unwind_tables
               || (flag_exceptions
                   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}

/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
           ldr          r8, [pc, #0]
           ldr          pc, [pc]
           .word        static chain value
           .word        function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      /* The Thumb-2 trampoline is similar to the arm implementation.
         Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
                   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
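
/* Illustrative note, not part of GCC: for TARGET_ARM the template above
   plus the two zero words assembles to

        ldr     r8, [pc, #0]    @ loads the static chain value
        ldr     pc, [pc, #0]    @ loads the target function's address
        .word   0               @ patched by arm_trampoline_init (chain)
        .word   0               @ patched by arm_trampoline_init (fnaddr)

   Reading pc yields "address of the insn + 8", so each load's #0 offset
   lands exactly on the literal word two words below it.  */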
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
                     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
                     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}

/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
                                NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
         is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
                                 && stack_adjust == 4)))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
         the default abi) ...  */
      if (!call_used_regs[3])
        return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
        return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
        {
          gcc_assert (GET_CODE (sibling) == CALL_INSN);

          if (find_regno_fusage (sibling, USE, 3))
            return 0;
        }

      /* ... and that there are no call-saved registers in r0-r2
         (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
        return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
         conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
        return 0;

      if (flag_pic
          && arm_pic_register != INVALID_REGNUM
          && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
        return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
        return 0;

  return 1;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
          != ((~(unsigned HOST_WIDE_INT) 0)
              & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
          && ((i & ~0xc000003f) == 0
              || (i & ~0xf000000f) == 0
              || (i & ~0xfc000003) == 0))
        return TRUE;
    }
  else
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
        return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
        return TRUE;
    }

  return FALSE;
}
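
/* Illustrative sketch, not part of GCC: in ARM mode the tests above accept
   exactly the "modified immediate" operands, i.e. an 8-bit value rotated
   right by an even amount.  A direct (but slower) transcription of that
   definition, assuming a 32-bit unsigned int:  */

static ATTRIBUTE_UNUSED int
example_is_arm_immediate (unsigned int x)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate X left by ROT; if the result fits in 8 bits, then X is
         that 8-bit value rotated right by ROT.  */
      unsigned int y = rot ? ((x << rot) | (x >> (32 - rot))) : x;

      if ((y & ~0xffu) == 0)
        return 1;
    }

  return 0;
}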
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
        return 1;
      else
        /* Otherwise, try mvn.  */
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
          && ((i & 0xfffff000) == 0
              || ((-i) & 0xfffff000) == 0))
        return 1;
      /* else fall through.  */

    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
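
/* Illustrative note, not part of GCC: the CODE-specific fallbacks above
   capture standard instruction-selection tricks.  With hypothetical
   operand values:

        add     r0, r1, #-240           @ negation encodable: emitted as
        sub     r0, r1, #240            @   sub (the PLUS case)
        mov     r0, #0x00ffffff         @ complement encodable: emitted as
        mvn     r0, #0xff000000         @   mvn (the SET case)

   Any constant whose negation or bitwise complement passes
   const_ok_for_arm behaves the same way.  */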
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
              && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
                    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (GET_CODE (target) == REG && GET_CODE (source) == REG
          && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
         constants by pushing them into memory so we must synthesize
         them in-line, regardless of the cost.  This is only likely to
         be more costly on chips that have load delay slots and we are
         compiling without running the scheduler (so no splitting
         occurred before the final instruction emission).

         Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c  */
      if (!after_arm_reorg
          && !cond
          && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
                                1, 0)
              > (arm_constant_limit (optimize_function_for_size_p (cfun))
                 + (code != SET))))
        {
          if (code == SET)
            {
              /* Currently SET is the only monadic value for CODE, all
                 the rest are diadic.  */
              if (TARGET_USE_MOVT)
                arm_emit_movpair (target, GEN_INT (val));
              else
                emit_set_insn (target, GEN_INT (val));

              return 1;
            }
          else
            {
              rtx temp = subtargets ? gen_reg_rtx (mode) : target;

              if (TARGET_USE_MOVT)
                arm_emit_movpair (temp, GEN_INT (val));
              else
                emit_set_insn (temp, GEN_INT (val));

              /* For MINUS, the value is subtracted from, since we never
                 have subtraction of a constant.  */
              if (code == MINUS)
                emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
              else
                emit_set_insn (target,
                               gen_rtx_fmt_ee (code, mode, source, temp));
              return 2;
            }
        }
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
                           1);
}
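
/* Illustrative sketch, not part of GCC: when TARGET_USE_MOVT holds,
   arm_emit_movpair above materializes an arbitrary 32-bit constant as a
   MOVW/MOVT pair, one 16-bit half per insn.  The split itself is just:  */

static ATTRIBUTE_UNUSED void
example_movw_movt_split (unsigned int val,
                         unsigned short *movw_imm, unsigned short *movt_imm)
{
  *movw_imm = val & 0xffff;             /* movw rd, #low16 (zero-extends) */
  *movt_imm = (val >> 16) & 0xffff;     /* movt rd, #high16 (keeps low half) */
}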
/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
                            struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
        {
          int consecutive_zeros = 0;

          if (!(val & (3 << i)))
            {
              while ((i < 32) && !(val & (3 << i)))
                {
                  consecutive_zeros += 2;
                  i += 2;
                }
              if (consecutive_zeros > best_consecutive_zeros)
                {
                  best_consecutive_zeros = consecutive_zeros;
                  best_start = i - consecutive_zeros;
                }
              i -= 2;
            }
        }
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

           *((volatile int *)0xe0000100) = 1;
           *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

            mov rA, #0xe0000000
            mov rB, #1
            str rB, [rA, #0x100]
            mov rB, #2
            str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
        {
          *return_sequence = tmp_sequence;
          insns1 = insns2;
        }
    }

  return insns1;
}
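
/* Worked example, not part of GCC: for VAL = 0xe0000100 both candidate
   start positions yield two insns, so the second call (starting from
   zero, i.e. from the MSB) wins the tie and the small chunk is emitted
   last:

        mov     rA, #0xe0000000
        add     rA, rA, #0x100

   leaving #0x100 in the final insn where it can later fold into a
   pre-indexed address, exactly as the comment above describes.  */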
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
                              struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
        i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
        {
          loc = i;

          if (i <= 12 && TARGET_THUMB2 && code == PLUS)
            /* We can use addw/subw for the last 12 bits.  */
            result = remainder;
          else
            {
              /* Use an 8-bit shifted/rotated immediate.  */
              end = i - 8;
              if (end < 0)
                end += 32;
              result = remainder & ((0x0ff << end)
                                    | ((i < end) ? (0xff >> (32 - end))
                                                 : 0));
              i -= 8;
            }
        }
      else
        {
          /* Arm allows rotates by a multiple of two. Thumb-2 allows
             arbitrary shifts.  */
          i -= TARGET_ARM ? 2 : 1;
          continue;
        }

      /* Next, see if we can do a better job with a thumb2 replicated
         constant.

         We do it this way around to catch the cases like 0x01F001E0 where
         two 8-bit immediates would work, but a replicated constant would
         make it worse.

         TODO: 16-bit constants that don't clear all the bits, but still win.
         TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
        {
          b1 = (remainder & 0xff000000) >> 24;
          b2 = (remainder & 0x00ff0000) >> 16;
          b3 = (remainder & 0x0000ff00) >> 8;
          b4 = remainder & 0xff;

          if (loc > 24)
            {
              /* The 8-bit immediate already found clears b1 (and maybe b2),
                 but must leave b3 and b4 alone.  */

              /* First try to find a 32-bit replicated constant that clears
                 almost everything.  We can assume that we can't do it in one,
                 or else we wouldn't be here.  */
              unsigned int tmp = b1 & b2 & b3 & b4;
              unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
                                  + (tmp << 24);
              unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
                                            + (tmp == b3) + (tmp == b4);
              if (tmp
                  && (matching_bytes >= 3
                      || (matching_bytes == 2
                          && const_ok_for_op (remainder & ~tmp2, code))))
                {
                  /* At least 3 of the bytes match, and the fourth has at
                     least as many bits set, or two of the bytes match
                     and it will only require one more insn to finish.  */
                  result = tmp2;
                  i = tmp != b1 ? 32
                      : tmp != b2 ? 24
                      : tmp != b3 ? 16
                      : 8;
                }

              /* Second, try to find a 16-bit replicated constant that can
                 leave three of the bytes clear.  If b2 or b4 is already
                 zero, then we can.  If the 8-bit from above would not
                 clear b2 anyway, then we still win.  */
              else if (b1 == b3 && (!b2 || !b4
                                    || (remainder & 0x00ff0000 & ~result)))
                {
                  result = remainder & 0xff00ff00;
                  i = 24;
                }
            }
          else if (loc > 16)
            {
              /* The 8-bit immediate already found clears b2 (and maybe b3)
                 and we don't get here unless b1 is already clear, but it will
                 leave b4 unchanged.  */

              /* If we can clear b2 and b4 at once, then we win, since the
                 8-bits couldn't possibly reach that far.  */
              if (b2 == b4)
                {
                  result = remainder & 0x00ff00ff;
                  i = 16;
                }
            }
        }

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
        code = PLUS;
    }
  while (remainder);

  return insns;
}
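
/* Illustrative sketch, not part of GCC: the Thumb-2 replicated forms
   considered above are the modified-immediate patterns 0x00XY00XY,
   0xXY00XY00 and 0xXYXYXYXY.  Recognizing them in isolation:  */

static ATTRIBUTE_UNUSED int
example_thumb2_replicated (unsigned int x)
{
  unsigned int lo = x & 0xff;           /* byte in bits 0-7 */
  unsigned int hi = x & 0xff00;         /* byte in bits 8-15 */

  if (x == (lo | (lo << 16)))                           /* 0x00XY00XY */
    return 1;
  if (x == (hi | (hi << 16)))                           /* 0xXY00XY00 */
    return 1;
  if (x == (lo | (lo << 8) | (lo << 16) | (lo << 24)))  /* 0xXYXYXYXY */
    return 1;

  return 0;
}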
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
                  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
                  int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             GEN_INT (ARM_SIGN_EXTEND (val))));
          return 1;
        }

      if (remainder == 0)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;

          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, source));
          return 1;
        }
      break;

    case AND:
      if (remainder == 0)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, const0_rtx));
          return 1;
        }
      if (remainder == 0xffffffff)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, source));
          return 1;
        }
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, source));
          return 1;
        }

      if (remainder == 0xffffffff)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             gen_rtx_NOT (mode, source)));
          return 1;
        }
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
         passed as (source + (-val)).  */
      if (remainder == 0)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             gen_rtx_NEG (mode, source)));
          return 1;
        }
      if (const_ok_for_arm (val))
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             gen_rtx_MINUS (mode, GEN_INT (val),
                                                            source)));
          return 1;
        }

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
        emit_constant_insn (cond,
                            gen_rtx_SET (VOIDmode, target,
                                         (source
                                          ? gen_rtx_fmt_ee (code, mode, source,
                                                            GEN_INT (val))
                                          : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
        {
          if (mode == SImode && i == 16)
            /* Use UXTH in preference to UBFX, since on Thumb2 it's a
               smaller insn.  */
            emit_constant_insn (cond,
                                gen_zero_extendhisi2
                                (target, gen_lowpart (HImode, source)));
          else
            /* Extz only supports SImode, but we can coerce the operands
               into that mode.  */
            emit_constant_insn (cond,
                                gen_extzv_t2 (gen_lowpart (SImode, target),
                                              gen_lowpart (SImode, source),
                                              GEN_INT (i), const0_rtx));
        }

      return 1;
    }
  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
        clear_sign_bit_copies++;
      else
        break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
        set_sign_bit_copies++;
      else
        break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
        clear_zero_bit_copies++;
      else
        break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
        set_zero_bit_copies++;
      else
        break;
    }
  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
         to be negative.  This is a good way of doing it, since the shift
         may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
        {
          if (const_ok_for_arm
              (temp1 = ARM_SIGN_EXTEND (remainder
                                        << (set_sign_bit_copies - 1))))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, new_src,
                                                   GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_ashrsi3 (target, new_src,
                                                   GEN_INT (set_sign_bit_copies - 1)));
                }
              return 2;
            }
          /* For an inverted constant, we will need to set the low bits,
             these will be shifted out of harm's way.  */
          temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
          if (const_ok_for_arm (~temp1))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, new_src,
                                                   GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_ashrsi3 (target, new_src,
                                                   GEN_INT (set_sign_bit_copies - 1)));
                }
              return 2;
            }
        }

      /* See if we can calculate the value as the difference between two
         valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
        {
          int topshift = clear_sign_bit_copies & ~1;

          temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
                                   & (0xff000000 >> topshift));

          /* If temp1 is zero, then that means the 9 most significant
             bits of remainder were 1 and we've caused it to overflow.
             When topshift is 0 we don't need to do anything since we
             can borrow from 'bit 32'.  */
          if (temp1 == 0 && topshift != 0)
            temp1 = 0x80000000 >> (topshift - 1);

          temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

          if (const_ok_for_arm (temp2))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, new_src,
                                                   GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_addsi3 (target, new_src,
                                                  GEN_INT (-temp2)));
                }

              return 2;
            }
        }

      /* See if we can generate this by setting the bottom (or the top)
         16 bits, and then shifting these into the other half of the
         word.  We only look for the simplest cases, to do more would cost
         too much.  Be careful, however, not to generate this when the
         alternative would take fewer insns.  */
      if (val & 0xffff0000)
        {
          temp1 = remainder & 0xffff0000;
          temp2 = remainder & 0x0000ffff;

          /* Overlaps outside this range are best done using other methods.  */
          for (i = 9; i < 24; i++)
            {
              if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
                  && !const_ok_for_arm (temp2))
                {
                  rtx new_src = (subtargets
                                 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
                                 : target);
                  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
                                            source, subtargets, generate);
                  source = new_src;
                  if (generate)
                    emit_constant_insn
                      (cond,
                       gen_rtx_SET
                       (VOIDmode, target,
                        gen_rtx_IOR (mode,
                                     gen_rtx_ASHIFT (mode, source,
                                                     GEN_INT (i)),
                                     source)));
                  return insns + 1;
                }
            }

          /* Don't duplicate cases already considered.  */
          for (i = 17; i < 24; i++)
            {
              if (((temp1 | (temp1 >> i)) == remainder)
                  && !const_ok_for_arm (temp1))
                {
                  rtx new_src = (subtargets
                                 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
                                 : target);
                  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
                                            source, subtargets, generate);
                  source = new_src;
                  if (generate)
                    emit_constant_insn
                      (cond,
                       gen_rtx_SET (VOIDmode, target,
                                    gen_rtx_IOR
                                    (mode,
                                     gen_rtx_LSHIFTRT (mode, source,
                                                       GEN_INT (i)),
                                     source)));
                  return insns + 1;
                }
            }
        }
      break;
    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
         single instruction, and we can find a temporary to put it in,
         then this can be done in two instructions instead of 3-4.  */
      if (subtargets
          /* TARGET can't be NULL if SUBTARGETS is 0 */
          || (reload_completed && !reg_mentioned_p (target, source)))
        {
          if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
            {
              if (generate)
                {
                  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, sub,
                                                   GEN_INT (val)));
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, target,
                                                   gen_rtx_fmt_ee (code, mode,
                                                                   source, sub)));
                }
              return 2;
            }
        }

      if (code == XOR)
        break;

      /*  Convert.
          x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
                             and the remainder 0s for e.g. 0xfff00000)
          x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

          This can be done in 2 instructions by using shifts with mov or mvn.
          e.g. for
          x = x | 0xfff00000;
          we generate.
          mvn   r0, r0, asl #12
          mvn   r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
          && (val & (-1 << (32 - set_sign_bit_copies))) == val)
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (set_sign_bit_copies);

              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, sub,
                              gen_rtx_NOT (mode,
                                           gen_rtx_ASHIFT (mode,
                                                           source,
                                                           shift))));
              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, target,
                              gen_rtx_NOT (mode,
                                           gen_rtx_LSHIFTRT (mode, sub,
                                                             shift))));
            }
          return 2;
        }

      /* Convert
          x = y | constant (which has set_zero_bit_copies number of trailing ones).
           to
          x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

          For eg. r0 = r0 | 0xfff
               mvn      r0, r0, lsr #12
               mvn      r0, r0, asl #12

      */
      if (set_zero_bit_copies > 8
          && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (set_zero_bit_copies);

              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, sub,
                              gen_rtx_NOT (mode,
                                           gen_rtx_LSHIFTRT (mode,
                                                             source,
                                                             shift))));
              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, target,
                              gen_rtx_NOT (mode,
                                           gen_rtx_ASHIFT (mode, sub,
                                                           shift))));
            }
          return 2;
        }

      /* This will never be reached for Thumb2 because orn is a valid
         instruction. This is for Thumb1 and the ARM 32 bit cases.

         x = y | constant (such that ~constant is a valid constant)
         Transform this to
         x = ~(~y & ~constant).
      */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, sub,
                                               gen_rtx_NOT (mode, source)));
              source = sub;
              if (subtargets)
                sub = gen_reg_rtx (mode);
              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, sub,
                                               gen_rtx_AND (mode, source,
                                                            GEN_INT (temp1))));
              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, target,
                                               gen_rtx_NOT (mode, sub)));
            }
          return 3;
        }
      break;
    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
        {
          HOST_WIDE_INT shift_mask = ((0xffffffff
                                       << (32 - clear_sign_bit_copies))
                                      & 0xffffffff);

          if ((remainder | shift_mask) != 0xffffffff)
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            new_src, source, subtargets, 1);
                  source = new_src;
                }
              else
                {
                  rtx targ = subtargets ? NULL_RTX : target;
                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            targ, source, subtargets, 0);
                }
            }

          if (generate)
            {
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_sign_bit_copies);

              emit_insn (gen_ashlsi3 (new_src, source, shift));
              emit_insn (gen_lshrsi3 (target, new_src, shift));
            }

          return insns + 2;
        }

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
        {
          HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

          if ((remainder | shift_mask) != 0xffffffff)
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            new_src, source, subtargets, 1);
                  source = new_src;
                }
              else
                {
                  rtx targ = subtargets ? NULL_RTX : target;

                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            targ, source, subtargets, 0);
                }
            }

          if (generate)
            {
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_zero_bit_copies);

              emit_insn (gen_lshrsi3 (new_src, source, shift));
              emit_insn (gen_ashlsi3 (target, new_src, shift));
            }

          return insns + 2;
        }
      break;

    default:
      break;
    }
  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
                                            &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
                                            &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
        {
          rtx new_src, temp1_rtx;

          temp1 = immediates->i[i];

          if (code == SET || code == MINUS)
            new_src = (subtargets ? gen_reg_rtx (mode) : target);
          else if ((final_invert || i < (insns - 1)) && subtargets)
            new_src = gen_reg_rtx (mode);
          else
            new_src = target;

          if (can_invert)
            temp1 = ~temp1;
          else if (can_negate)
            temp1 = -temp1;

          temp1 = trunc_int_for_mode (temp1, mode);
          temp1_rtx = GEN_INT (temp1);

          if (code == SET)
            ;
          else if (code == MINUS)
            temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
          else
            temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

          emit_constant_insn (cond,
                              gen_rtx_SET (VOIDmode, new_src,
                                           temp1_rtx));
          source = new_src;

          if (code == SET)
            {
              can_negate = can_invert;
              can_invert = 0;
              code = PLUS;
            }
          else if (code == MINUS)
            code = PLUS;
        }
    }

  if (final_invert)
    {
      if (generate)
        emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
                                               gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
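
/* Worked example, not part of GCC: on a core without UXTH/UBFX, the AND
   double-shift path above handles x &= 0x0000ffff (clear_sign_bit_copies
   is 16) with no constant at all:

        mov     rT, rS, asl #16
        mov     rD, rT, lsr #16

   and the mirrored clear_zero_bit_copies case handles masks such as
   0xffff0000 with lsr followed by asl.  */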
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

enum rtx_code
arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
{
  enum machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      rtx tem;

      if (code == GT || code == LE
          || (!TARGET_ARM && (code == GTU || code == LEU)))
        {
          /* Missing comparison.  First try to use an available
             comparison.  */
          if (GET_CODE (*op1) == CONST_INT)
            {
              i = INTVAL (*op1);
              switch (code)
                {
                case GT:
                case LE:
                  if (i != maxval
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
                    {
                      *op1 = GEN_INT (i + 1);
                      return code == GT ? GE : LT;
                    }
                  break;
                case GTU:
                case LEU:
                  if (i != ~((unsigned HOST_WIDE_INT) 0)
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
                    {
                      *op1 = GEN_INT (i + 1);
                      return code == GTU ? GEU : LTU;
                    }
                  break;
                default:
                  gcc_unreachable ();
                }
            }

          /* If that did not work, reverse the condition.  */
          tem = *op0;
          *op0 = *op1;
          *op1 = tem;
          return swap_condition (code);
        }

      return code;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
                        GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (GET_CODE (*op1) != CONST_INT
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return code;

  i = INTVAL (*op1);

  switch (code)
    {
    case EQ:
    case NE:
      return code;

    case GT:
    case LE:
      if (i != maxval
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (i + 1);
          return code == GT ? GE : LT;
        }
      break;

    case GE:
    case LT:
      if (i != ~maxval
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          return code == GE ? GT : LE;
        }
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (i + 1);
          return code == GTU ? GEU : LTU;
        }
      break;

    case GEU:
    case LTU:
      if (i != 0
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          return code == GEU ? GTU : LEU;
        }
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
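
/* Worked example, not part of GCC: (x <= 0x3ff) cannot be tested directly
   because 0x3ff is not a valid ARM immediate, but 0x400 is, so the LE
   case above rewrites it as (x < 0x400):

        cmp     r0, #0x400      @ LE #0x3ff becomes LT #0x400

   The maxval tests guarantee the +1/-1 adjustments never wrap past the
   most positive or most negative representable value.  */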
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
                    bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promote small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  return arm_libcall_value_1 (mode);
}
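
/* Worked example, not part of GCC: on a big-endian AAPCS target a 3-byte
   struct lives in the most significant bytes of r0, so the rounding above
   widens size 3 to 4 and the value comes back in SImode instead of a
   3-byte mode.  */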
static int
libcall_eq (const void *p1, const void *p2)
{
  return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
}

static hashval_t
libcall_hash (const void *p1)
{
  return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
}

static void
add_libcall (htab_t htab, rtx libcall)
{
  *htab_find_slot (htab, libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static htab_t libcall_htab;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = htab_create (31, libcall_hash, libcall_eq,
                                  NULL);
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
                   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
         registers if the selected core only supports single-precision
         arithmetic, even if we are using the hard-float ABI.  The same is
         true for single-precision helpers, but we will never be using the
         hard-float ABI on a CPU which doesn't support single-precision
         operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
                                                        SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
                                                        DFmode));
    }

  return libcall && htab_find (libcall_htab, libcall) != NULL;
}
rtx
arm_libcall_value_1 (enum machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
           && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (enum machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
         even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
        return gen_rtx_REG (mode, ARG_REGISTER(1));
    }

  return arm_libcall_value_1 (mode);
}

/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
          && TARGET_AAPCS_BASED
          && TARGET_VFP
          && TARGET_HARD_FLOAT
          && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
          && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
        size += 32;
      if (TARGET_IWMMXT_ABI)
        size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
         complex) are always returned in a register (or registers).
         We don't care about which register here, so we can short-cut
         some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
          && TREE_CODE (type) != VECTOR_TYPE
          && TREE_CODE (type) != COMPLEX_TYPE)
        return false;

      /* Any return value that is no larger than one word can be
         returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
        return false;

      /* Check any available co-processors to see if they accept the
         type as a register candidate (VFP, for example, can return
         some aggregates in consecutive registers).  These aren't
         available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
        return false;

      /* Vector values should be returned using ARM registers, not
         memory (unless they're over 16 bytes, which will break since
         we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
        return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
         larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
         if the type is 'integer like' and every addressable element
         has an offset of zero.  For practical purposes this means
         that the structure can have at most one non bit-field element
         and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
         have been created by C++.  */
      for (field = TYPE_FIELDS (type);
           field && TREE_CODE (field) != FIELD_DECL;
           field = DECL_CHAIN (field))
        continue;

      if (field == NULL)
        return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed.  */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
        return true;

      /* ... Aggregates that are not themselves valid for returning in
         a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
        return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
         since they are not addressable.  */
      for (field = DECL_CHAIN (field);
           field;
           field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (!DECL_BIT_FIELD_TYPE (field))
            return true;
        }

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
         integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
           field;
           field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (FLOAT_TYPE_P (TREE_TYPE (field)))
            return true;

          if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
            return true;
        }

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
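/* For illustration only (hypothetical user-level types, not from this
   file).  Under the APCS 'integer like' rule above, even a one-word
   struct goes to memory if it has a second addressable field, while a
   struct made only of bit-fields stays in a register; under AAPCS any
   aggregate of at most one word is returned in r0:  */
#if 0
struct m { short a, b; };              /* APCS: memory;   AAPCS: r0 */
struct r { int a : 16; int b : 16; };  /* APCS: register; AAPCS: r0 */
#endif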
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };
static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
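/* For illustration only (hypothetical user code, not from this file):
   the strings in the table above are matched against the argument of
   __attribute__ ((pcs ("..."))):  */
#if 0
double f (double) __attribute__ ((pcs ("aapcs")));      /* base: result in r0/r1 */
double g (double) __attribute__ ((pcs ("aapcs-vfp")));  /* VFP:  result in d0    */
#endif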
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  gcc_assert (TREE_CODE (type) == FUNCTION_TYPE);

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
         (no argument is ever a candidate for a co-processor
         register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
        {
          if (user_pcs > ARM_PCS_AAPCS_LOCAL)
            sorry ("non-AAPCS derived PCS variant");
          else if (base_rules && user_pcs != ARM_PCS_AAPCS)
            error ("variadic functions must use the base AAPCS variant");
        }

      if (base_rules)
        return ARM_PCS_AAPCS;
      else if (user_convention)
        return user_pcs;
      else if (decl && flag_unit_at_a_time)
        {
          /* Local functions never leak outside this compilation unit,
             so we are free to use whatever conventions are
             appropriate.  */
          /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
          struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
          if (i && i->local)
            return ARM_PCS_AAPCS_LOCAL;
        }
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
                    const_tree fntype ATTRIBUTE_UNUSED,
                    rtx libcall ATTRIBUTE_UNUSED,
                    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
  enum machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
         and 128-bit vector types, whether or not those modes are
         supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
        {
        case 8:
          mode = V2SImode;
          break;
        case 16:
          mode = V4SImode;
          break;
        default:
          return -1;
        }

      if (*modep == VOIDmode)
        *modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
         equivalent for the purposes of being homogeneous aggregates
         if they are the same size.  */
      if (*modep == mode)
        return 1;

      break;

    case ARRAY_TYPE:
      {
        int count;
        tree index = TYPE_DOMAIN (type);

        /* Can't handle incomplete types.  */
        if (!COMPLETE_TYPE_P (type))
          return -1;

        count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
        if (count == -1
            || !index
            || !TYPE_MAX_VALUE (index)
            || !host_integerp (TYPE_MAX_VALUE (index), 1)
            || !TYPE_MIN_VALUE (index)
            || !host_integerp (TYPE_MIN_VALUE (index), 1)
            || count < 0)
          return -1;

        count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
                  - tree_low_cst (TYPE_MIN_VALUE (index), 1));

        /* There must be no padding.  */
        if (!host_integerp (TYPE_SIZE (type), 1)
            || (tree_low_cst (TYPE_SIZE (type), 1)
                != count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    case RECORD_TYPE:
      {
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types.  */
        if (!COMPLETE_TYPE_P (type))
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count += sub_count;
          }

        /* There must be no padding.  */
        if (!host_integerp (TYPE_SIZE (type), 1)
            || (tree_low_cst (TYPE_SIZE (type), 1)
                != count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
        /* These aren't very interesting except in a degenerate case.  */
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types.  */
        if (!COMPLETE_TYPE_P (type))
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count = count > sub_count ? count : sub_count;
          }

        /* There must be no padding.  */
        if (!host_integerp (TYPE_SIZE (type), 1)
            || (tree_low_cst (TYPE_SIZE (type), 1)
                != count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    default:
      break;
    }

  return -1;
}
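/* For illustration only (hypothetical types, not used elsewhere): the
   walk above recognizes homogeneous floating-point aggregates of up to
   four elements, which the VFP variant of the AAPCS passes and returns
   in consecutive s/d registers:  */
#if 0
struct hfa2 { float x, y; };                /* count 2, *modep == SFmode */
struct hfa4 { double m[2]; double s, t; };  /* count 4, *modep == DFmode */
struct not_hfa { float x; double y; };      /* mixed element modes: -1   */
#endif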
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
        {
          sorry ("Thumb-1 hard-float VFP ABI");
          /* sorry() is not immediately fatal, so only display this once.  */
          seen_thumb1_vfp = true;
        }

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
          (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
                                       enum machine_mode mode, const_tree type,
                                       enum machine_mode *base_mode, int *count)
{
  enum machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
        *count = ag_count;
      else
        return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
                               enum machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  enum machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                                &ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
                             const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
                                                &pcum->aapcs_vfp_rmode,
                                                &pcum->aapcs_vfp_rcount);
}
static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
                    const_tree type ATTRIBUTE_UNUSED)
{
  int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
        pcum->aapcs_vfp_reg_alloc = mask << regno;
        if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
          {
            int i;
            int rcount = pcum->aapcs_vfp_rcount;
            int rshift = shift;
            enum machine_mode rmode = pcum->aapcs_vfp_rmode;
            rtx par;
            if (!TARGET_NEON)
              {
                /* Avoid using unsupported vector modes.  */
                if (rmode == V2SImode)
                  rmode = DImode;
                else if (rmode == V4SImode)
                  {
                    rmode = DImode;
                    rcount *= 2;
                    rshift /= 2;
                  }
              }
            par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
            for (i = 0; i < rcount; i++)
              {
                rtx tmp = gen_rtx_REG (rmode,
                                       FIRST_VFP_REGNUM + regno + i * rshift);
                tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                         GEN_INT (i * GET_MODE_SIZE (rmode)));
                XVECEXP (par, 0, i) = tmp;
              }

            pcum->aapcs_reg = par;
          }
        else
          pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
        return true;
      }
  return false;
}
static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
                               enum machine_mode mode,
                               const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
    {
      int count;
      enum machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                             &ag_mode, &count);

      if (!TARGET_NEON)
        {
          if (ag_mode == V2SImode)
            ag_mode = DImode;
          else if (ag_mode == V4SImode)
            {
              ag_mode = DImode;
              count *= 2;
            }
        }

      shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
        {
          rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
          tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
          XVECEXP (par, 0, i) = tmp;
        }

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
                   enum machine_mode mode ATTRIBUTE_UNUSED,
                   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
  return;
}
#define AAPCS_CP(X)                             \
  {                                             \
    aapcs_ ## X ## _cum_init,                   \
    aapcs_ ## X ## _is_call_candidate,          \
    aapcs_ ## X ## _allocate,                   \
    aapcs_ ## X ## _is_return_candidate,        \
    aapcs_ ## X ## _allocate_return_reg,        \
    aapcs_ ## X ## _advance                     \
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is
     BLKmode) can be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a
     call.  This routine must not fail and will only be called if
     is_return_candidate returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
                          const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}
static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
                                                        TYPE_MODE (type),
                                                        type))
          return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
                           const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
                                                        type))
          return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
                                                             mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (enum machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
                  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
         preparation phase, these are handled elsewhere in the
         compiler.  */

      if (slot >= 0)
        {
          /* A Co-processor register candidate goes either in its own
             class of registers or on the stack.  */
          if (!pcum->aapcs_cprc_failed[slot])
            {
              /* C1.cp - Try to allocate the argument to co-processor
                 registers.  */
              if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
                return;

              /* C2.cp - Put the argument on the stack and note that we
                 can't assign any more candidates in this slot.  We also
                 need to note that we have allocated stack space, so that
                 we won't later try to split a non-cprc candidate between
                 core registers and the stack.  */
              pcum->aapcs_cprc_failed[slot] = true;
              pcum->can_split = false;
            }

          /* We didn't get a register, so this argument goes on the
             stack.  */
          gcc_assert (pcum->can_split == false);
          return;
        }
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
    ncrn++;

  nregs = ARM_NUM_REGS2 (mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
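/* For illustration only (a sketch; register numbers assume the standard
   AAPCS core argument registers r0-r3):  */
#if 0
void f (int a, double b, int c);
/* a -> r0 (C4).  b needs doubleword alignment, so C3 rounds the NCRN
   from 1 up to 2 and b takes r2-r3 (C4).  c no longer fits: C6 sets the
   NCRN to 4 and C7/C8 place c on the stack.  */
#endif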
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
                          rtx libname,
                          tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
        pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
        {
          int i;

          for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
            {
              pcum->aapcs_cprc_failed[i] = false;
              aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
            }
        }
      return;
    }

  /* Legacy ABIs.  */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
           fn_arg;
           fn_arg = TREE_CHAIN (fn_arg))
        pcum->named_count += 1;

      if (! pcum->named_count)
        pcum->named_count = INT_MAX;
    }
}
/* Return true if mode/type need doubleword alignment.  */
static bool
arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
{
  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
          || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
}
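/* For illustration only (hypothetical declarations; PARM_BOUNDARY is 32
   bits on this target): both tests above can fire independently:  */
#if 0
long long ll;                                /* GET_MODE_ALIGNMENT (DImode) > 32 */
int a8 __attribute__ ((aligned (8)));        /* TYPE_ALIGN > 32                  */
#endif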
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
                  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
        return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
        {
          pcum->can_split = false;
          return NULL_RTX;
        }
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if (pcum->nregs & 1
      && ARM_DOUBLEWORD_ALIGN
      && arm_needs_doubleword_align (mode, type))
    pcum->nregs++;

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
          ? DOUBLEWORD_ALIGNMENT
          : PARM_BOUNDARY);
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
                       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
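/* For illustration only (a sketch using a hypothetical 16-byte struct):
   with two words already in r0-r1, a four-word argument is split between
   r2-r3 and the stack, and the function above reports the register part:  */
#if 0
struct four_words { int w[4]; };
void f (int a, int b, struct four_words c);
/* a:r0, b:r1, c:r2-r3 plus 8 bytes of stack; the return value here is
   (NUM_ARG_REGS - 2) * UNITS_PER_WORD == 8.  */
#endif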
/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
                          const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
        {
          aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
                                                              type);
          pcum->aapcs_cprc_slot = -1;
        }

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
          && pcum->named_count > pcum->nargs
          && TARGET_IWMMXT_ABI)
        pcum->iwmmxt_nregs += 1;
      else
        pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
                       enum machine_mode mode ATTRIBUTE_UNUSED,
                       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
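/* Note (editorial sketch): TYPE_SIZE is an INTEGER_CST for every
   fixed-size type, so the predicate above fires only for variable-sized
   types (for example a C99 variably modified type under the GNU
   extensions).  In that case the caller passes the object's address and
   the callee receives a hidden reference instead of a copy.  */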
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,          /* No #pragma [no_]long_calls is in effect.  */
  LONG,         /* #pragma long_calls is in effect.  */
  SHORT         /* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
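/* For illustration only (hypothetical user code, not from this file):
   these handlers implement the user-visible pragmas:  */
#if 0
#pragma long_calls            /* subsequent declarations get long_call  */
extern void far_func (void);
#pragma long_calls_off        /* back to the command-line default       */
#endif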
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
                          bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
        {
          warning (OPT_Wattributes, "%qE attribute only applies to functions",
                   name);
          *no_add_attrs = true;
        }
      /* FIXME: the argument if any is checked for type attributes;
         should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
          || TREE_CODE (*node) == METHOD_TYPE)
        {
          if (arm_isr_value (args) == ARM_FT_UNKNOWN)
            {
              warning (OPT_Wattributes, "%qE attribute ignored",
                       name);
              *no_add_attrs = true;
            }
        }
      else if (TREE_CODE (*node) == POINTER_TYPE
               && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
                   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
               && arm_isr_value (args) != ARM_FT_UNKNOWN)
        {
          *node = build_variant_type_copy (*node);
          TREE_TYPE (*node) = build_type_attribute_variant
            (TREE_TYPE (*node),
             tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
          *no_add_attrs = true;
        }
      else
        {
          /* Possibly pass this attribute on from the type to a decl.  */
          if (flags & ((int) ATTR_FLAG_DECL_NEXT
                       | (int) ATTR_FLAG_FUNCTION_NEXT
                       | (int) ATTR_FLAG_ARRAY_NEXT))
            {
              *no_add_attrs = true;
              return tree_cons (name, args, NULL_TREE);
            }
          else
            {
              warning (OPT_Wattributes, "%qE attribute ignored",
                       name);
            }
        }
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
                          int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
                                tree name ATTRIBUTE_UNUSED,
                                tree args ATTRIBUTE_UNUSED,
                                int flags ATTRIBUTE_UNUSED,
                                bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
        return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
        return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    /* This must set L2, not L1; assigning L1 here would make the
       mismatch test below compare TYPE1 against itself.  */
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
        attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
        attr_name = get_identifier ("short_call");
      else
        return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about functions defined in the same
     compilation unit.  */
  if (!TREE_STATIC (decl))
    return false;

  /* Make sure that SYMBOL always binds to the definition in this
     compilation unit.  */
  if (!targetm.binds_local_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_ONE_ONLY (decl))
        return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__ ((long_call))
        or b.  is within the scope of a #pragma long_calls
        or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
        or e.  is inside the scope of a #pragma no_long_calls
        or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
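/* For illustration only (hypothetical declarations, not from this file):  */
#if 0
void a (void) __attribute__ ((long_call));   /* case a: always a long call */
void d (void) __attribute__ ((short_call));  /* case d: never a long call  */
#endif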
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something for which we have no decl, or if we
     are generating code for Thumb-1.  */
  if (decl == NULL || TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
    return false;

  /* Cannot tail-call to long calls, since these are out of range of
     a branch instruction.  */
  if (arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
         example that we aren't returning a value from the sibling in
         a VFP register but then need to transfer it to a core
         register.  */
      rtx a, b;

      a = arm_function_value (TREE_TYPE (exp), decl, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                              cfun->decl, false);
      if (!rtx_equal_p (a, b))
        return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && DECL_WEAK (decl))
    return false;

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM)
        {
          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            crtl->uses_pic_offset_table = 1;
        }
      else
        {
          rtx seq, insn;

          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_reg_rtx (Pmode);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            {
              crtl->uses_pic_offset_table = 1;
              start_sequence ();

              arm_load_pic_register (0UL);

              seq = get_insns ();
              end_sequence ();

              for (insn = seq; insn; insn = NEXT_INSN (insn))
                if (INSN_P (insn))
                  INSN_LOCATOR (insn) = prologue_locator;

              /* We can be called during expansion of PHI nodes, where
                 we can't yet emit instructions directly in the final
                 insn stream.  Queue the insns on the entry edge, they will
                 be committed after everything else is expanded.  */
              insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
            }
        }
    }
}
rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      rtx insn;

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      /* VxWorks does not impose a fixed gap between segments; the run-time
         gap can be different from the object-file gap.  We therefore can't
         use GOTOFF unless we are absolutely sure that the symbol is in the
         same segment as the GOT.  Unfortunately, the flexibility of linker
         scripts means that we can't be sure of that in general, so assume
         that GOTOFF is never valid on VxWorks.  */
      if ((GET_CODE (orig) == LABEL_REF
           || (GET_CODE (orig) == SYMBOL_REF &&
               SYMBOL_REF_LOCAL_P (orig)))
          && NEED_GOT_RELOC
          && !TARGET_VXWORKS_RTP)
        insn = arm_pic_static_addr (orig, reg);
      else
        {
          rtx pat;
          rtx mem;

          /* If this function doesn't have a pic register, create one now.  */
          require_pic_register ();

          pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

          /* Make the MEM as close to a constant as possible.  */
          mem = SET_SRC (pat);
          gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
          MEM_READONLY_P (mem) = 1;
          MEM_NOTRAP_P (mem) = 1;

          insn = emit_insn (pat);
        }

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
         by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
        return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
          && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
        return orig;

      /* Handle the case where we have:
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
         CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
        {
          gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
          return orig;
        }

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                       base == reg ? 0 : reg);

      if (GET_CODE (offset) == CONST_INT)
        {
          /* The base register doesn't really matter, we only want to
             test the index for the appropriate mode.  */
          if (!arm_legitimate_index_p (mode, offset, SET, 0))
            {
              gcc_assert (can_create_pseudo_p ());
              offset = force_reg (Pmode, offset);
            }

          if (GET_CODE (offset) == CONST_INT)
            return plus_constant (Pmode, base, INTVAL (offset));
        }

      if (GET_MODE_SIZE (mode) > 4
          && (GET_MODE_CLASS (mode) == MODE_INT
              || TARGET_SOFT_FLOAT))
        {
          emit_insn (gen_addsi3 (reg, base, offset));
          return reg;
        }

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg--)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && crtl->args.info.nregs < 4)
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg--)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg++)
        if (pushed_regs_mask & (1 << reg))
          return reg;
    }

  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
         never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
         addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
                                UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
        {
          emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
      else /* TARGET_THUMB1 */
        {
          if (arm_pic_register != INVALID_REGNUM
              && REGNO (pic_reg) > LAST_LO_REGNUM)
            {
              /* We will have pushed the pic register, so we should always be
                 able to find a work register.  */
              pic_tmp = gen_rtx_REG (SImode,
                                     thumb_find_work_register (saved_regs));
              emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
              emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
              emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
            }
          else
            emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx, insn;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
                               UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
  return insn;
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (GET_CODE (x) != REG)
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
          || regno >= FIRST_PSEUDO_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}

/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
                                int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode
                  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
         to fixup invalid register choices.  */
      if (use_ldrd
          && GET_CODE (x) == POST_MODIFY
          && GET_CODE (addend) == REG)
        return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
              && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && ((GET_CODE (xop1) == CONST_INT
                    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now.  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
              && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode
                  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (GET_CODE (addend) != CONST_INT)
        return 0;

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
        return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
              && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && (thumb2_legitimate_index_p (mode, xop1, strict_p)
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
                        int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);

          if (TARGET_LDRD)
            return val > -256 && val < 256;
          else
            return val > -4096 && val < 4092;
        }

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
            && (mode == HImode
                || mode == HFmode
                || (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
        {
          rtx xiop0 = XEXP (index, 0);
          rtx xiop1 = XEXP (index, 1);

          return ((arm_address_register_rtx_p (xiop0, strict_p)
                   && power_of_two_operand (xiop1, SImode))
                  || (arm_address_register_rtx_p (xiop1, strict_p)
                      && power_of_two_operand (xiop0, SImode)));
        }
      else if (code == LSHIFTRT || code == ASHIFTRT
               || code == ASHIFT || code == ROTATERT)
        {
          rtx op = XEXP (index, 1);

          return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
                  && GET_CODE (op) == CONST_INT
                  && INTVAL (op) > 0
                  && INTVAL (op) <= 31);
        }
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
          || mode == HFmode
          || (outer == SIGN_EXTEND && mode == QImode))
        range = 256;
      else
        range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
          && INTVAL (index) < range
          && INTVAL (index) > -range);
}
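/* For illustration only (ARM state, a sketch): the cases above
   correspond to addressing forms such as

     ldr   r0, [r1, #4092]        @ reg + 12-bit immediate
     ldr   r0, [r1, r2, lsl #2]   @ reg + scaled register (MULT/shift form)
     ldrh  r0, [r1, #255]         @ HImode on ARMv4: 8-bit immediate range
     ldrd  r0, r1, [r2, #252]     @ DImode with LDRD: offsets in -255..255  */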
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand, i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (GET_CODE (op) != CONST_INT)
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            /* Thumb-2 allows only > -256 index range for its core register
               load/stores.  Since we allow SF/DF in core registers, we have
               to use the intersection between -256~4096 (core) and -1024~1024
               (coprocessor).  */
            && INTVAL (index) > -256
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
         and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
        return (code == CONST_INT
                && INTVAL (index) < 1024
                && INTVAL (index) > -1024
                && (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);
          /* ??? Can we assume ldrd for thumb2?  */
          /* Thumb-2 ldrd only has reg+const addressing modes.  */
          /* ldrd supports offsets of +-1020.
             However the ldr fallback does not.  */
          return val > -256 && val < 256 && (val & 3) == 0;
        }
      else
        return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
               && thumb2_index_mul_operand (xiop1))
              || (arm_address_register_rtx_p (xiop1, strict_p)
                  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
              && GET_CODE (op) == CONST_INT
              && INTVAL (op) > 0
              && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
          && INTVAL (index) < 4096
          && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
{
  int regno;

  if (GET_CODE (x) != REG)
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
          || regno > LAST_VIRTUAL_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || (GET_MODE_SIZE (mode) >= 4
              && (regno == STACK_POINTER_REGNUM
                  || regno >= FIRST_PSEUDO_REGISTER
                  || x == hard_frame_pointer_rtx
                  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
{
  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
          || reg_mentioned_p (arg_pointer_rtx, x)
          || reg_mentioned_p (virtual_incoming_args_rtx, x)
          || reg_mentioned_p (virtual_outgoing_args_rtx, x)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
          || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
           && reload_completed
           && (GET_CODE (x) == LABEL_REF
               || (GET_CODE (x) == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
           && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
         will be replaced with STACK, and SP relative addressing only
         permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
          && XEXP (x, 0) != frame_pointer_rtx
          && XEXP (x, 1) != frame_pointer_rtx
          && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
          && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
              || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
        return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
                || XEXP (x, 0) == arg_pointer_rtx)
               && GET_CODE (XEXP (x, 1)) == CONST_INT
               && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
        return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
         larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
         just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (GET_CODE (XEXP (x, 0)) == REG
               && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
               && GET_MODE_SIZE (mode) >= 4
               && GET_CODE (XEXP (x, 1)) == CONST_INT
               && INTVAL (XEXP (x, 1)) >= 0
               && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;

      else if (GET_CODE (XEXP (x, 0)) == REG
               && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
                   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
                   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
                       && REGNO (XEXP (x, 0))
                          <= LAST_VIRTUAL_POINTER_REGISTER))
               && GET_MODE_SIZE (mode) >= 4
               && GET_CODE (XEXP (x, 1)) == CONST_INT
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && GET_MODE_SIZE (mode) == 4
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
              && (val + GET_MODE_SIZE (mode)) <= 128
              && (val & 3) == 0);
    }
}
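/* Worked examples (added commentary, not from the original sources):
     QImode (size 1): byte offsets 0..31 are valid (unscaled 5-bit field);
     HImode (size 2): even offsets 0..62 (5-bit field scaled by 2);
     SImode (size 4): word-aligned offsets 0..124, since 124 + 4 <= 128
                      still holds while 128 itself would not.  */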
bool
arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS
          || rclass == HI_REGS
          || rclass == NO_REGS
          || rclass == STACK_REG)
        return LO_REGS;
      else
        return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
static rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
         otherwise other uses of r0 (e.g. setting up function arguments) may
         clobber the value.  */

      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, 0);
      emit_move_insn (target, tmp);
    }
  return target;
}
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx insns, label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
                        gen_rtvec (4, x, GEN_INT (reloc), label,
                                   GEN_INT (TARGET_ARM ? 8 : 4)),
                        UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
                                     LCT_PURE, /* LCT_CONST?  */
                                     Pmode, 1, reg, Pmode);

  insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
                                       gen_rtx_CONST (VOIDmode, label),
                                       GEN_INT (!TARGET_ARM)),
                            UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != 0);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          /* Original scheme */
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, x);
        }
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

          /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
             share the LDM result with other LD model accesses.  */
          eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
                                UNSPEC_TLS);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, eqv);

          /* Load the addend.  */
          addend = gen_rtx_UNSPEC (Pmode,
                                   gen_rtvec (2, x,
                                              GEN_INT (TLS_LDO32)),
                                   UNSPEC_TLS);
          addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
          dest = gen_rtx_PLUS (Pmode, dest, addend);
        }
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
                                       GEN_INT (TARGET_ARM ? 8 : 4)),
                            UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
        emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
        emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
        {
          emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
          emit_move_insn (reg, gen_const_mem (SImode, reg));
        }

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (2, x, GEN_INT (TLS_LE32)),
                            UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      gcc_unreachable ();
    }
}
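/* Illustration (added commentary, not from the original sources): every
   model above ultimately yields "thread pointer + offset".  For instance,
   the local-exec path conceptually builds

       tp   = arm_load_tp (NULL_RTX);            load the thread pointer
       off  = const (unspec [x, TLS_LE32]);      link-time tp-relative offset
       addr = (plus tp (force_reg off))

   and the models differ only in how that offset is obtained (a libcall,
   a GOT-relative load, or a link-time constant).  */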
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
        return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (arm_tls_symbol_p (x))
    return legitimize_tls_address (x, NULL_RTX);

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
          && GET_CODE (xop1) == CONST_INT)
        {
          HOST_WIDE_INT n, low_n;
          rtx base_reg, val;
          n = INTVAL (xop1);

          /* VFP addressing modes actually allow greater offsets, but for
             now we just stick with the lowest common denominator.  */
          if (mode == DImode
              || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
            {
              low_n = n & 0x0f;
              n &= ~0x0f;
              if (low_n > 4)
                {
                  n += 16;
                  low_n -= 16;
                }
            }
          else
            {
              low_n = ((mode) == TImode ? 0
                       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
              n -= low_n;
            }

          base_reg = gen_reg_rtx (SImode);
          val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
          emit_move_insn (base_reg, val);
          x = plus_constant (Pmode, base_reg, low_n);
        }
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (GET_CODE (x) == CONST_INT && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
         use an 8-bit index.  So let's use a 12-bit index for SImode only
         and hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
        {
          /* It'll most probably be more efficient to generate the base
             with more bits set and use a negative index instead.  */
          base |= mask;
          index -= mask;
        }
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
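/* Worked example (added commentary, not from the original sources): an
   SImode access to absolute address 0x1234 uses bits = 12, so
   mask = 0xfff, base = 0x1000 and index = 0x234.  bit_count (0x1000) is 1,
   below the (32 - 12)/2 threshold, so the base is materialized as-is and
   the access becomes base_reg + 0x234, a single pre-indexed ldr/str.  */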
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
  if (arm_tls_symbol_p (x))
    return legitimize_tls_address (x, NULL_RTX);

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
          || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
         then offsetting that.  Don't do this when optimizing for space
         since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
          && offset < 256 + 31 * GET_MODE_SIZE (mode))
        {
          HOST_WIDE_INT delta;

          if (offset >= 256)
            delta = offset - (256 - GET_MODE_SIZE (mode));
          else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
            delta = 31 * GET_MODE_SIZE (mode);
          else
            delta = offset & (~31 * GET_MODE_SIZE (mode));

          xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
                                NULL_RTX);
          x = plus_constant (Pmode, xop0, delta);
        }
      else if (offset < 0 && offset > -256)
        /* Small negative offsets are best done with a subtract before the
           dereference, forcing these into a register normally takes two
           instructions.  */
        x = force_operand (x, NULL_RTX);
      else
        {
          /* For the remaining cases, force the constant into a register.  */
          xop1 = force_reg (SImode, xop1);
          x = gen_rtx_PLUS (SImode, xop0, xop1);
        }
    }
  else if (GET_CODE (x) == PLUS
           && s_register_operand (XEXP (x, 1), SImode)
           && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
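/* Worked example (added commentary, not from the original sources): for an
   SImode access at [r, #300] when the offset-folding branch is taken, 300
   exceeds the 5-bit scaled range (>= 32 * 4), so delta = 300 - (256 - 4)
   = 48 and the base is biased by 252: "add rT, r, #252" followed by a
   [rT, #48] access, both encodable as 16-bit Thumb instructions.  */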
bool
arm_legitimize_reload_address (rtx *p,
                               enum machine_mode mode,
                               int opnum, int type,
                               int ind_levels ATTRIBUTE_UNUSED)
{
  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (*p) == PLUS
      && GET_CODE (XEXP (*p, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
      && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (*p, 1)) == CONST_INT)
    {
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
                   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
                   VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  if (GET_CODE (*p) == PLUS
      && GET_CODE (XEXP (*p, 0)) == REG
      && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
      /* If the base register is equivalent to a constant, let the generic
         code handle it.  Otherwise we will run into problems if a future
         reload pass decides to rematerialize the constant.  */
      && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
      && GET_CODE (XEXP (*p, 1)) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
      HOST_WIDE_INT low, high;

      /* Detect coprocessor load/stores.  */
      bool coproc_p = ((TARGET_HARD_FLOAT
                        && TARGET_VFP
                        && (mode == SFmode || mode == DFmode))
                       || (TARGET_REALLY_IWMMXT
                           && VALID_IWMMXT_REG_MODE (mode))
                       || (TARGET_NEON
                           && (VALID_NEON_DREG_MODE (mode)
                               || VALID_NEON_QREG_MODE (mode))));

      /* For some conditions, bail out when lower two bits are unaligned.  */
      if ((val & 0x3) != 0
          /* Coprocessor load/store indexes are 8-bits + '00' appended.  */
          && (coproc_p
              /* For DI, and DF under soft-float: */
              || ((mode == DImode || mode == DFmode)
                  /* Without ldrd, we use stm/ldm, which does not
                     fare well with unaligned bits.  */
                  && (! TARGET_LDRD
                      /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4.  */
                      || TARGET_THUMB2))))
        return false;

      /* When breaking down a [reg+index] reload address into
         [(reg+high)+low], of which the (reg+high) gets turned into a reload
         add insn, we try to decompose the index into high/low values that
         can often also lead to better reload CSE.
         For example:
                 ldr r0, [r2, #4100]  // Offset too large
                 ldr r1, [r2, #4104]  // Offset too large

         is best reloaded as:
                 add t1, r2, #4096
                 ldr r0, [t1, #4]
                 add t2, r2, #4096
                 ldr r1, [t2, #8]

         which post-reload CSE can simplify in most cases to eliminate the
         second add instruction:
                 add t1, r2, #4096
                 ldr r0, [t1, #4]
                 ldr r1, [t1, #8]

         The idea here is that we want to split out the bits of the constant
         as a mask, rather than as subtracting the maximum offset that the
         respective type of load/store used can handle.

         When encountering negative offsets, we can still utilize it even if
         the overall offset is positive; sometimes this may lead to an
         immediate that can be constructed with fewer instructions.
         For example:
                 ldr r0, [r2, #0x3FFFFC]

         This is best reloaded as:
                 add t1, r2, #0x400000
                 ldr r0, [t1, #-4]

         The trick for spotting this for a load insn with N bits of offset
         (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
         negative offset that is going to make bit N and all the bits below
         it become zero in the remainder part.

         The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
         to sign-magnitude addressing (i.e. separate +- bit, or 1's
         complement), used in most cases of ARM load/store instructions.  */

#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)                                  \
      (((VAL) & ((1 << (N)) - 1))                                       \
       ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))   \
       : 0)
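/* Worked example (added commentary, not from the original sources): with
   VAL = 0x3FFFFC and a 12-bit offset field (N = 12):

     low = ((0x3FFFFC & 0x1fff) ^ 0x1000) - 0x1000
         = (0x1ffc ^ 0x1000) - 0x1000
         = 0xffc - 0x1000
         = -4

   so high = VAL - low = 0x400000, giving exactly the
   "add t1, r2, #0x400000 / ldr r0, [t1, #-4]" split shown above.  */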
      if (coproc_p)
        {
          low = SIGN_MAG_LOW_ADDR_BITS (val, 10);

          /* NEON quad-word load/stores are made of two double-word accesses,
             so the valid index range is reduced by 8.  Treat as 9-bit range
             if we go over it.  */
          if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
            low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
        }
      else if (GET_MODE_SIZE (mode) == 8)
        {
          if (TARGET_LDRD)
            low = (TARGET_THUMB2
                   ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
                   : SIGN_MAG_LOW_ADDR_BITS (val, 8));
          else
            /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
               to access doublewords.  The supported load/store offsets are
               -8, -4, and 4, which we try to produce here.  */
            low = ((val & 0xf) ^ 0x8) - 0x8;
        }
      else if (GET_MODE_SIZE (mode) < 8)
        {
          /* NEON element load/stores do not have an offset.  */
          if (TARGET_NEON_FP16 && mode == HFmode)
            return false;

          if (TARGET_THUMB2)
            {
              /* Thumb-2 has an asymmetrical index range of (-256,4096).
                 Try the wider 12-bit range first, and re-try if the result
                 is out of range.  */
              low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
              if (low < -255)
                low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
            }
          else
            {
              if (mode == HImode || mode == HFmode)
                {
                  if (arm_arch4)
                    low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
                  else
                    {
                      /* The storehi/movhi_bytes fallbacks can use only
                         [-4094,+4094] of the full ldrb/strb index range.  */
                      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
                      if (low == 4095 || low == -4095)
                        return false;
                    }
                }
              else
                low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
            }
        }
      else
        return false;

      high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
               ^ (unsigned HOST_WIDE_INT) 0x80000000)
              - (unsigned HOST_WIDE_INT) 0x80000000);
      /* Check for overflow or zero */
      if (low == 0 || high == 0 || (high + low != val))
        return false;

      /* Reload the high part into a base reg; leave the low part
         in the mem.  */
      *p = gen_rtx_PLUS (GET_MODE (*p),
                         gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
                                       GEN_INT (high)),
                         GEN_INT (low));
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
                   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
                   VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  return false;
}
bool
thumb_legitimize_reload_address (rtx *x_p,
                                 enum machine_mode mode,
                                 int opnum, int type,
                                 int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  if (GET_CODE (x) == PLUS
      && GET_MODE_SIZE (mode) < 4
      && REG_P (XEXP (x, 0))
      && XEXP (x, 0) == stack_pointer_rtx
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
                   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  /* If both registers are hi-regs, then it's better to reload the
     entire expression rather than each register individually.  That
     only requires one reload register rather than two.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
                   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  return false;
}
/* Test for various thread-local symbols.  */

/* Return TRUE if X is a thread-local symbol.  */

static bool
arm_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Helper for arm_tls_referenced_p.  */

static int
arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
{
  /* At present, we have no support for Neon structure constants, so forbid
     them here.  It might be possible to handle simple cases like 0 and -1
     in future.  */
  if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
    return false;

  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return (GET_CODE (x) == CONST_INT
          || GET_CODE (x) == CONST_DOUBLE
          || CONSTANT_ADDRESS_P (x)
          || flag_pic);
}

static bool
arm_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
          && (TARGET_32BIT
              ? arm_legitimate_constant_p_1 (mode, x)
              : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
          && !offset_within_block_p (base, INTVAL (offset)))
        return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)                                            \
  (GET_CODE (X) == REG                                                  \
   || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))

#define REG_OR_SUBREG_RTX(X)                    \
  (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  enum machine_mode mode = GET_MODE (x);
  int total;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          int cycles = 0;
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

          while (i)
            {
              i >>= 2;
              cycles++;
            }
          return COSTS_N_INSNS (2) + cycles;
        }
      return COSTS_N_INSNS (1) + 16;

    case SET:
      return (COSTS_N_INSNS (1)
              + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
                     + (GET_CODE (SET_DEST (x)) == MEM)));

    case CONST_INT:
      if (outer == SET)
        {
          if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
            return 0;
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
                || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
        return total;

      if (arm_arch6)
        return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
         we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
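/* Worked example (added commentary, not from the original sources): a
   Thumb-1 multiply by the constant 20 takes the MULT branch above and
   charges one unit per two bits consumed (20 -> 5 -> 1 -> 0, i.e.
   cycles = 3), for a total of COSTS_N_INSNS (2) + 3.  */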
static bool
arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);
  enum rtx_code subcode;
  rtx operand;
  enum rtx_code code = GET_CODE (x);
  *total = 0;

  switch (code)
    {
    case MEM:
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
      return true;

    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      if (TARGET_HARD_FLOAT && mode == SFmode)
        *total = COSTS_N_INSNS (2);
      else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
        *total = COSTS_N_INSNS (4);
      else
        *total = COSTS_N_INSNS (20);
      return false;

    case ROTATE:
      if (GET_CODE (XEXP (x, 1)) == REG)
        *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
      else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
        *total = rtx_cost (XEXP (x, 1), code, 1, speed);

      /* Fall through */
    case ROTATERT:
      if (mode != SImode)
        {
          *total += COSTS_N_INSNS (4);
          return true;
        }

      /* Fall through */
    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
      if (mode == DImode)
        {
          *total += COSTS_N_INSNS (3);
          return true;
        }

      *total += COSTS_N_INSNS (1);
      /* Increase the cost of complex shifts because they aren't any faster,
         and reduce dual issue opportunities.  */
      if (arm_tune_cortex_a9
          && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
        ++*total;

      return true;

    case MINUS:
      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
          if (GET_CODE (XEXP (x, 0)) == CONST_INT
              && const_ok_for_arm (INTVAL (XEXP (x, 0))))
            {
              *total += rtx_cost (XEXP (x, 1), code, 1, speed);
              return true;
            }

          if (GET_CODE (XEXP (x, 1)) == CONST_INT
              && const_ok_for_arm (INTVAL (XEXP (x, 1))))
            {
              *total += rtx_cost (XEXP (x, 0), code, 0, speed);
              return true;
            }

          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
                  && arm_const_double_rtx (XEXP (x, 0)))
                {
                  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
                  return true;
                }

              if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
                  && arm_const_double_rtx (XEXP (x, 1)))
                {
                  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
                  return true;
                }

              return false;
            }
          *total = COSTS_N_INSNS (20);
          return false;
        }

      *total = COSTS_N_INSNS (1);
      if (GET_CODE (XEXP (x, 0)) == CONST_INT
          && const_ok_for_arm (INTVAL (XEXP (x, 0))))
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          return true;
        }

      subcode = GET_CODE (XEXP (x, 1));
      if (subcode == ASHIFT || subcode == ASHIFTRT
          || subcode == LSHIFTRT
          || subcode == ROTATE || subcode == ROTATERT)
        {
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
          return true;
        }

      /* A shift as a part of RSB costs no more than RSB itself.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          return true;
        }

      if (subcode == MULT
          && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
        {
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
          return true;
        }

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
          || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
          if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
              && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
            *total += COSTS_N_INSNS (1);

          return true;
        }

      /* Fall through */

    case PLUS:
      if (code == PLUS && arm_arch6 && mode == SImode
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (1);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
                              0, speed);
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          return true;
        }

      /* MLA: All arguments must be registers.  We filter out
         multiplication by a power of two, so that we fall down into
         the code below.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
          && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          /* The cost comes from the cost of the multiply.  */
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
                  && arm_const_double_rtx (XEXP (x, 1)))
                {
                  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
                  return true;
                }

              return false;
            }

          *total = COSTS_N_INSNS (20);
          return false;
        }

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
          || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
          if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
              && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
            *total += COSTS_N_INSNS (1);
          return true;
        }

      /* Fall through */

    case AND: case XOR: case IOR:

      /* Normally the frame registers will be split into reg+const during
         reload, so it is a bad idea to combine them with other instructions,
         since then they might not be moved outside of loops.  As a compromise
         we allow integration with ops that have a constant as their second
         operand.  */
      if (REG_OR_SUBREG_REG (XEXP (x, 0))
          && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
          && GET_CODE (XEXP (x, 1)) != CONST_INT)
        *total = COSTS_N_INSNS (1);

      if (mode == DImode)
        {
          *total += COSTS_N_INSNS (2);
          if (GET_CODE (XEXP (x, 1)) == CONST_INT
              && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
            {
              *total += rtx_cost (XEXP (x, 0), code, 0, speed);
              return true;
            }

          return false;
        }

      *total += COSTS_N_INSNS (1);
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
        {
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }
      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
          || subcode == LSHIFTRT
          || subcode == ROTATE || subcode == ROTATERT)
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
          return true;
        }

      if (subcode == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
          return true;
        }

      if (subcode == UMIN || subcode == UMAX
          || subcode == SMIN || subcode == SMAX)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      return false;

    case MULT:
      /* This should have been handled by the CPU specific routines.  */
      gcc_unreachable ();

    case TRUNCATE:
      if (arm_arch3m && mode == SImode
          && GET_CODE (XEXP (x, 0)) == LSHIFTRT
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
              == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
          && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
        {
          *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
          return true;
        }
      *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
      return false;

    case NEG:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
          *total = COSTS_N_INSNS (2);
          return false;
        }

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
      if (mode == SImode && code == NOT)
        {
          subcode = GET_CODE (XEXP (x, 0));
          if (subcode == ASHIFT || subcode == ASHIFTRT
              || subcode == LSHIFTRT
              || subcode == ROTATE || subcode == ROTATERT
              || (subcode == MULT
                  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
            {
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
              /* Register shifts cost an extra cycle.  */
              if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
                *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
                                                        subcode, 1, speed);
              return true;
            }
        }

      return false;

    case IF_THEN_ELSE:
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        {
          *total = COSTS_N_INSNS (4);
          return true;
        }

      operand = XEXP (x, 0);

      if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
             || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
            && GET_CODE (XEXP (operand, 0)) == REG
            && REGNO (XEXP (operand, 0)) == CC_REGNUM))
        *total += COSTS_N_INSNS (1);
      *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
                 + rtx_cost (XEXP (x, 2), code, 2, speed));
      return true;

    case NE:
      if (mode == SImode && XEXP (x, 1) == const0_rtx)
        {
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }
      goto scc_insn;

    case GE:
      if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
          && mode == SImode && XEXP (x, 1) == const0_rtx)
        {
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }
      goto scc_insn;

    case LT:
      if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
          && mode == SImode && XEXP (x, 1) == const0_rtx)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }
      goto scc_insn;

    case EQ:
    case GT:
    case LE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
    scc_insn:
      /* SCC insns.  In the case where the comparison has already been
         performed, then they cost 2 instructions.  Otherwise they need
         an additional comparison before them.  */
      *total = COSTS_N_INSNS (2);
      if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
        {
          return true;
        }

      /* Fall through */
    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
        {
          *total = 0;
          return true;
        }

      *total += COSTS_N_INSNS (1);
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
        {
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }

      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
          || subcode == LSHIFTRT
          || subcode == ROTATE || subcode == ROTATERT)
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
          return true;
        }

      if (subcode == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
          return true;
        }

      return false;

    case UMIN:
    case UMAX:
    case SMIN:
    case SMAX:
      *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
      if (GET_CODE (XEXP (x, 1)) != CONST_INT
          || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
        *total += rtx_cost (XEXP (x, 1), code, 1, speed);
      return true;

    case ABS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
          *total = COSTS_N_INSNS (20);
          return false;
        }
      *total = COSTS_N_INSNS (1);
      if (mode == DImode)
        *total += COSTS_N_INSNS (3);
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      *total = 0;
      if (GET_MODE_CLASS (mode) == MODE_INT)
        {
          rtx op = XEXP (x, 0);
          enum machine_mode opmode = GET_MODE (op);

          if (mode == DImode)
            *total += COSTS_N_INSNS (1);

          if (opmode != SImode)
            {
              if (MEM_P (op))
                {
                  /* If !arm_arch4, we use one of the extendhisi2_mem
                     or movhi_bytes patterns for HImode.  For a QImode
                     sign extension, we first zero-extend from memory
                     and then perform a shift sequence.  */
                  if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
                    *total += COSTS_N_INSNS (2);
                }
              else if (arm_arch6)
                *total += COSTS_N_INSNS (1);

              /* We don't have the necessary insn, so we need to perform some
                 other operation.  */
              else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
                /* An and with constant 255.  */
                *total += COSTS_N_INSNS (1);
              else
                /* A shift sequence.  Increase costs slightly to avoid
                   combining two shifts into an extend operation.  */
                *total += COSTS_N_INSNS (2) + 1;
            }

          return false;
        }

      switch (GET_MODE (XEXP (x, 0)))
        {
        case V8QImode:
        case V4HImode:
        case V2SImode:
        case V4QImode:
        case V2HImode:
          *total = COSTS_N_INSNS (1);
          return false;

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
      return true;

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x))
          || const_ok_for_arm (~INTVAL (x)))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
                                                  INTVAL (x), NULL_RTX,
                                                  NULL_RTX, 0, 0));
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (3);
      return true;

    case HIGH:
      *total = COSTS_N_INSNS (1);
      return true;

    case LO_SUM:
      *total = COSTS_N_INSNS (1);
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
      return true;

    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (4);
      return true;

    case UNSPEC:
      /* We cost this as high as our memory costs to allow this to
         be hoisted from loops.  */
      if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
        {
          *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
        }
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
          && TARGET_HARD_FLOAT
          && outer == SET
          && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
          && neon_immediate_valid_for_move (x, mode, NULL, NULL))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (4);
      return true;

    default:
      *total = COSTS_N_INSNS (4);
      return false;
    }
}
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          /* Thumb1 mul instruction can't operate on const.  We must load it
             into a register first.  */
          int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
          return COSTS_N_INSNS (1) + const_size;
        }
      return COSTS_N_INSNS (1);

    case SET:
      return (COSTS_N_INSNS (1)
              + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
                     + (GET_CODE (SET_DEST (x)) == MEM)));

    case CONST_INT:
      if (outer == SET)
        {
          if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
            return COSTS_N_INSNS (1);
          /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
          if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
            return COSTS_N_INSNS (2);
          /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
                || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
        {
        case QImode:
          return (1 + (mode == DImode ? 4 : 0)
                  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));

        case HImode:
          return (4 + (mode == DImode ? 4 : 0)
                  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));

        case SImode:
          return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));

        default:
          return 99;
        }

    default:
      return 99;
    }
}
/* RTX costs when optimizing for size.  */
static bool
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                    int *total)
{
  enum machine_mode mode = GET_MODE (x);
  if (TARGET_THUMB1)
    {
      *total = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */
  switch (code)
    {
    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
         a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
        *total = COSTS_N_INSNS (1);
      else if (flag_pic
               && GET_CODE (XEXP (x, 0)) == PLUS
               && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
        /* This will be split into two instructions.
           See arm.md:calculate_pic_address.  */
        *total = COSTS_N_INSNS (2);
      else
        *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return true;

    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      /* Needs a libcall, so it costs about this.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case ROTATE:
      if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
        {
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
          return true;
        }
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
          return true;
        }
      else if (mode == SImode)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
          /* Slightly disparage register shifts, but not by much.  */
          if (GET_CODE (XEXP (x, 1)) != CONST_INT)
            *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
          return true;
        }

      /* Needs a libcall.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *total = COSTS_N_INSNS (1);
          return false;
        }

      if (mode == SImode)
        {
          enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
          enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));

          if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
              || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
              || subcode1 == ROTATE || subcode1 == ROTATERT
              || subcode1 == ASHIFT || subcode1 == LSHIFTRT
              || subcode1 == ASHIFTRT)
            {
              /* It's just the cost of the two operands.  */
              *total = 0;
              return false;
            }

          *total = COSTS_N_INSNS (1);
          return false;
        }

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *total = COSTS_N_INSNS (1);
          return false;
        }

      /* A shift as a part of ADD costs nothing.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
          *total += rtx_cost (XEXP (x, 1), code, 1, false);
          return true;
        }

      /* Fall through */
    case AND: case XOR: case IOR:
      if (mode == SImode)
        {
          enum rtx_code subcode = GET_CODE (XEXP (x, 0));

          if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
              || subcode == LSHIFTRT || subcode == ASHIFTRT
              || (code == AND && subcode == NOT))
            {
              /* It's just the cost of the two operands.  */
              *total = 0;
              return false;
            }
        }

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case MULT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *total = COSTS_N_INSNS (1);
          return false;
        }

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      return false;

    case IF_THEN_ELSE:
      *total = 0;
      return false;

    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode))
        *total = 0;
      else
        *total = COSTS_N_INSNS (1);
      return false;

    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      return arm_rtx_costs_1 (x, outer_code, total, 0);

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x)))
        /* A multiplication by a constant requires another instruction
           to load the constant to a register.  */
        *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
                                ? 1 : 0);
      else if (const_ok_for_arm (~INTVAL (x)))
        *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
      else if (const_ok_for_arm (-INTVAL (x)))
        {
          if (outer_code == COMPARE || outer_code == PLUS
              || outer_code == MINUS)
            *total = 0;
          else
            *total = COSTS_N_INSNS (1);
        }
      else
        *total = COSTS_N_INSNS (2);
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (2);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
          && TARGET_HARD_FLOAT
          && outer_code == SET
          && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
          && neon_immediate_valid_for_move (x, mode, NULL, NULL))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
         cost of these slightly.  */
      *total = COSTS_N_INSNS (1) + 1;
      return true;

    default:
      if (mode != VOIDmode)
        *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
        *total = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}
/* RTX costs entry point: dispatch to the size costs or to the
   tuning-selected speed costs.  */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
               int *total, bool speed)
{
  if (!speed)
    return arm_size_rtx_costs (x, (enum rtx_code) code,
                               (enum rtx_code) outer_code, total);
  else
    return current_tune->rtx_costs (x, (enum rtx_code) code,
                                    (enum rtx_code) outer_code,
                                    total, speed);
}
/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
   supported on any "slowmul" cores, so it can be ignored.  */

static bool
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT
          || mode == DImode)
        {
          *total = COSTS_N_INSNS (20);
          return false;
        }

      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;

          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 2;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
              cost++;
            }

          *total = COSTS_N_INSNS (cost);
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
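/* Illustration (added; not part of the original sources).  A standalone
   restatement of the Booth-step estimate above, convenient for checking
   values by hand; "slowmul_const_mult_cost" is a hypothetical name and the
   function is not used anywhere.  */
#if 0
static int
slowmul_const_mult_cost (unsigned int i, int const_ok)
{
  int cost = const_ok ? 4 : 8;  /* Base cost, as tuned above.  */
  int j;

  /* One extra unit per 2-bit Booth step until the constant is consumed.
     E.g. i = 0xff, const_ok = 1: four steps, so the result is 8.  */
  for (j = 0; i && j < 32; j += 2)
    {
      i >>= 2;
      cost++;
    }
  return cost;
}
#endif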
/* RTX cost for cores with a fast multiply unit (M variants).  */

static bool
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  /* ??? should thumb2 use different costs?  */
  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;

          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 8;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
              cost++;
            }

          *total = COSTS_N_INSNS (cost);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (4);
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
        }

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
   so it can be ignored.  */

static bool
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                      int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) != MULT)
        return arm_rtx_costs_1 (x, outer_code, total, speed);

      /* A COMPARE of a MULT is slow on XScale; the muls instruction
         will stall until the multiplication is complete.  */
      *total = COSTS_N_INSNS (3);
      return false;

    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          /* If operand 1 is a constant we can more accurately
             calculate the cost of the multiply.  The multiplier can
             retire 15 bits on the first cycle and a further 12 on the
             second.  We do, of course, have to load the constant into
             a register first.  */
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
          /* There's a general overhead of one cycle.  */
          int cost = 1;
          unsigned HOST_WIDE_INT masked_const;

          if (i & 0x80000000)
            i = ~i;

          i &= (unsigned HOST_WIDE_INT) 0xffffffff;

          masked_const = i & 0xffff8000;
          if (masked_const != 0)
            {
              cost++;
              masked_const = i & 0xf8000000;
              if (masked_const != 0)
                cost++;
            }
          *total = COSTS_N_INSNS (cost);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (3);
          return false;
        }

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
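/* Worked example (added commentary, not from the original sources): for a
   multiply by 0x12345 the early-retirement model above starts at cost 1,
   sees 0x12345 & 0xffff8000 = 0x10000 (bits survive the first 15-bit
   cycle, so cost becomes 2), and 0x12345 & 0xf8000000 = 0 (nothing
   survives the second), giving COSTS_N_INSNS (2).  Negative constants are
   first inverted, since the magnitude is what the multiplier retires.  */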
/* RTX costs for 9e (and later) cores.  */

static bool
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                  int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      switch (code)
        {
        case MULT:
          *total = COSTS_N_INSNS (3);
          return true;

        default:
          *total = thumb1_rtx_costs (x, code, outer_code);
          return true;
        }
    }

  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
        }

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
        return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
         operand for INSN.  If we have a shifted input operand and the
         instruction we depend on is another ALU instruction, then we may
         have to account for an additional stall.  */
      if (shift_opnum != 0
          && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
        {
          rtx shifted_operand;
          int opno;

          /* Get the shifted operand.  */
          extract_insn (insn);
          shifted_operand = recog_data.operand[shift_opnum];

          /* Iterate over all the operands in DEP.  If we write an operand
             that overlaps with SHIFTED_OPERAND, then we have to increase
             the cost of this dependency.  */
          extract_insn (dep);
          preprocess_constraints ();
          for (opno = 0; opno < recog_data.n_operands; opno++)
            {
              /* We can ignore strict inputs.  */
              if (recog_data.operand_type[opno] == OP_IN)
                continue;

              if (reg_overlap_mentioned_p (recog_data.operand[opno],
                                           shifted_operand))
                {
                  *cost = 2;
                  return false;
                }
            }
        }
    }
  return true;
}
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  switch (REG_NOTE_KIND (link))
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
          && recog_memoized (dep) >= 0)
        {
          if (GET_CODE (PATTERN (insn)) == SET)
            {
              if (GET_MODE_CLASS
                    (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
                  || GET_MODE_CLASS
                    (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
                {
                  enum attr_type attr_type_insn = get_attr_type (insn);
                  enum attr_type attr_type_dep = get_attr_type (dep);

                  /* By default all dependencies of the form
                     s0 = s0 <op> s1
                     s0 = s0 <op> s2
                     have an extra latency of 1 cycle because
                     of the input and output dependency in this
                     case.  However this gets modeled as a true
                     dependency and hence all these checks.  */
                  if (REG_P (SET_DEST (PATTERN (insn)))
                      && REG_P (SET_DEST (PATTERN (dep)))
                      && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
                                                  SET_DEST (PATTERN (dep))))
                    {
                      /* FMACS is a special case where the dependent
                         instruction can be issued 3 cycles before
                         the normal latency in case of an output
                         dependency.  */
                      if ((attr_type_insn == TYPE_FMACS
                           || attr_type_insn == TYPE_FMACD)
                          && (attr_type_dep == TYPE_FMACS
                              || attr_type_dep == TYPE_FMACD))
                        {
                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) - 3;
                          else
                            *cost = insn_default_latency (dep);
                          return false;
                        }
                      else
                        {
                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) + 1;
                          else
                            *cost = insn_default_latency (dep);
                        }
                      return false;
                    }
                }
            }
        }
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
          && get_attr_type (insn) != TYPE_BRANCH)
        {
          *cost = 3;
          return false;
        }

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
          || get_attr_conds (insn) == CONDS_USE)
        {
          *cost = 0;
          return false;
        }
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

static int
arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
                        reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
          || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
        return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
               || (from != IWMMXT_REGS && to == IWMMXT_REGS))
        return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
        return 20;
      else
        return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
        return 4;
      else
        return 2;
    }
}
/* Implement TARGET_MEMORY_MOVE_COST.  */

static int
arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
                      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
        return 8;
      else
        return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
        return cost;
    }

  /* XXX Is this strictly true?  */
  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (REG_NOTE_KIND (link) == 0
      && GET_CODE (insn) == CALL_INSN)
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && GET_CODE (SET_SRC (i_pat)) == MEM
      && (d_pat = single_set (dep)) != NULL
      && GET_CODE (SET_DEST (d_pat)) == MEM)
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store; there is no conflict if the load reads
         from a cached area.  Assume that loads from the stack, and from the
         constant pool are cached, and that others will miss.  This is a
         hack.  */

      if ((GET_CODE (src_mem) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (src_mem))
          || reg_mentioned_p (stack_pointer_rtx, src_mem)
          || reg_mentioned_p (frame_pointer_rtx, src_mem)
          || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
        return 1;
    }

  return cost;
}
static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  REAL_VALUE_TYPE r;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;

  if (REAL_VALUES_EQUAL (r, value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     -1^s * n * 2^-r

   where 's' is a sign bit (0/1), and 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

   - A (most-significant) is the sign bit.
   - BCD are the exponent (encoded as r XOR 3).
   - EFGH are the mantissa (encoded as n - 16).
*/
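/* Illustrative sketch (added; not part of the original sources): encode a
   quarter-precision value given as (s, n, r) from the formula above into
   its 8-bit form, the same combination that vfp3_const_double_index below
   computes.  For example 1.0 = -1^0 * 16 * 2^-4, so s = 0, n = 16, r = 4
   and the encoding is (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70.  */
static int
vfp3_encode_quarter_precision_example (int s, int n, int r)
{
  /* Callers must respect 16 <= n <= 31 and 0 <= r <= 7.  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}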
/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  HOST_WIDE_INT m1, m2;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;

  if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
    return -1;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  REAL_VALUE_TO_INT (&m1, &m2, m);
  mantissa = m1;
  mant_hi = m2;

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

     insn elems variant constant (binary)
     ---- ----- ------- -----------------
     vmov  i32     0    00000000 00000000 00000000 abcdefgh
     vmov  i32     1    00000000 00000000 abcdefgh 00000000
     vmov  i32     2    00000000 abcdefgh 00000000 00000000
     vmov  i32     3    abcdefgh 00000000 00000000 00000000
     vmov  i16     4    00000000 abcdefgh
     vmov  i16     5    abcdefgh 00000000
     vmvn  i32     6    00000000 00000000 00000000 abcdefgh
     vmvn  i32     7    00000000 00000000 abcdefgh 00000000
     vmvn  i32     8    00000000 abcdefgh 00000000 00000000
     vmvn  i32     9    abcdefgh 00000000 00000000 00000000
     vmvn  i16    10    00000000 abcdefgh
     vmvn  i16    11    abcdefgh 00000000
     vmov  i32    12    00000000 00000000 abcdefgh 11111111
     vmvn  i32    13    00000000 00000000 abcdefgh 11111111
     vmov  i32    14    00000000 abcdefgh 11111111 11111111
     vmvn  i32    15    00000000 abcdefgh 11111111 11111111
     vmov  i8     16    abcdefgh
     vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                        eeeeeeee ffffffff gggggggg hhhhhhhh
     vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
     vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
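/* Worked example (added illustration, not from the original sources): a
   V4SImode constant whose every element equals 0x0000ab00 matches variant 1
   above (abcdefgh in byte 1 of each 32-bit element), so the recognizer
   below returns 1, stores 32 through *ELEMENTWIDTH, and the assembler
   output can be "vmov.i32 qN, #0x0000ab00".  */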
static int
neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    {
      n_elts = CONST_VECTOR_NUNITS (op);
      innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
    }
  else
    {
      n_elts = 1;
      if (mode == VOIDmode)
	mode = DImode;
      innersize = GET_MODE_SIZE (mode);
    }

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);
      REAL_VALUE_TYPE r0;

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
	return -1;

      REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);

      for (i = 1; i < n_elts; i++)
	{
	  rtx elt = CONST_VECTOR_ELT (op, i);
	  REAL_VALUE_TYPE re;

	  REAL_VALUE_FROM_CONST_DOUBLE (re, elt);

	  if (!REAL_VALUES_EQUAL (r0, re))
	    return -1;
	}

      if (modconst)
	*modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
	*elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
	return 19;
      else
	return 18;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (GET_CODE (el) == CONST_INT)
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (GET_CODE (el) == CONST_DOUBLE)
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (GET_CODE (el) == CONST_DOUBLE)
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
      else
	{
	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
    }

  return immtype;
#undef CHECK
}
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

int
neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for a description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT selects between left and right shifts, which
   place different limits on the immediate.  */

int
neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
{
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (GET_CODE (el) == CONST_INT)
	elpart = INTVAL (el);
      else if (GET_CODE (el) == CONST_DOUBLE)
	return 0;
      else
	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
	return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
	return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
	return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
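/* Worked example (added illustration, not from the original sources): for
   V8QImode, whose elements are 8 bits wide, a VSHL immediate may range
   over 0..7 while a VSHR immediate may range over 1..8, which is exactly
   the asymmetry the ISLEFTSHIFT checks above encode.  */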
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}
/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     enum machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  enum machine_mode inner = GET_MODE_INNER (mode);
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
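/* Host-side sketch (added illustration, not part of the original sources)
   of the same halving strategy on a plain array: each pass folds adjacent
   pairs, so a power-of-two element count N needs log2(N) passes, exactly
   like the chain of pairwise-reduction insns emitted above.  */
static int
pairwise_reduce_example (int *v, unsigned int n)
{
  unsigned int half, i;

  for (half = n / 2; half >= 1; half /= 2)
    for (i = 0; i < half; i++)
      v[i] = v[2 * i] + v[2 * i + 1];

  return v[0];
}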
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (!all_same)
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We can not take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      switch (mode)
	{
	case V8QImode:
	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
	  break;
	case V16QImode:
	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
	  break;
	case V4HImode:
	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
	  break;
	case V8HImode:
	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
	  break;
	case V2SImode:
	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
	  break;
	case V4SImode:
	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
	  break;
	case V2SFmode:
	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
	  break;
	case V4SFmode:
	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
	  break;
	case V2DImode:
	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
	  break;
	default:
	  gcc_unreachable ();
	}
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
   reported source locations are bogus.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const char *err)
{
  HOST_WIDE_INT lane;

  gcc_assert (GET_CODE (operand) == CONST_INT);

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error (err);
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "lane out of range");
}

/* Bounds-check constants.  */

void
neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "constant out of range");
}

HOST_WIDE_INT
neon_element_bits (enum machine_mode mode)
{
  if (mode == DImode)
    return GET_MODE_BITSIZE (mode);
  else
    return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (GET_CODE (op) != MEM)
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (GET_CODE (ind) == REG)
    return arm_address_register_rtx_p (ind, 0);

  /* Auto-increment addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).  */
  if (GET_CODE (ind) == PLUS
      && GET_CODE (XEXP (ind, 0)) == REG
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && GET_CODE (XEXP (ind, 1)) == CONST_INT
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (GET_CODE (op) != MEM)
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (GET_CODE (ind) == REG)
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* FIXME: vld1 allows register post-modify.  */

  /* Match:
     (plus (reg)
	  (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && GET_CODE (XEXP (ind, 0)) == REG
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && GET_CODE (XEXP (ind, 1)) == CONST_INT
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1016
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (GET_CODE (op) != MEM)
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (GET_CODE (ind) == REG)
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;

    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.  */

static int
arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == UNSPEC
      && (XINT (*x, 1) == UNSPEC_PIC_BASE
	  || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
    return 1;
  return 0;
}

static bool
arm_cannot_copy_insn_p (rtx insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
}
  enum rtx_code code = GET_CODE (x);
/* Match pair of min/max operators that can be implemented via usat/ssat.  */

bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
	*mask = log;
      if (signed_sat)
	*signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
	*mask = log + 1;
      if (signed_sat)
	*signed_sat = true;

      return true;
    }

  return false;
}
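/* Worked example (added illustration, not from the original sources):
   bounds [0, 255] give log = 8 and an unsigned saturation ("usat ..., #8");
   bounds [-128, 127] give log = 7 with *MASK = 8 and *SIGNED_SAT set,
   matching "ssat ..., #8".  */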
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((GET_CODE (XEXP (a, 0)) == REG
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
      && (GET_CODE (XEXP (b, 0)) == REG
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
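/* Worked example (added illustration, not from the original sources):
   the references [r4, #8] and [r4, #12] share base register r4 and their
   offsets differ by exactly 4, so they are adjacent; [r4, #8] with
   [r5, #12] (different base) or [r4, #8] with [r4, #20] (gap of 12) are
   not.  */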
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if the value is to be loaded into the PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
   where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
	 REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.
	     It guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt
	     stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || !MEM_P (mem)
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
	return false;

      /* For Thumb-1, the address register is always modified - either by
	 write-back or by explicit load.  If the pattern does not describe
	 an update, then the address register must be in the list of loaded
	 registers.  */
      if (TARGET_THUMB1)
	return update || addr_reg_in_reglist;
    }

  return true;
}
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	ldr	rd1, [rbase + offset]
	ldr	rd2, [rbase + offset + 4]

     to

	add	rd1, rbase, offset
	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

	NREGS		CYCLES
	  1		  3
	  2		  4
	  3		  5
	  4		  6

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

	NREGS		CYCLES
	  1		 1-3
	  2		 2-6
	  3		 3-9
	  4		 4-12

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
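/* Worked example (added illustration, not from the original sources): for
   unsorted_offsets = {4, 12, 8, 0} the caller seeds order[0] = 3 (offset
   0); the scan then selects offsets 4, 8 and 12 in turn, giving
   order = {3, 0, 2, 1}.  A set such as {0, 4, 4, 8} fails because offset
   4 is found twice, and {0, 4, 12} fails because no offset equals 8.  */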
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i));

      gcc_assert (GET_CODE (operands[nops + i]) == MEM);

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
	   || (GET_CODE (reg) == SUBREG
	       && GET_CODE (reg = SUBREG_REG (reg)) == REG))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
		   == REG)
		  || (GET_CODE (reg) == SUBREG
		      && GET_CODE (reg = SUBREG_REG (reg)) == REG))
	      && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
		  == CONST_INT)))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (GET_CODE (operands[i]) == REG
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i));

      gcc_assert (GET_CODE (operands[nops + i]) == MEM);

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
	   || (GET_CODE (reg) == SUBREG
	       && GET_CODE (reg = SUBREG_REG (reg)) == REG))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
		   == REG)
		  || (GET_CODE (reg) == SUBREG
		      && GET_CODE (reg = SUBREG_REG (reg)) == REG))
	      && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
		  == CONST_INT)))
	{
	  unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (VOIDmode, basereg,
		       plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
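/* Example of the RTL shape produced (added illustration, not from the
   original sources): for COUNT == 2, REGS == {0, 1} and a writeback of 8
   from base r4, the result is
     (parallel [(set (reg r4) (plus (reg r4) (const_int 8)))
		(set (reg r0) (mem ...))
		(set (reg r1) (mem ...))])
   which is the form matched by the ldmia-with-update patterns.  */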
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (VOIDmode, basereg,
		       plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.*/

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
                     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
                                    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
                                     write_back ? 4 * count : 0);
}

rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
                       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
                              offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
                        rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
                              offsetp);
}
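/* A minimal usage sketch (a hypothetical caller; BASEREG and BASEMEM are
   assumed rtx values, not names from this file): loading four consecutive
   words into r0..r3 with the base register updated past the block:

       int regs[4] = { 0, 1, 2, 3 };
       HOST_WIDE_INT offset = 0;
       emit_insn (arm_gen_load_multiple (regs, 4, basereg, TRUE,
                                         basemem, &offset));

   On return OFFSET has been advanced by 16, ready for addressing whatever
   follows the block.  */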
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
                                     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
        if (regs[i] > regs[j])
          {
            int t = regs[i];
            regs[i] = regs[j];
            regs[j] = t;
          }
  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
        {
          base_reg = regs[0];
          base_reg_rtx = newbase;
        }
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
                                      write_back ? offset + i * 4 : 0));
  return true;
}
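/* For illustration only: the peephole2 transformation performed above
   rewrites, e.g.,

       ldr r0, [r2]                 =>    ldmia r2, {r0, r1}
       ldr r1, [r2, #4]

   once load_multiple_sequence has confirmed that the addresses are
   consecutive and the register order can be made ascending.  */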
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
                                      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
                                      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
        if (regs[i] == regs[j])
          {
            rtx t = peep2_find_free_register (0, nops * 2,
                                              TARGET_THUMB1 ? "l" : "r",
                                              SImode, &allocated);
            if (t == NULL_RTX)
              return false;
            reg_rtxs[i] = t;
            regs[i] = REGNO (t);
          }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
        if (regs[j] > regs[reg_order[i - 1]]
            && (this_order == reg_order[i - 1]
                || regs[j] < regs[this_order]))
          this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
           || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
          && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
        return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
                                   HOST_WIDE_INT length,
                                   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  rtx mem, addr;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;

  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
                                            TRUE, srcbase, &srcoffset));
          src_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
                                                 - src_autoinc));
              mem = adjust_automodify_address (srcbase, SImode, addr,
                                               srcoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_loadsi (regs[j], mem));
            }
          srcoffset += block_size_bytes;
        }

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
                                             TRUE, dstbase, &dstoffset));
          dst_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
                                                 - dst_autoinc));
              mem = adjust_automodify_address (dstbase, SImode, addr,
                                               dstoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_storesi (mem, regs[j]));
            }
          dstoffset += block_size_bytes;
        }

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
                                        &srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, src,
                                srcoffset + j * UNITS_PER_WORD - src_autoinc);
          mem = adjust_automodify_address (srcbase, SImode, addr,
                                           srcoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_loadsi (regs[j], mem));
        }
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
                                         &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, dst,
                                dstoffset + j * UNITS_PER_WORD - dst_autoinc);
          mem = adjust_automodify_address (dstbase, SImode, addr,
                                           dstoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_storesi (mem, regs[j]));
        }
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
         byte, depending on interleave factor.  */
      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
          emit_insn (gen_unaligned_storehi (mem,
                       gen_lowpart (HImode, halfword_tmp)));
          halfword_tmp = NULL;
          dstoffset += 2;
        }

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
          byte_tmp = NULL;
          dstoffset++;
        }

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
                   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
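/* For illustration only: with INTERLEAVE_FACTOR == 2 and an unaligned
   source and destination, the straight-line copy emitted above follows
   the pattern

       ldr r4, [src]          @ load/load ...
       ldr r5, [src, #4]
       str r4, [dst]          @ ... then store/store
       str r5, [dst, #4]

   so that each load has a chance to complete before its value is needed
   by the corresponding store.  */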
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
                      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
                               unsigned int interleave_factor,
                               HOST_WIDE_INT bytes_per_iter)
{
  rtx label, src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
                                   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
                                     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
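/* For illustration only: for LENGTH == 40 and BYTES_PER_ITER == 16 the
   code emitted above has the shape

       final_src = src_reg + 32
   L:  <copy 16 bytes>                   @ arm_block_move_unaligned_straight
       src_reg += 16; dest_reg += 16
       if (src_reg != final_src) goto L
       <copy 8 left-over bytes>

   i.e. two loop iterations followed by a straight-line copy of the
   remainder.  */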
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
         size of code if optimizing for size.  We'll use ldm/stm if src_aligned
         or dst_aligned though: allow more interleaving in those cases since the
         resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
        arm_block_move_unaligned_loop (operands[0], operands[1], length,
                                       interleave_factor, bytes_per_iter);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length,
                                           interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
         subject to loop unrolling, which makes tuning this condition a little
         awkward.  */
      if (length > 32)
        arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  int i;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (GET_CODE (operands[2]) != CONST_INT
      || GET_CODE (operands[3]) != CONST_INT
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  for (i = 0; in_words_to_go >= 2; i+=4)
    {
      if (in_words_to_go > 4)
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                          TRUE, srcbase, &srcoffset));
      else
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
                                          src, FALSE, srcbase,
                                          &srcoffset));

      if (out_words_to_go)
        {
          if (out_words_to_go > 4)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
                                               TRUE, dstbase, &dstoffset));
          else if (out_words_to_go != 1)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
                                               out_words_to_go, dst,
                                               (last_bytes == 0
                                                ? FALSE : TRUE),
                                               dstbase, &dstoffset));
          else
            {
              mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
              emit_move_insn (mem, gen_rtx_REG (SImode, 0));
              if (last_bytes != 0)
                {
                  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
                  dstoffset += 4;
                }
            }
        }

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);
      emit_insn (gen_addsi3 (src, src, GEN_INT (4)));
      srcoffset += 4;

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
      dstoffset += 4;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
                              GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode,
                                           plus_constant (Pmode, dst,
                                                          last_bytes - 1),
                                           dstoffset + last_bytes - 1);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

          if (--last_bytes)
            {
              tmp = gen_reg_rtx (SImode);
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
              part_bytes_reg = tmp;
            }
        }
    }
  else
    {
      if (last_bytes > 1)
        {
          mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
          last_bytes -= 2;
          if (last_bytes)
            {
              rtx tmp = gen_reg_rtx (SImode);
              emit_insn (gen_addsi3 (dst, dst, const2_rtx));
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
              part_bytes_reg = tmp;
              dstoffset += 2;
            }
        }

      if (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
        }
    }

  return 1;
}
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */

enum machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
          != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    {
      enum rtx_code temp = cond1;
      cond1 = cond2;
      cond2 = temp;
    }

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DEQmode;

      switch (cond2)
        {
        case EQ: return CC_DEQmode;
        case LE: return CC_DLEmode;
        case LEU: return CC_DLEUmode;
        case GE: return CC_DGEmode;
        case GEU: return CC_DGEUmode;
        default: gcc_unreachable ();
        }

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTmode;

      switch (cond2)
        {
        case LT: return CC_DLTmode;
        case LE: return CC_DLEmode;
        case NE: return CC_DNEmode;
        default: gcc_unreachable ();
        }

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTmode;

      switch (cond2)
        {
        case GT: return CC_DGTmode;
        case GE: return CC_DGEmode;
        case NE: return CC_DNEmode;
        default: gcc_unreachable ();
        }

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTUmode;

      switch (cond2)
        {
        case LTU:
          return CC_DLTUmode;
        case LEU:
          return CC_DLEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTUmode;

      switch (cond2)
        {
        case GTU:
          return CC_DGTUmode;
        case GEU:
          return CC_DGEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
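/* A worked example (illustrative): for a test of the form
   ((a == b) || (a <= b)) != 0 we get cond1 == EQ and cond2 == LE.
   EQ dominates LE (whenever EQ holds, LE holds too), so the switch above
   returns CC_DLEmode and the pair can be evaluated with a single compare
   followed by conditionally executed instructions, instead of two
   separate compare-and-branch sequences.  */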
enum machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
        {
        case EQ:
        case NE:
        case UNORDERED:
        case ORDERED:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
        case UNEQ:
        case LTGT:
          return CCFPmode;

        case LT:
        case LE:
        case GT:
        case GE:
          return CCFPEmode;

        default:
          gcc_unreachable ();
        }
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
          || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
          || op == GEU || op == GTU || op == LTU || op == LEU)
      && GET_CODE (y) == CONST_INT)
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
          || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
          || GET_CODE (x) == AND || GET_CODE (x) == IOR
          || GET_CODE (x) == XOR || GET_CODE (x) == MULT
          || GET_CODE (x) == NOT || GET_CODE (x) == NEG
          || GET_CODE (x) == LSHIFTRT
          || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == ROTATERT
          || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
        {
        case EQ:
        case NE:
          /* A DImode comparison against zero can be implemented by
             or'ing the two halves together.  */
          if (y == const0_rtx)
            return CC_Zmode;

          /* We can do an equality test in three Thumb instructions.  */
          if (!TARGET_32BIT)
            return CC_Zmode;

          /* FALLTHROUGH */

        case LTU:
        case LEU:
        case GTU:
        case GEU:
          /* DImode unsigned comparisons can be implemented by cmp +
             cmpeq without a scratch register.  Not worth doing in
             Thumb-2.  */
          if (TARGET_32BIT)
            return CC_CZmode;

          /* FALLTHROUGH */

        case LT:
        case LE:
        case GT:
        case GE:
          /* DImode signed and unsigned comparisons can be implemented
             by cmp + sbcs with a scratch register, but that does not
             set the Z flag - we must reverse GT/LE/GTU/LEU.  */
          gcc_assert (op != EQ && op != NE);
          return CC_NCVmode;

        default:
          gcc_unreachable ();
        }
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  enum machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
         then compare against zero.  Not used for ARM mode; there
         CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
        {
          gcc_assert (!reload_completed);
          x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
          y = const0_rtx;
        }

      /* A scratch register is required.  */
      if (reload_completed)
        gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
        scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
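/* Illustrative note: for a DImode unsigned comparison in ARM state the
   CC_CZmode path above lets the backend emit the classic two-instruction
   sequence

       cmp   xhi, yhi
       cmpeq xlo, ylo

   while signed DImode comparisons get the cmp + sbcs form, which is why
   those need the scratch register clobbered above.  */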
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (GET_CODE (ref) == REG)
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
        {
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
                                                               offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
                                                               offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT
                                (SImode,
                                 gen_rtx_SUBREG (SImode, operands[0], 0),
                                 GEN_INT (8)),
                                scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT (SImode, scratch,
                                                GEN_INT (8)),
                                gen_rtx_SUBREG (SImode, operands[0], 0)));
}
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (GET_CODE (ref) == REG)
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
        {
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
        {
          /* Updating base_plus might destroy outval, see if we can
             swap the scratch and base_plus.  */
          if (!reg_overlap_mentioned_p (scratch, outval))
            {
              rtx tmp = scratch;
              scratch = base_plus;
              base_plus = tmp;
            }
          else
            {
              rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

              /* Be conservative and copy OUTVAL into the scratch now,
                 this should only be necessary if outval is a subreg
                 of something larger than a word.  */
              /* XXX Might this clobber base?  I can't see how it can,
                 since scratch is known to overlap with OUTVAL, and
                 must be wider than a word.  */
              emit_insn (gen_movhi (scratch_hi, outval));
              outval = scratch_hi;
            }
        }

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Be careful not to destroy OUTVAL.  */
          if (reg_overlap_mentioned_p (base_plus, outval))
            {
              /* Updating base_plus might destroy outval, see if we
                 can swap the scratch and base_plus.  */
              if (!reg_overlap_mentioned_p (scratch, outval))
                {
                  rtx tmp = scratch;
                  scratch = base_plus;
                  base_plus = tmp;
                }
              else
                {
                  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

                  /* Be conservative and copy outval into scratch now,
                     this should only be necessary if outval is a
                     subreg of something larger than a word.  */
                  /* XXX Might this clobber base?  I can't see how it
                     can, since scratch is known to overlap with
                     outval.  */
                  emit_insn (gen_movhi (scratch_hi, outval));
                  outval = scratch_hi;
                }
            }

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */

static bool
arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}


/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */

bool
arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}


/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
arm_pad_reg_upward (enum machine_mode mode,
                    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
         and small complex types are always padded upwards.  */
      if (type)
        {
          if ((AGGREGATE_TYPE_P (type)
               || TREE_CODE (type) == COMPLEX_TYPE
               || FIXED_POINT_TYPE_P (type))
              && int_size_in_bytes (type) <= 4)
            return true;
        }
      else
        {
          if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
              && GET_MODE_SIZE (mode) <= 4)
            return true;
        }
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
        int i;

        fprintf (f, "<");
        for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
          {
            fprintf (f, HOST_WIDE_INT_PRINT_HEX,
                     INTVAL (CONST_VECTOR_ELT (x, i)));
            if (i < (CONST_VECTOR_NUNITS (x) - 1))
              fputc (',', f);
          }
        fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */
/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */

struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  enum machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *            next;
  rtx               insn;
  HOST_WIDE_INT     address;
  rtx *             loc;
  enum machine_mode mode;
  int               fix_size;
  rtx               value;
  Mnode *           minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
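/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   both evaluate to 4, while MINIPOOL_FIX_SIZE (DImode) evaluates to 8;
   sub-word constants therefore always occupy a full word slot in the
   pool.  */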
static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx	minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix * 		minipool_fix_head;
Mfix * 		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;
/* Determines if INSN is the start of a jump table.  Returns the end
   of the TABLE or NULL_RTX.  */
static rtx
is_jump_table (rtx insn)
{
  rtx table;

  if (jump_to_label_p (insn)
      && ((table = next_real_insn (JUMP_LABEL (insn)))
	  == next_real_insn (insn))
      && table != NULL
      && GET_CODE (table) == JUMP_INSN
      && (GET_CODE (PATTERN (table)) == ADDR_VEC
	  || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
    return table;

  return NULL_RTX;
}
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif

static HOST_WIDE_INT
get_jump_table_size (rtx insn)
{
  /* ADDR_VECs only take room if read-only data does into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size  of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~(HOST_WIDE_INT)1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */

static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     can not be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (GET_CODE (fix->value) != CODE_LABEL
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
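/* Illustrative note: the list discipline maintained above keeps entries
   sorted by increasing max_address.  Re-walking the predecessors after
   an insertion is what preserves the invariant

       mp->prev->max_address <= mp->max_address - mp->prev->fix_size

   i.e. every earlier entry must still be emittable before the later
   entries it precedes in the pool.  */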
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT  min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction.  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT  min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (GET_CODE (fix->value) != CODE_LABEL
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
/* Output the literal table */
static void
dump_minipool (rtx scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
        {
          align64 = 1;
          break;
        }

  if (dump_file)
    fprintf (dump_file,
             ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
             INSN_UID (scan), (unsigned long) minipool_barrier->address,
             align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
        {
          if (dump_file)
            {
              fprintf (dump_file,
                       ";; Offset %u, min %ld, max %ld ",
                       (unsigned) mp->offset, (unsigned long) mp->min_address,
                       (unsigned long) mp->max_address);
              arm_print_value (dump_file, mp->value);
              fputc ('\n', dump_file);
            }

          switch (mp->fix_size)
            {
#ifdef HAVE_consttable_1
            case 1:
              scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_2
            case 2:
              scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_4
            case 4:
              scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_8
            case 8:
              scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_16
            case 16:
              scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
              break;
#endif
            default:
              gcc_unreachable ();
            }
        }

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
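
/* Illustrative sketch of the emitted pool (not literal assembler
   output): with ARM_DOUBLEWORD_ALIGN and at least one 8-byte entry,
   the sequence built above corresponds to

        .align  3               @ align64 path, 8-byte alignment
   .LCPn:                       @ minipool_vector_label
        @ 8-byte entries (consttable_8), then smaller ones
        @ (consttable_4, ...), then the consttable_end marker

   followed by a barrier so the pool is never executed.  */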
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  if (next != NULL && GET_CODE (next) == CODE_LABEL)
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
         than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx barrier;
  rtx from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
         within range.  */
      gcc_assert (GET_CODE (from) != BARRIER);

      /* Count the length of this insn.  This must stay in sync with the
         code that pushes minipool fixes.  */
      if (LABEL_P (from))
        count += get_label_padding (from);
      else
        count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      tmp = is_jump_table (from);
      if (tmp != NULL)
        {
          count += get_jump_table_size (tmp);

          /* Jump tables aren't in a basic block, so base the cost on
             the dispatch insn.  If we select this location, we will
             still put the pool after the table.  */
          new_cost = arm_barrier_cost (from);

          if (count < max_count
              && (!selected || new_cost <= selected_cost))
            {
              selected = tmp;
              selected_cost = new_cost;
              selected_address = fix->address + count;
            }

          /* Continue after the dispatch table.  */
          from = NEXT_INSN (tmp);
          continue;
        }

      new_cost = arm_barrier_cost (from);

      if (count < max_count
          && (!selected || new_cost <= selected_cost))
        {
          selected = from;
          selected_cost = new_cost;
          selected_address = fix->address + count;
        }

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
    {
      rtx next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
          && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
        selected = next;
    }

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
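
/* Schematically, the insn stream after this function is (illustrative):

        ...                     @ SELECTED insn
        b       .Lskip          @ new jump around the future pool
        (barrier)               @ NEW_FIX->insn; a minipool is dumped here
   .Lskip:
        ...                     @ execution resumes  */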
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
                   enum machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
               ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
               GET_MODE_NAME (mode),
               INSN_UID (insn), (unsigned long) address,
               -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  enum machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (lowpart) == CONST_INT);
  gcc_assert (GET_CODE (highpart) == CONST_INT);

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
                            NULL_RTX, NULL_RTX, 0, 0)
          + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
                              NULL_RTX, NULL_RTX, 0, 0));
}
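
/* Worked example: for the DImode value 0x0000000100000001 both halves
   are the valid immediate #1, so each arm_gen_constant call counts one
   insn and the total cost is 2.  A value whose halves are not simple
   immediates costs more, since each half then needs a multi-insn
   mov/orr sequence.  */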
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}

/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_insn (insn);

  if (!constrain_operands (1))
    fatal_insn_not_found (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints ();

  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
        continue;

      /* If this alternative is a memory reference, then any mention
         of constants in this alternative is really to fool reload
         into allowing us to accept one there.  We need to fix them up
         now so that we output the right code.  */
      if (recog_op_alt[opno][which_alternative].memory_ok)
        {
          rtx op = recog_data.operand[opno];

          if (CONSTANT_P (op))
            {
              if (do_pushes)
                push_minipool_fix (insn, address, recog_data.operand_loc[opno],
                                   recog_data.operand_mode[opno], op);
            }
          else if (GET_CODE (op) == MEM
                   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
                   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
            {
              if (do_pushes)
                {
                  rtx cop = avoid_constant_pool_reference (op);

                  /* Casting the address of something to a mode narrower
                     than a word can cause avoid_constant_pool_reference()
                     to return the pool reference itself.  That's no good to
                     us here.  Let's just hope that we can use the
                     constant pool value directly.  */
                  if (op == cop)
                    cop = get_pool_constant (XEXP (op, 0));

                  push_minipool_fix (insn, address,
                                     recog_data.operand_loc[opno],
                                     recog_data.operand_mode[opno], cop);
                }
            }
        }
    }
}
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  FOR_EACH_BB (bb)
    {
      rtx insn;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
        {
          if (NONJUMP_INSN_P (insn)
              && !REGNO_REG_SET_P (&live, CC_REGNUM)
              && GET_CODE (PATTERN (insn)) == SET)
            {
              enum {SKIP, CONV, SWAP_CONV} action = SKIP;
              rtx pat = PATTERN (insn);
              rtx dst = XEXP (pat, 0);
              rtx src = XEXP (pat, 1);
              rtx op0 = NULL_RTX, op1 = NULL_RTX;

              if (!OBJECT_P (src))
                op0 = XEXP (src, 0);

              if (BINARY_P (src))
                op1 = XEXP (src, 1);

              if (low_register_operand (dst, SImode))
                {
                  switch (GET_CODE (src))
                    {
                    case PLUS:
                      /* Adding two registers and storing the result
                         in the first source is already a 16-bit
                         operation.  */
                      if (rtx_equal_p (dst, op0)
                          && register_operand (op1, SImode))
                        break;

                      if (low_register_operand (op0, SImode))
                        {
                          /* ADDS <Rd>,<Rn>,<Rm> */
                          if (low_register_operand (op1, SImode))
                            action = CONV;
                          /* ADDS <Rdn>,#<imm8> */
                          /* SUBS <Rdn>,#<imm8> */
                          else if (rtx_equal_p (dst, op0)
                                   && CONST_INT_P (op1)
                                   && IN_RANGE (INTVAL (op1), -255, 255))
                            action = CONV;
                          /* ADDS <Rd>,<Rn>,#<imm3> */
                          /* SUBS <Rd>,<Rn>,#<imm3> */
                          else if (CONST_INT_P (op1)
                                   && IN_RANGE (INTVAL (op1), -7, 7))
                            action = CONV;
                        }
                      break;

                    case MINUS:
                      /* RSBS <Rd>,<Rn>,#0
                         Not handled here: see NEG below.  */
                      /* SUBS <Rd>,<Rn>,#<imm3>
                         SUBS <Rdn>,#<imm8>
                         Not handled here: see PLUS above.  */
                      /* SUBS <Rd>,<Rn>,<Rm> */
                      if (low_register_operand (op0, SImode)
                          && low_register_operand (op1, SImode))
                        action = CONV;
                      break;

                    case MULT:
                      /* MULS <Rdm>,<Rn>,<Rdm>
                         As an exception to the rule, this is only used
                         when optimizing for size since MULS is slow on all
                         known implementations.  We do not even want to use
                         MULS in cold code, if optimizing for speed, so we
                         test the global flag here.  */
                      if (!optimize_size)
                        break;
                      /* else fall through.  */
                    case AND:
                    case IOR:
                    case XOR:
                      /* ANDS <Rdn>,<Rm> */
                      if (rtx_equal_p (dst, op0)
                          && low_register_operand (op1, SImode))
                        action = CONV;
                      else if (rtx_equal_p (dst, op1)
                               && low_register_operand (op0, SImode))
                        action = SWAP_CONV;
                      break;

                    case ASHIFTRT:
                    case ASHIFT:
                    case LSHIFTRT:
                      /* ASRS <Rdn>,<Rm> */
                      /* LSRS <Rdn>,<Rm> */
                      /* LSLS <Rdn>,<Rm> */
                      if (rtx_equal_p (dst, op0)
                          && low_register_operand (op1, SImode))
                        action = CONV;
                      /* ASRS <Rd>,<Rm>,#<imm5> */
                      /* LSRS <Rd>,<Rm>,#<imm5> */
                      /* LSLS <Rd>,<Rm>,#<imm5> */
                      else if (low_register_operand (op0, SImode)
                               && CONST_INT_P (op1)
                               && IN_RANGE (INTVAL (op1), 0, 31))
                        action = CONV;
                      break;

                    case ROTATERT:
                      /* RORS <Rdn>,<Rm> */
                      if (rtx_equal_p (dst, op0)
                          && low_register_operand (op1, SImode))
                        action = CONV;
                      break;

                    case NOT:
                    case NEG:
                      /* MVNS <Rd>,<Rm> */
                      /* NEGS <Rd>,<Rm>  (a.k.a RSBS) */
                      if (low_register_operand (op0, SImode))
                        action = CONV;
                      break;

                    case CONST_INT:
                      /* MOVS <Rd>,#<imm8> */
                      if (CONST_INT_P (src)
                          && IN_RANGE (INTVAL (src), 0, 255))
                        action = CONV;
                      break;

                    case REG:
                      /* MOVS and MOV<c> with registers have different
                         encodings, so are not relevant here.  */
                      break;

                    default:
                      break;
                    }
                }

              if (action != SKIP)
                {
                  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
                  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
                  rtvec vec;

                  if (action == SWAP_CONV)
                    {
                      src = copy_rtx (src);
                      XEXP (src, 0) = op1;
                      XEXP (src, 1) = op0;
                      pat = gen_rtx_SET (VOIDmode, dst, src);
                      vec = gen_rtvec (2, pat, clobber);
                    }
                  else /* action == CONV */
                    vec = gen_rtvec (2, pat, clobber);

                  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
                  INSN_CODE (insn) = -1;
                }
            }

          if (NONDEBUG_INSN_P (insn))
            df_simulate_one_insn_backwards (bb, insn, &live);
        }
    }

  CLEAR_REG_SET (&live);
}
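
/* For example (illustrative RTL): a plain

        (set (reg:SI r0) (plus:SI (reg:SI r1) (reg:SI r2)))

   becomes

        (parallel [(set (reg:SI r0) (plus:SI (reg:SI r1) (reg:SI r2)))
                   (clobber (reg:CC CC_REGNUM))])

   so it can match the flag-setting pattern and assemble as the 16-bit
   "adds r0, r1, r2" instead of the 32-bit "add.w r0, r1, r2".  */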
/* GCC puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (GET_CODE (insn) == NOTE);
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (GET_CODE (insn) == BARRIER)
        push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
        {
          rtx table;

          note_invalid_constants (insn, address, true);
          address += get_attr_length (insn);

          /* If the insn is a vector jump, add the size of the table
             and skip the table.  */
          if ((table = is_jump_table (insn)) != NULL)
            {
              address += get_jump_table_size (table);
              insn = table;
            }
        }
      else if (LABEL_P (insn))
        /* Add the worst-case padding due to alignment.  We don't add
           the _current_ padding because the minipool insertions
           themselves might change it.  */
        address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && GET_CODE (fix->insn) == BARRIER)
        fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
        break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
        {
          if (GET_CODE (ftmp->insn) == BARRIER)
            {
              if (ftmp->address >= minipool_vector_head->max_address)
                break;

              last_barrier = ftmp;
            }
          else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
            break;

          last_added_fix = ftmp;  /* Keep track of the last fix added.  */
        }

      /* If we found a barrier, drop back to that; any fixes that we
         could have reached but come after the barrier will now go in
         the next mini-pool.  */
      if (last_barrier != NULL)
        {
          /* Reduce the refcount for those fixes that won't go into this
             pool after all.  */
          for (fdel = last_barrier->next;
               fdel && fdel != ftmp;
               fdel = fdel->next)
            {
              fdel->minipool->refcount--;
              fdel->minipool = NULL;
            }

          ftmp = last_barrier;
        }
      else
        {
          /* ftmp is the first fix that we can't fit into this pool and
             there are no natural barriers that we could use.  Insert a
             new barrier in the code somewhere between the previous
             fix and this one, and arrange to jump around it.  */
          HOST_WIDE_INT max_address;

          /* The last item on the list of fixes must be a barrier, so
             we can never run off the end of the list of fixes without
             last_barrier being set.  */
          gcc_assert (ftmp);

          max_address = minipool_vector_head->max_address;
          /* Check that there isn't another fix that is in range that
             we couldn't fit into this pool because the pool was
             already too large: we need to put the pool before such an
             instruction.  The pool itself may come just after the
             fix because create_fix_barrier also allows space for a
             jump instruction.  */
          if (ftmp->address < max_address)
            max_address = ftmp->address + 1;

          last_barrier = create_fix_barrier (last_added_fix, max_address);
        }

      assign_minipool_offsets (last_barrier);

      while (ftmp)
        {
          if (GET_CODE (ftmp->insn) != BARRIER
              && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
                  == NULL))
            break;

          ftmp = ftmp->next;
        }

      /* Scan over the fixes we have identified for this pool, fixing them
         up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
           this_fix = this_fix->next)
        if (GET_CODE (this_fix->insn) != BARRIER)
          {
            rtx addr
              = plus_constant (Pmode,
                               gen_rtx_LABEL_REF (VOIDmode,
                                                  minipool_vector_label),
                               this_fix->minipool->offset);
            *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
          }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
/* Routines to output assembly language.  */

/* If the rtx is the correct value then return the string of the number.
   In this way we can ensure that valid double constants are generated even
   when cross compiling.  */
const char *
fp_immediate_constant (rtx x)
{
  REAL_VALUE_TYPE r;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
  return "0";
}

/* As for fp_immediate_constant, but value is passed directly, not in rtx.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
                         bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
        gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
    {
      /* Output pop (not stmfd) because it has a shorter encoding.  */
      gcc_assert (update);
      sprintf (pattern, "pop%s\t{", conditional);
    }
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
         It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
        sprintf (pattern, "ldm%sfd\t", conditional);
      else if (TARGET_UNIFIED_ASM)
        sprintf (pattern, "ldmia%s\t", conditional);
      else
        sprintf (pattern, "ldm%sia\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
        strcat (pattern, "!, {");
      else
        strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
          reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
              reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
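
/* Example (illustrative): a pop of {r4, r5, pc} with SP as the base
   register and an explicit update prints as "pop {r4, r5, pc}" under
   unified syntax, or "ldmfd sp!, {r4, r5, pc}" in divided syntax; a
   returning interrupt handler additionally gets the "^" suffix.  */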
/* Output the assembly for a store multiple.  */

const char *
vfp_output_fstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;

  strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
  p = strlen (pattern);

  gcc_assert (GET_CODE (operands[1]) == REG);

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }

  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
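
/* Example (illustrative): a push of three register pairs starting at
   d8, with a stack-pointer base, prints as

        fstmfdd sp!, {d8, d9, d10}  */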
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
        base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
         counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
                   gen_frame_mem
                   (BLKmode,
                    gen_rtx_PRE_MODIFY (Pmode,
                                        stack_pointer_rtx,
                                        plus_constant
                                        (Pmode, stack_pointer_rtx,
                                         - (count * 8)))
                    ),
                   gen_rtx_UNSPEC (BLKmode,
                                   gen_rtvec (1, reg),
                                   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (VOIDmode,
                     gen_frame_mem (DFmode, stack_pointer_rtx),
                     reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
                         gen_frame_mem (DFmode,
                                        plus_constant (Pmode,
                                                       stack_pointer_rtx,
                                                       i * 8)),
                         reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
          ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
          : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
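
/* Example (illustrative): a call through r2 expands to

        mov     lr, pc
        bx      r2              @ TARGET_INTERWORK || arm_arch4t

   or "mov lr, pc; mov pc, r2" on cores without BX.  */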
/* Output a 'call' insn that is a reference in memory.  This is
   disabled for ARMv5 and we prefer a blx instead because otherwise
   there's a significant performance overhead.  */
const char *
output_call_mem (rtx *operands)
{
  gcc_assert (!arm_arch5);
  if (TARGET_INTERWORK)
    {
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);
    }
  else if (regno_use_in (LR_REGNUM, operands[0]))
    {
      /* LR is used in the memory address.  We load the address in the
         first instruction.  It's safe to use IP as the target of the
         load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      if (arm_arch4t)
        output_asm_insn ("bx%?\t%|ip", operands);
      else
        output_asm_insn ("mov%?\t%|pc, %|ip", operands);
    }
  else
    {
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
        {
          ops[0] = gen_rtx_REG (SImode, dest_start + i);
          ops[1] = gen_rtx_REG (SImode, src_start + i);
          output_asm_insn ("mov%?\t%0, %1", ops);
        }
    }
  else
    {
      for (i = 2; i >= 0; i--)
        {
          ops[0] = gen_rtx_REG (SImode, dest_start + i);
          ops[1] = gen_rtx_REG (SImode, src_start + i);
          output_asm_insn ("mov%?\t%0, %1", ops);
        }
    }

  return "";
}
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
        emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
                                             GEN_INT (16)),
                       GEN_INT ((val >> 16) & 0x0000ffff));
      return;
    }
  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
}
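
/* Example (illustrative): for the constant 0x12345678 the two sets
   above correspond to the pair

        movw    rD, #0x5678     @ dest = val & 0xffff
        movt    rD, #0x1234     @ the ZERO_EXTRACT set writes the top half

   where rD stands for whatever register DEST names.  */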
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];
  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      *count = 2;
      return "";
    }

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
        {
        case REG:
          if (emit)
            {
              if (TARGET_LDRD
                  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
                output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
              else
                output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
            }
          break;

        case PRE_INC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
          break;

        case PRE_DEC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
              else
                output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
            }
          break;

        case POST_INC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
              else
                output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
            }
          break;

        case POST_DEC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
          break;

        case PRE_MODIFY:
        case POST_MODIFY:
          /* Autoincrement addressing modes should never have overlapping
             base and destination registers, and overlapping index registers
             are already prohibited, so this doesn't need to worry about
             fix_cm3_ldrd.  */
          otherops[0] = operands[0];
          otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

          if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
            {
              if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
                {
                  /* Registers overlap so split out the increment.  */
                  if (emit)
                    {
                      output_asm_insn ("add%?\t%1, %1, %2", otherops);
                      output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  /* Use a single insn if we can.
                     FIXME: IWMMXT allows offsets larger than ldrd can
                     handle, fix these up with a pair of ldr.  */
                  if (TARGET_THUMB2
                      || GET_CODE (otherops[2]) != CONST_INT
                      || (INTVAL (otherops[2]) > -256
                          && INTVAL (otherops[2]) < 256))
                    {
                      if (emit)
                        output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
                    }
                  else
                    {
                      if (emit)
                        {
                          output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
                          output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
                        }
                      if (count)
                        *count = 2;
                    }
                }
            }
          else
            {
              /* Use a single insn if we can.
                 FIXME: IWMMXT allows offsets larger than ldrd can handle,
                 fix these up with a pair of ldr.  */
              if (TARGET_THUMB2
                  || GET_CODE (otherops[2]) != CONST_INT
                  || (INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256))
                {
                  if (emit)
                    output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
                      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
          break;

        case LABEL_REF:
        case CONST:
          /* We might be able to use ldrd %0, %1 here.  However the range is
             different to ldr/adr, and it is broken on some ARMv7-M
             implementations.  */
          /* Use the second register of the pair to avoid problematic
             overlap with the first destination register.  */
          otherops[1] = operands[1];
          if (emit)
            output_asm_insn ("adr%?\t%0, %1", otherops);
          operands[1] = otherops[0];
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
              else
                output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
            }

          if (count)
            *count = 2;
          break;

        default:
          /* ??? This needs checking for thumb2.  */
          if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
                               GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
            {
              otherops[0] = operands[0];
              otherops[1] = XEXP (XEXP (operands[1], 0), 0);
              otherops[2] = XEXP (XEXP (operands[1], 0), 1);

              if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
                {
                  if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
                    {
                      switch ((int) INTVAL (otherops[2]))
                        {
                        case -8:
                          if (emit)
                            output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
                          return "";
                        case -4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
                          return "";
                        case 4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
                          return "";
                        }
                    }
                  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
                  operands[1] = otherops[0];
                  if (TARGET_LDRD
                      && (GET_CODE (otherops[2]) == REG
                          || TARGET_THUMB2
                          || (GET_CODE (otherops[2]) == CONST_INT
                              && INTVAL (otherops[2]) > -256
                              && INTVAL (otherops[2]) < 256)))
                    {
                      if (reg_overlap_mentioned_p (operands[0],
                                                   otherops[2]))
                        {
                          rtx tmp;
                          /* Swap base and index registers over to
                             avoid a conflict.  */
                          tmp = otherops[1];
                          otherops[1] = otherops[2];
                          otherops[2] = tmp;
                        }
                      /* If both registers conflict, it will usually
                         have been fixed by a splitter.  */
                      if (reg_overlap_mentioned_p (operands[0], otherops[2])
                          || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
                        {
                          if (emit)
                            {
                              output_asm_insn ("add%?\t%0, %1, %2", otherops);
                              output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
                            }
                          if (count)
                            *count = 2;
                        }
                      else
                        {
                          otherops[0] = operands[0];
                          if (emit)
                            output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
                        }
                      return "";
                    }

                  if (GET_CODE (otherops[2]) == CONST_INT)
                    {
                      if (emit)
                        {
                          if (!(const_ok_for_arm (INTVAL (otherops[2]))))
                            output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
                          else
                            output_asm_insn ("add%?\t%0, %1, %2", otherops);
                        }
                    }
                  else
                    {
                      if (emit)
                        output_asm_insn ("add%?\t%0, %1, %2", otherops);
                    }
                }
              else
                {
                  if (emit)
                    output_asm_insn ("sub%?\t%0, %1, %2", otherops);
                }

              if (count)
                *count = 2;

              if (TARGET_LDRD)
                return "ldr%(d%)\t%0, [%1]";

              return "ldm%(ia%)\t%1, %M0";
            }
          else
            {
              otherops[1] = adjust_address (operands[1], SImode, 4);
              /* Take care of overlapping base/data reg.  */
              if (reg_mentioned_p (operands[0], operands[1]))
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
        }
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert (REGNO (operands[1]) != IP_REGNUM);

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
              else
                output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
            }
          break;

        case PRE_INC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
          break;

        case PRE_DEC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
              else
                output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
            }
          break;

        case POST_INC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
              else
                output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
            }
          break;

        case POST_DEC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
          break;

        case PRE_MODIFY:
        case POST_MODIFY:
          otherops[0] = operands[1];
          otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

          /* IWMMXT allows offsets larger than ldrd can handle,
             fix these up with a pair of ldr.  */
          if (!TARGET_THUMB2
              && GET_CODE (otherops[2]) == CONST_INT
              && (INTVAL(otherops[2]) <= -256
                  || INTVAL(otherops[2]) >= 256))
            {
              if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
          else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
            {
              if (emit)
                output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
            }
          else
            {
              if (emit)
                output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
            }
          break;

        case PLUS:
          otherops[2] = XEXP (XEXP (operands[0], 0), 1);
          if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
            {
              switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
                {
                case -8:
                  if (emit)
                    output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
                  return "";

                case -4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
                  return "";

                case 4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
                  return "";
                }
            }
          if (TARGET_LDRD
              && (GET_CODE (otherops[2]) == REG
                  || TARGET_THUMB2
                  || (GET_CODE (otherops[2]) == CONST_INT
                      && INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256)))
            {
              otherops[0] = operands[1];
              otherops[1] = XEXP (XEXP (operands[0], 0), 0);
              if (emit)
                output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
              return "";
            }
          /* Fall through */

        default:
          otherops[0] = adjust_address (operands[0], SImode, 4);
          otherops[1] = operands[1];
          if (emit)
            {
              output_asm_insn ("str%?\t%1, %0", operands);
              output_asm_insn ("str%?\t%H1, %0", otherops);
            }
          if (count)
            *count = 2;
        }
    }

  return "";
}
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
        {
          switch (GET_CODE (XEXP (operands[1], 0)))
            {
            case REG:
              output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
              break;

            case LABEL_REF:
            case CONST:
              output_asm_insn ("adr%?\t%0, %1", operands);
              output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else
        {
          rtx ops[2];
          int dest, src, i;

          gcc_assert (REG_P (operands[1]));

          dest = REGNO (operands[0]);
          src = REGNO (operands[1]);

          /* This seems pretty dumb, but hopefully GCC won't try to do it
             very often.  */
          if (dest < src)
            for (i = 0; i < 4; i++)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
          else
            for (i = 3; i >= 0; i--)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
        }
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
          break;

        default:
          gcc_unreachable ();
        }
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  enum machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert (mode == SFmode
              || mode == DFmode
              || mode == SImode
              || mode == DImode
              || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "f%s%c%%?\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
           load ? "ld" : "st",
           dp ? 'd' : 's',
           dp ? "P" : "",
           integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
/* Output a Neon quad-word load or store, or a load or store for
   larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because we use VSTM, as required by the EABI.  GCC RTL defines
   element ordering based on in-memory order.  This can differ
   from the architectural ordering of elements within a NEON register.
   The intrinsics defined in arm_neon.h use the NEON register element
   ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.  */

const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  enum machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
              || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
              || VALID_NEON_QREG_MODE (mode)
              || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      templ = "v%smia%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* FIXME: We should be using vld1/vst1 here in BE mode?  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case LABEL_REF:
    case PLUS:
      {
        int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
        int overlap = -1;
        int i;

        for (i = 0; i < nregs; i++)
          {
            /* We're only using DImode here because it's a convenient size.  */
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
            ops[1] = adjust_address (mem, DImode, 8 * i);
            if (reg_overlap_mentioned_p (ops[0], mem))
              {
                gcc_assert (overlap == -1);
                overlap = i;
              }
            else
              {
                sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
                output_asm_insn (buff, ops);
              }
          }
        if (overlap != -1)
          {
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
            ops[1] = adjust_address (mem, SImode, 8 * overlap);
            sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
            output_asm_insn (buff, ops);
          }

        return "";
      }

    default:
      templ = "v%smia%%?\t%%m0, %%h1";
      ops[0] = mem;
      ops[1] = reg;
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx insn)
{
  rtx reg, mem, addr;
  int load;
  enum machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
        {
        case EImode:
        case OImode:
          return 8;
        case CImode:
          return 12;
        case XImode:
          return 16;
        default:
          gcc_unreachable ();
        }
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  mode = GET_MODE (reg);
  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (GET_CODE (addr) == REG
      || (GET_CODE (addr) == PLUS
          && GET_CODE (XEXP (addr, 0)) == REG
          && GET_CODE (XEXP (addr, 1)) == CONST_INT))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
        output_multi_immediate (operands,
                                "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
                                -n);
      else
        output_multi_immediate (operands,
                                "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
                                n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
                        int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
        {
          if (n & (255 << i))
            {
              operands[immed_op] = GEN_INT (n & (255 << i));
              output_asm_insn (instr, operands);
              instr = instr2;
              i += 6;
            }
        }
    }

  return "";
}
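
/* Worked example (illustrative): adding the constant 0x10004 splits
   into two 8-bit-rotatable chunks and prints

        add     r0, r1, #4
        add     r0, r0, #65536

   The first chunk uses INSTR1 (which reads %1); later chunks use
   INSTR2, accumulating into %0.  */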
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem(enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem(GET_CODE(op));

    default:
      gcc_unreachable ();
    }
}

/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (GET_CODE (XEXP (op, 1)))
    {
    case REG:
    case SUBREG:
      *amountp = -1;
      break;

    case CONST_INT:
      *amountp = INTVAL (XEXP (op, 1));
      break;

    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case ROTATE:
      gcc_assert (*amountp != -1);
      *amountp = 32 - *amountp;
      code = ROTATERT;

      /* Fall through.  */

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem(code);
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
         power of 2, since this case can never be reloaded from a reg.  */
      gcc_assert (*amountp != -1);
      *amountp = int_log2 (*amountp);
      return ARM_LSL_NAME;

    default:
      gcc_unreachable ();
    }

  if (*amountp != -1)
    {
      /* This is not 100% correct, but follows from the desire to merge
         multiplication by a power of 2 with the recognizer for a
         shift.  >=32 is not a valid shift for "lsl", so we must try and
         output a shift that produces the correct arithmetical result.
         Using lsr #32 is identical except for the fact that the carry bit
         is not set correctly if we set the flags; but we never use the
         carry bit from such an operation, so we can ignore that.  */
      if (code == ROTATERT)
        /* Rotate is just modulo 32.  */
        *amountp &= 31;
      else if (*amountp != (*amountp & 31))
        {
          if (code == ASHIFT)
            mnem = "lsr";
          *amountp = 32;
        }

      /* Shifts of 0 are no-ops.  */
      if (*amountp == 0)
        return NULL;
    }

  return mnem;
}
/* Obtain the shift from the POWER of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}
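
/* For example, int_log2 (8) yields 3, so a multiplication by 8 seen by
   shift_op above prints as an LSL by #3.  A minimal standalone check of
   the same loop (hypothetical test, not part of the build):  */
#if 0
static int
int_log2_check (void)
{
  long shift = 0, power = 8;

  /* Mirrors the loop above: shift until the set bit is found.  */
  while (((1L << shift) & power) == 0)
    shift++;

  return shift == 3;            /* int_log2 (8) == 3 */
}
#endif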
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
        {
          fputs ("\"\n\t.ascii\t\"", stream);
          len_so_far = 0;
        }

      if (ISPRINT (c))
        {
          if (c == '\\' || c == '\"')
            {
              putc ('\\', stream);
              len_so_far++;
            }
          putc (c, stream);
          len_so_far++;
        }
      else
        {
          fprintf (stream, "\\%03o", c);
          len_so_far += 4;
        }
    }

  fputs ("\"\n", stream);
}
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_reg_mask.  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
         even call clobbered ones.  If this is a leaf function
         we can just examine the registers used by the RTL, but
         otherwise we have to assume that whatever function is
         called might clobber anything, and so we have to save
         all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
        /* FIQ handlers have registers r8 - r12 banked, so
           we only need to check r0 - r7.  Normal ISRs only
           bank r14 and r15, so we must check up to r12.
           r13 is the stack pointer which is always preserved,
           so we do not need to consider it here.  */
        max_reg = 7;
      else
        max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
        if (df_regs_ever_live_p (reg)
            || (! crtl->is_leaf && call_used_regs[reg]))
          save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && crtl->uses_pic_offset_table)
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE(func_type))
    {
      /* For noreturn functions we historically omitted register saves
         altogether.  However this really messes up debugging.  As a
         compromise save just the frame pointers.  Combined with the link
         register saved elsewhere this should be sufficient to get
         a backtrace.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
         which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
        if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
          save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
         don't stack it even though it may be live.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
              || crtl->uses_pic_offset_table))
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
        save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
        {
          reg = EH_RETURN_DATA_REGNO (i);
          if (reg == INVALID_REGNUM)
            break;
          save_reg_mask |= 1 << reg;
        }
    }

  return save_reg_mask;
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int arm_compute_static_chain_stack_bytes (void)
{
  unsigned long func_type = arm_current_func_type ();
  int static_chain_stack_bytes = 0;

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
      IS_NESTED (func_type) &&
      df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
    static_chain_stack_bytes = 4;

  return static_chain_stack_bytes;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.
   This is used by arm_get_frame_offsets, which may add extra registers.  */

static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
          && optimize_size
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
           + ARM_NUM_INTS (crtl->args.pretend_args_size +
                           arm_compute_static_chain_stack_bytes())
           ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
         onto the stack is odd.  We need to ensure that the stack
         is 64-bit aligned before we start to save iWMMXt registers,
         and also before we start to create locals.  (A local variable
         might be a double or long long which we will load/store using
         an iWMMXt instruction).  Therefore we need to push another
         ARM register, so that the stack will be 64-bit aligned.  We
         try to avoid using the arg registers (r0 - r3) as they might be
         used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
        if ((save_reg_mask & (1 << reg)) == 0)
          break;

      if (reg <= 12)
        save_reg_mask |= (1 << reg);
      else
        {
          cfun->machine->sibcall_blocked = 1;
          save_reg_mask |= (1 << 3);
        }
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
        save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg++)
    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
         we will use.  If the register is live then we will
         have to push it.  Use LAST_LO_REGNUM as our fallback
         choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
         not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
        reg = LAST_LO_REGNUM;

      if (! call_used_regs[reg])
        mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
         determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
        if (mask & (1 << reg))
          break;

      if (reg > LAST_LO_REGNUM)
        {
          /* Make sure we have a register available for stack decrement.  */
          mask |= 1 << LAST_LO_REGNUM;
        }
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
           regno < LAST_VFP_REGNUM;
           regno += 2)
        {
          if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
              && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
            {
              if (count > 0)
                {
                  /* Workaround ARM10 VFPr1 bug.  */
                  if (count == 2 && !arm_arch6)
                    count++;
                  saved += count * 8;
                }
              count = 0;
            }
          else
            count++;
        }
      if (count > 0)
        {
          if (count == 2 && !arm_arch6)
            count++;
          saved += count * 8;
        }
    }
  return saved;
}
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
			   bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |=   (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5 && TARGET_ARM)
		{
		  if (TARGET_UNIFIED_ASM)
		    sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
		  else
		    sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
		}
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;

		  if (TARGET_UNIFIED_ASM)
		    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
		  else
		    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
		}
	    }
	  else
	    {
	      if (TARGET_UNIFIED_ASM)
		sprintf (instr, "pop%s\t{", conditional);
	      else
		sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
	    }

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  return "";
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  /* Use bx if it's available.  */
	  if (arm_arch5 || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
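/* For a simple ARM-mode function that pushed {r4, r5, lr} and has no special
   exit requirements, the code above typically emits something like
   "ldmfd sp!, {r4, r5, pc}" (or "pop {r4, r5, pc}" under unified syntax),
   folding the return into the register restore; an interworked return
   instead restores into lr and finishes with "bx lr".  (Illustrative
   output only; the exact mnemonic depends on the target options.)  */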
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
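/* A backtracer consuming this structure might recover the name roughly like
   this (a sketch under the assumptions above, not code from these sources),
   given pc as the (unsigned long *) saved at fp + 0:

       unsigned long marker = pc[-3];              -- the word at pc - 12
       unsigned long len = marker & 0x00ffffff;    -- padded name length
       const char *name = (const char *) (pc - 3) - len;

   The length lives in the low 24 bits of the marker word; the 0xff000000
   part is the tag in the top 8 bits that the backtracer tests for.  */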
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  /* ??? Do we want to print some of the below anyway?  */
  if (TARGET_THUMB1)
    return;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
	       crtl->args.size,
	       crtl->args.pretend_args_size, frame_size);

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}

      /* ??? Probably not safe to set this here, since it assumes that a
	 function will be emitted as assembly immediately after we generate
	 RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
		  || (cfun->machine->return_used_this_function != 0)
		  || offsets->saved_regs == offsets->outgoing_args
		  || frame_pointer_needed);

      /* Reset the ARM-specific per-function variables.  */
      after_arm_reorg = 0;
    }
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static rtx
emit_multi_reg_push (unsigned long mask)
{
  int num_regs = 0;
  int num_dwarf_regs;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We don't record the PC in the dwarf frame information.  */
  num_dwarf_regs = num_regs;
  if (mask & (1 << PC_REGNUM))
    num_dwarf_regs--;

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
	   (set (mem:BLK (pre_modify:SI (reg:SI sp)
					(const_int:SI <num>)))
		(unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
	   (use (reg:SI XX))
	   (use (reg:SI YY))
	   ...
	])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
	   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
	   (set (mem:SI (reg:SI sp)) (reg:SI r4))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	   ...
	])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (VOIDmode,
			   gen_frame_mem
			   (BLKmode,
			    gen_rtx_PRE_MODIFY (Pmode,
						stack_pointer_rtx,
						plus_constant
						(Pmode, stack_pointer_rtx,
						 -4 * num_regs))),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));

	  if (i != PC_REGNUM)
	    {
	      tmp = gen_rtx_SET (VOIDmode,
				 gen_frame_mem (SImode, stack_pointer_rtx),
				 reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
	      dwarf_par_index++;
	    }

	  break;
	}
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

	  if (i != PC_REGNUM)
	    {
	      tmp
		= gen_rtx_SET (VOIDmode,
			       gen_frame_mem
			       (SImode,
				plus_constant (Pmode, stack_pointer_rtx,
					       4 * j)),
			       reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
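/* For example, emit_multi_reg_push ((1 << 4) | (1 << 5) | (1 << LR_REGNUM))
   ultimately produces a single "push {r4, r5, lr}" (a 12-byte stack
   decrement), while the attached REG_FRAME_RELATED_EXPR note describes the
   same effect as sp := sp - 12 followed by three individual word stores,
   which is the form the unwinder wants to see.  */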
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc;
  int offset_adj;
  int emit_update;

  return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    {
      tmp = ret_rtx;
      XVECEXP (par, 0, 0) = tmp;
    }

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
	 num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (VOIDmode,
			 stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
	reg = gen_rtx_REG (SImode, i);
	tmp = gen_rtx_SET (VOIDmode,
			   reg,
			   gen_frame_mem
			   (SImode,
			    plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

	/* We need to maintain a sequence for DWARF info too.  As dwarf info
	   should not have PC, skip PC.  */
	if (i != PC_REGNUM)
	  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
}
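/* For example, a mask covering {r4, r5, pc} yields a three-register pop plus
   one SP increment of 12; because PC is in the list the parallel is emitted
   as a jump insn, and PC is deliberately left out of the REG_CFA_RESTORE
   notes.  */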
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
	first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (VOIDmode,
		     base_reg,
		     plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (VOIDmode,
			 reg,
			 gen_frame_mem
			 (DFmode,
			  plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  enum machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
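/* For example, a function returning SImode yields 4 and one returning DImode
   yields 8; callers such as thumb1_compute_save_reg_mask use this to avoid
   picking a work register that overlaps the return value.  */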
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!leaf_function_p ()
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_uses_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	rtx call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3))
	  return true;
      }
  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue() and arm_compute_save_reg_mask().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     However, we only need to know about leaf functions once reload has
     completed, and the frame size cannot be changed after that time, so
     we can safely use the cached value.  */

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  leaf = leaf_function_p ();

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
                   arm_compute_static_chain_stack_bytes();

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
	 preserve that condition at any subroutine call.  We also require the
	 soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 8;
	}

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
	  && TARGET_HARD_FLOAT && TARGET_VFP)
	saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs = offsets->saved_args + saved +
                        arm_compute_static_chain_stack_bytes();
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
	 when there is a stack frame as the alignment will be rolled into
	 the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (! any_sibcall_uses_r3 ()
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0)
	    reg = 3;
	  else
	    for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
	      {
		/* Avoid fixed registers; they may be changed at
		   arbitrary times so it's unsafe to restore them
		   during the epilogue.  */
		if (!fixed_regs[i]
		    && (offsets->saved_regs_mask & (1 << i)) == 0)
		  {
		    reg = i;
		    break;
		  }
	      }

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
			    + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
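/* As a concrete (assumed) example: a non-leaf ARM function with 8 bytes of
   locals that saves {r4, r5, lr} gets saved_regs = 12, and with no
   interworking slot soft_frame = 12; on a doubleword-aligned target
   soft_frame is bumped to 16 (by padding here, or by pushing one extra
   register when there is no frame to roll the adjustment into), and
   outgoing_args is rounded so SP stays 8-byte aligned.  */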
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */

	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
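/* For instance, with the assumed offsets saved_args = 0, soft_frame = 16 and
   outgoing_args = 24, eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM returns 24 - (0 + 4) = 20, and FRAME_POINTER_REGNUM
   into STACK_POINTER_REGNUM returns 24 - 16 = 8; with nothing pushed at all
   the first case really does come out as -4.  */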
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */

static bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
	  (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
	  (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
	  (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
	   true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	insn = gen_rtx_MEM (V2SImode, insn);
	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	RTX_FRAME_RELATED_P (insn) = 1;
	saved_size += 8;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
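/* E.g. with outgoing_args - locals_base == 16 this emits "add r7, sp, #16"
   (r7 being the Thumb hard frame pointer); larger amounts go through a move
   of the constant into the frame pointer followed by an add of SP, with a
   REG_FRAME_RELATED_EXPR note describing the net effect for the unwinder.
   (Illustrative values only.)  */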
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, 0);
      r1 = gen_rtx_REG (SImode, 1);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
	 has entered a different function.  That said, the unwind info is
	 correct, individually, before and after this instruction because
	 we've described the save of SP, which will override the default
	 handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* For APCS frames, if IP register is clobbered
     when creating frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
	{
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer however, corrupts the IP
	     register, so we must push it first.  */
	  emit_multi_reg_push (1 << IP_REGNUM);

	  /* Do not set RTX_FRAME_RELATED_P on this insn.
	     The dwarf stack unwinding code only wants to see one
	     stack decrement per function, and this is not it.  If
	     this instruction is labeled as being part of the frame
	     creation sequence then dwarf2out_frame_debug_expr will
	     die when it encounters the assignment of IP to FP
	     later on, since the use of SP here establishes SP as
	     the CFA register and not IP.

	     Anyway this instruction is not really part of the stack
	     frame creation although it is part of the prologue.  */
	}
      else if (IS_NESTED (func_type))
	{
	  /* The Static chain register is the same as the IP register
	     used as a scratch register during stack frame creation.
	     To get around this need to find somewhere to store IP
	     whilst the frame is being created.  We try the following
	     places in order:

	       1. The last argument register.
	       2. A slot on the stack above the frame.  (This only
		  works if the function is not a varargs function).
	       3. Register r3, after pushing the argument registers
		  onto the stack.

	     Note - we only need to tell the dwarf2 backend about the SP
	     adjustment in the second variant; the static chain register
	     doesn't need to be unwound, as it doesn't contain a value
	     inherited from the caller.  */

	  if (df_regs_ever_live_p (3) == false)
	    insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	  else if (args_to_push == 0)
	    {
	      rtx dwarf;

	      gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
	      saved_regs += 4;

	      insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
	      insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
	      fp_offset = 4;

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -fp_offset));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }
	  else
	    {
	      /* Store the args on the stack.  */
	      if (cfun->machine->uses_anonymous_args)
		insn = emit_multi_reg_push
		  ((0xf0 >> (args_to_push / 4)) & 0xf);
	      else
		insn = emit_insn
		  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (- args_to_push)));

	      RTX_FRAME_RELATED_P (insn) = 1;

	      saved_pretend_args = 1;
	      fp_offset = args_to_push;
	      args_to_push = 0;

	      /* Now reuse r3 to preserve IP.  */
	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	    }
	}

      insn = emit_set_insn (ip_rtx,
			    plus_constant (Pmode, stack_pointer_rtx,
					   fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating extra
     push of IP (needed when frame is needed and frame layout if apcs),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;
	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}
      insn = emit_multi_reg_push (live_regs_mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
	{
	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (IS_NESTED (func_type))
	    {
	      /* Recover the static chain register.  */
	      if (!df_regs_ever_live_p (3)
		  || saved_pretend_args)
		insn = gen_rtx_REG (SImode, 3);
	      else /* if (crtl->args.pretend_args_size == 0) */
		{
		  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
		  insn = gen_frame_mem (SImode, insn);
		}
	      emit_set_insn (ip_rtx, insn);
	      /* Add a USE to stop propagate_one_insn() from barfing.  */
	      emit_insn (gen_prologue_use (ip_rtx));
	    }
	}
      else
	{
	  insn = GEN_INT (saved_regs - 4);
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
	 need to get tricky.  */
      rtx last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
			- offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
	 will prevent the scheduler from moving stores to the frame
	 before the stack adjustment.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					 hard_frame_pointer_rtx));
    }


  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
	mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
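/* Put together, a typical APCS-frame prologue produced by the code above
   might look roughly like (illustrative output, not taken from a real
   compile):

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<locals + outgoing args>

   with each piece annotated so that the unwinder sees exactly one stack
   decrement per instruction.  */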
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '(':
      /* Nothing in unified syntax, otherwise the current condition code.  */
      if (!TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;

    case ')':
      /* The current condition code in unified syntax, otherwise nothing.  */
      if (TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;

    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      if (TARGET_UNIFIED_ASM)
	{
	  fputc('s', stream);
	  arm_print_condition (stream);
	}
      else
	{
	  arm_print_condition (stream);
	  fputc('s', stream);
	}
      return;

    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc('s', stream);
      break;

    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
	REAL_VALUE_TYPE r;
	REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	r = real_value_negate (&r);
	fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (stream, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (stream, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
	  break;

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    case 'B':
      if (GET_CODE (x) == CONST_INT)
	{
	  HOST_WIDE_INT val;
	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
	HOST_WIDE_INT val;
	const char *shift;

	if (!shift_operator (x, SImode))
	  {
	    output_operand_lossage ("invalid shift operand");
	    break;
	  }

	shift = shift_op (x, &val);

	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
      }
      return;

      /* An explanation of the 'Q', 'R' and 'H' register operands:

	 In a pair of registers containing a DI or DF value the 'Q'
	 operand returns the register number of the register containing
	 the least significant part of the value.  The 'R' operand returns
	 the register number of the register containing the most
	 significant part of the value.

	 The 'H' operand returns the higher of the two register numbers.
	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
	 same as the 'Q' operand, since the most significant part of the
	 value is held in the lower number register.  The reverse is true
	 on systems where WORDS_BIG_ENDIAN is false.

	 The purpose of these operands is to distinguish between cases
	 where the endian-ness of the values is important (for example
	 when they are added together), and cases where the endian-ness
	 is irrelevant, but the order of register operations is important.
	 For example when loading a value from memory into a register
	 pair, the endian-ness does not matter.  Provided that the value
	 from the lower memory address is put into the lower numbered
	 register, and the value from the higher address is put into the
	 higher numbered register, the load will work regardless of whether
	 the value being loaded is big-wordian or little-wordian.  The
	 order of the two register loads can matter however, if the address
	 of the memory location is actually held in one of the registers
	 being overwritten by the load.

	 The 'Q' and 'R' constraints are also available for 64-bit
	 constants.  */
    case 'Q':
      if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	{
	  rtx part = gen_lowpart (SImode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	{
	  enum machine_mode mode = GET_MODE (x);
	  rtx part;

	  if (mode == VOIDmode)
	    mode = DImode;
	  part = gen_highpart_mode (SImode, mode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
		   GET_CODE (XEXP (x, 0)) == REG
		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
		   REGNO (x),
		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
	int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
	int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
	if (numregs == 1)
	  asm_fprintf (stream, "{d%d}", regno);
	else
	  asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[get_arm_condition_code (x)],
	     stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
	 want to do that.  */
      if (x == const_true_rtx)
	{
	  output_operand_lossage ("instruction never executed");
	  return;
	}
      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
				 (get_arm_condition_code (x))],
	     stream);
      return;

    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (GET_CODE (x) != REG
	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
	/* Bad value for wCG register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      else
	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

      /* Print an iWMMXt control register name.  */
    case 'w':
      if (GET_CODE (x) != CONST_INT
	  || INTVAL (x) < 0
	  || INTVAL (x) >= 16)
	/* Bad value for wC register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      else
	{
	  static const char * wc_reg_names [16] =
	    {
	      "wCID",  "wCon",  "wCSSF", "wCASF",
	      "wC4",   "wC5",   "wC6",   "wC7",
	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
	      "wC12",  "wC13",  "wC14",  "wC15"
	    };

	  fprintf (stream, wc_reg_names [INTVAL (x)]);
	}
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
	int mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
	int mode = GET_MODE (x);
	int is_quad = (code == 'q');
	int regno;

	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (GET_CODE (x) != REG
	    || !IS_VFP_REGNUM (REGNO (x)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
	    || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
		 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
	int mode = GET_MODE (x);
	int regno;

	if ((GET_MODE_SIZE (mode) != 16
	     && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!NEON_REGNO_OK_FOR_QUAD (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (GET_MODE_SIZE (mode) == 16)
	  fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
				  + (code == 'f' ? 1 : 0));
	else
	  fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
				  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
	int index = vfp3_const_double_index (x);
	gcc_assert (index != -1);
	fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
	rtx addr;
	bool postinc = FALSE;
	unsigned align, memsize, align_bits;

	gcc_assert (GET_CODE (x) == MEM);
	addr = XEXP (x, 0);
	if (GET_CODE (addr) == POST_INC)
	  {
	    postinc = 1;
	    addr = XEXP (addr, 0);
	  }
	asm_fprintf (stream, "[%r", REGNO (addr));

	/* We know the alignment of this access, so we can emit a hint in the
	   instruction (for some alignments) as an aid to the memory subsystem
	   of the target.  */
	align = MEM_ALIGN (x) >> 3;
	memsize = MEM_SIZE (x);

	/* Only certain alignment specifiers are supported by the hardware.  */
	if (memsize == 32 && (align % 32) == 0)
	  align_bits = 256;
	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
	  align_bits = 128;
	else if (memsize >= 8 && (align % 8) == 0)
	  align_bits = 64;
	else
	  align_bits = 0;

	if (align_bits != 0)
	  asm_fprintf (stream, ":%d", align_bits);

	asm_fprintf (stream, "]");

	if (postinc)
	  fputs("!", stream);
      }
      return;

    case 'C':
      {
	rtx addr;

	gcc_assert (GET_CODE (x) == MEM);
	addr = XEXP (x, 0);
	gcc_assert (GET_CODE (addr) == REG);
	asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
	int mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      gcc_assert (GET_CODE (x) == CONST_DOUBLE);
      fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
	int mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (stream, "%r", REGNO (x));
	  break;

	case MEM:
	  output_memory_reference_mode = GET_MODE (x);
	  output_address (XEXP (x, 0));
	  break;

	case CONST_DOUBLE:
	  if (TARGET_NEON)
	    {
	      char fpstr[20];
	      real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			       sizeof (fpstr), 0, 1);
	      fprintf (stream, "#%s", fpstr);
	    }
	  else
	    fprintf (stream, "#%s", fp_immediate_constant (x));
	  break;

	default:
	  gcc_assert (GET_CODE (x) != NEG);
	  fputc ('#', stream);
	  if (GET_CODE (x) == HIGH)
	    {
	      fputs (":lower16:", stream);
	      x = XEXP (x, 0);
	    }

	  output_addr_const (stream, x);
	  break;
	}
    }
}
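/* For instance (an assumed example, with an empty register prefix): given
   operand 0 in register r3, the template "add%?\t%0, %0, #1" prints as
   "add r3, r3, #1", with %? expanding to the current condition code, if
   any; "%|" and "%@" expand to the register prefix and the assembler
   comment marker respectively.  */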
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (GET_CODE (x) == REG)
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
	{
	  rtx base = XEXP (x, 0);
	  rtx index = XEXP (x, 1);
	  HOST_WIDE_INT offset = 0;
	  if (GET_CODE (base) != REG
	      || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
	    {
	      /* Ensure that BASE is a register.  */
	      /* (one of them must be).  */
	      /* Also ensure the SP is not used as in index register.  */
	      rtx temp = base;
	      base = index;
	      index = temp;
	    }
	  switch (GET_CODE (index))
	    {
	    case CONST_INT:
	      offset = INTVAL (index);
	      if (is_minus)
		offset = -offset;
	      asm_fprintf (stream, "[%r, #%wd]",
			   REGNO (base), offset);
	      break;

	    case REG:
	      asm_fprintf (stream, "[%r, %s%r]",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (index));
	      break;

	    case MULT:
	    case ASHIFTRT:
	    case LSHIFTRT:
	    case ASHIFT:
	    case ROTATERT:
	      asm_fprintf (stream, "[%r, %s%r",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (XEXP (index, 0)));
	      arm_print_operand (stream, index, 'S');
	      fputs ("]", stream);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
	{
	  extern enum machine_mode output_memory_reference_mode;

	  gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
	    asm_fprintf (stream, "[%r, #%s%d]!",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == PRE_DEC ? "-" : "",
			 GET_MODE_SIZE (output_memory_reference_mode));
	  else
	    asm_fprintf (stream, "[%r], #%s%d",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == POST_DEC ? "-" : "",
			 GET_MODE_SIZE (output_memory_reference_mode));
	}
      else if (GET_CODE (x) == PRE_MODIFY)
	{
	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
	  if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
	    asm_fprintf (stream, "#%wd]!",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r]!",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else if (GET_CODE (x) == POST_MODIFY)
	{
	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
	  if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
	    asm_fprintf (stream, "#%wd",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else
	output_addr_const (stream, x);
    }
  else
    {
      if (GET_CODE (x) == REG)
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
	{
	  gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    asm_fprintf (stream, "[%r, #%wd]",
			 REGNO (XEXP (x, 0)),
			 INTVAL (XEXP (x, 1)));
	  else
	    asm_fprintf (stream, "[%r, %r]",
			 REGNO (XEXP (x, 0)),
			 REGNO (XEXP (x, 1)));
	}
      else
	output_addr_const (stream, x);
    }
}
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  enum machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
         .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
          (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
        {
          /* See legitimize_pic_address for an explanation of the
             TARGET_VXWORKS_RTP check.  */
          if (TARGET_VXWORKS_RTP
              || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
            fputs ("(GOT)", asm_out_file);
          else
            fputs ("(GOTOFF)", asm_out_file);
        }
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_SIZE (GET_MODE_INNER (mode));

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            assemble_integer
              (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
          }
      else
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            REAL_VALUE_TYPE rval;

            REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);

            assemble_real
              (rval, GET_MODE_INNER (mode),
               i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
          }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
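/* A sketch of the output this produces for a word-sized address in a PIC
   constant table:

       .word	foo(GOT)	@ non-local symbol, resolved via the GOT
       .word	bar(GOTOFF)	@ local symbol, as a GOT-relative offset

   Plain (non-PIC) word values are emitted as just "\t.word\tfoo".  */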
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
               is_ctor ? ".init_array" : ".fini_array",
               priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}
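/* For example, a constructor with priority 123 lands in a section named
   ".init_array.00123" (note the "%.5u" padding above), and the (target1)
   annotation emits an R_ARM_TARGET1 relocation, which the linker may
   treat as either absolute or relative depending on its configuration.  */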
/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The states of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */
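/* As a rough sketch of the transformation this state machine performs
   (illustrative only; the register names are arbitrary), a short forward
   branch such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   is rewritten by suppressing the branch and predicating the skipped
   instruction instead:

	cmp	r0, #0
	addne	r1, r1, #1

   The branch disappears entirely whenever every skipped insn can be
   conditionally executed.  */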
/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
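/* For example, scanning the two-insn sequence

       (cond_exec (eq ...) (set (reg r0) ...))
       (cond_exec (ne ...) (set (reg r1) ...))

   leaves arm_condexec_masklen == 2 and arm_condexec_mask == 0b01, so
   thumb2_asm_output_opcode later prints "ite eq" ahead of the first
   instruction and the pair is assembled as a single IT block.  */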
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */

static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
                           XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
        return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
        return code;
      return ARM_NV;

    case CC_NOOVmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_PL;
        case LT: return ARM_MI;
        default: return ARM_NV;
        }

    case CC_Zmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        default: return ARM_NV;
        }

    case CC_Nmode:
      switch (comp_code)
        {
        case NE: return ARM_MI;
        case EQ: return ARM_PL;
        default: return ARM_NV;
        }

    case CCFPEmode:
    case CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LS;
        case LT: return ARM_MI;
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case ORDERED: return ARM_VC;
        case UNORDERED: return ARM_VS;
        case UNLT: return ARM_LT;
        case UNLE: return ARM_LE;
        case UNGT: return ARM_HI;
        case UNGE: return ARM_PL;
        /* UNEQ and LTGT do not have a representation.  */
        case UNEQ: /* Fall through.  */
        case LTGT: /* Fall through.  */
        default: return ARM_NV;
        }

    case CC_SWPmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_LE;
        case GT: return ARM_LT;
        case LE: return ARM_GE;
        case LT: return ARM_GT;
        case GEU: return ARM_LS;
        case GTU: return ARM_CC;
        case LEU: return ARM_CS;
        case LTU: return ARM_HI;
        default: return ARM_NV;
        }

    case CC_Cmode:
      switch (comp_code)
        {
        case LTU: return ARM_CS;
        case GEU: return ARM_CC;
        default: return ARM_NV;
        }

    case CC_CZmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    case CC_NCVmode:
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    case CCmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    default: gcc_unreachable ();
    }
}
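/* A sketch of the dominance cases above: CC_DGEmode records "GE dominated
   by the comparison outcome", so a NE test of such a CC register maps to
   ARM_GE, an EQ test maps to the inverse condition ARM_LT, and any other
   comparison has no single ARM condition code (ARM_NV).  */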
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx insn)
{
  rtx first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (GET_CODE (insn) == JUMP_INSN)
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
         barrier terminates the block.  */
      if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
        break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
          || GET_CODE (body) == CLOBBER)
        continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
        break;
      /* Allow up to 4 conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > 4)
        break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
        arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
        break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (GET_CODE (insn) == JUMP_INSN)
        break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
        {
          arm_target_insn = NULL;
          arm_ccfsm_state = 0;
        }
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
        {
          start_insn = next_nonnote_insn (start_insn);
          if (GET_CODE (start_insn) == BARRIER)
            {
              /* XXX Isn't this always a barrier?  */
              start_insn = next_nonnote_insn (start_insn);
            }
          if (GET_CODE (start_insn) == CODE_LABEL
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
              && LABEL_NUSES (start_insn) == 1)
            reverse = TRUE;
          else
            return;
        }
      else if (ANY_RETURN_P (body))
        {
          start_insn = next_nonnote_insn (start_insn);
          if (GET_CODE (start_insn) == BARRIER)
            start_insn = next_nonnote_insn (start_insn);
          if (GET_CODE (start_insn) == CODE_LABEL
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
              && LABEL_NUSES (start_insn) == 1)
            {
              reverse = TRUE;
              seeking_return = 1;
              return_code = GET_CODE (body);
            }
          else
            return;
        }
      else
        return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (GET_CODE (insn) != JUMP_INSN)
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
          && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx this_insn = start_insn, label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
        {
          if (!seeking_return)
            label = XEXP (SET_SRC (body), 0);
        }
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
        label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
        {
          label = XEXP (XEXP (SET_SRC (body), 2), 0);
          then_not_else = FALSE;
        }
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
        {
          seeking_return = 1;
          return_code = GET_CODE (XEXP (SET_SRC (body), 1));
        }
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
        {
          seeking_return = 1;
          return_code = GET_CODE (XEXP (SET_SRC (body), 2));
          then_not_else = FALSE;
        }
      else
        gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
         insns are okay, and the label or unconditional branch to the same
         label is not too far away, succeed.  */
      for (insns_skipped = 0;
           !fail && !succeed && insns_skipped++ < max_insns_skipped;)
        {
          rtx scanbody;

          this_insn = next_nonnote_insn (this_insn);
          if (!this_insn)
            break;

          switch (GET_CODE (this_insn))
            {
            case CODE_LABEL:
              /* Succeed if it is the target label, otherwise fail since
                 control falls in from somewhere else.  */
              if (this_insn == label)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case BARRIER:
              /* Succeed if the following insn is the target label.
                 Otherwise fail.
                 If return insns are used then the last insn in a function
                 will be a barrier.  */
              this_insn = next_nonnote_insn (this_insn);
              if (this_insn && this_insn == label)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case CALL_INSN:
              /* The AAPCS says that conditional calls should not be
                 used since they make interworking inefficient (the
                 linker can't transform BL<cond> into BLX).  That's
                 only a problem if the machine has BLX.  */
              if (arm_arch5)
                {
                  fail = TRUE;
                  break;
                }

              /* Succeed if the following insn is the target label, or
                 if the following two insns are a barrier and the
                 target label.  */
              this_insn = next_nonnote_insn (this_insn);
              if (this_insn && GET_CODE (this_insn) == BARRIER)
                this_insn = next_nonnote_insn (this_insn);

              if (this_insn && this_insn == label
                  && insns_skipped < max_insns_skipped)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case JUMP_INSN:
              /* If this is an unconditional branch to the same label, succeed.
                 If it is to another label, do nothing.  If it is conditional,
                 fail.  */
              /* XXX Probably, the tests for SET and the PC are
                 unnecessary.  */

              scanbody = PATTERN (this_insn);
              if (GET_CODE (scanbody) == SET
                  && GET_CODE (SET_DEST (scanbody)) == PC)
                {
                  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
                      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
                    {
                      arm_ccfsm_state = 2;
                      succeed = TRUE;
                    }
                  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
                    fail = TRUE;
                }
              /* Fail if a conditional return is undesirable (e.g. on a
                 StrongARM), but still allow this if optimizing for size.  */
              else if (GET_CODE (scanbody) == return_code
                       && !use_return_insn (TRUE, NULL)
                       && !optimize_size)
                fail = TRUE;
              else if (GET_CODE (scanbody) == return_code)
                {
                  arm_ccfsm_state = 2;
                  succeed = TRUE;
                }
              else if (GET_CODE (scanbody) == PARALLEL)
                {
                  switch (get_attr_conds (this_insn))
                    {
                    case CONDS_NOCOND:
                      break;
                    default:
                      fail = TRUE;
                      break;
                    }
                }
              else
                fail = TRUE;  /* Unrecognized jump (e.g. epilogue).  */

              break;

            case INSN:
              /* Instructions using or affecting the condition codes make it
                 fail.  */
              scanbody = PATTERN (this_insn);
              if (!(GET_CODE (scanbody) == SET
                    || GET_CODE (scanbody) == PARALLEL)
                  || get_attr_conds (this_insn) != CONDS_NOCOND)
                fail = TRUE;
              break;

            default:
              break;
            }
        }
      if (succeed)
        {
          if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
            arm_target_label = CODE_LABEL_NUMBER (label);
          else
            {
              gcc_assert (seeking_return || arm_ccfsm_state == 2);

              while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
                {
                  this_insn = next_nonnote_insn (this_insn);
                  gcc_assert (!this_insn
                              || (GET_CODE (this_insn) != BARRIER
                                  && GET_CODE (this_insn) != CODE_LABEL));
                }
              if (!this_insn)
                {
                  /* Oh, dear! we ran off the end.. give up.  */
                  extract_constrain_insn_cached (insn);
                  arm_ccfsm_state = 0;
                  arm_target_insn = NULL;
                  return;
                }
              arm_target_insn = this_insn;
            }

          /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
             what it was.  */
          if (!reverse)
            arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

          if (reverse || then_not_else)
            arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
        }

      /* Restore recog_data (getting the attributes of other insns can
         destroy this array, but final.c assumes that it remains intact
         across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
        buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
                   arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
            || (TARGET_HARD_FLOAT && TARGET_VFP
                && regno == VFPCC_REGNUM));

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
        return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
        return VFP_REGNO_OK_FOR_DOUBLE (regno);

      /* VFP registers can hold HFmode values, but there is no point in
         putting them there unless we have hardware conversion insns.  */
      if (mode == HFmode)
        return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
        return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
          || (VALID_NEON_QREG_MODE (mode)
              && NEON_REGNO_OK_FOR_QUAD (regno))
          || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
          || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
          || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
          || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
          || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return FALSE;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
        return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
        return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs so that we can
     use ldrd.  Do not allow very large Neon structure opaque modes in
     general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
      && ARM_NUM_REGS (mode) <= 4;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return FALSE;
}
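/* Two concrete consequences of the rules above, as a sketch: on a core
   with TARGET_LDRD, a DImode value may live in {r0,r1} but not in {r1,r2},
   since an odd base register would defeat ldrd/strd; and any mode needing
   more than four core registers (ARM_NUM_REGS (mode) > 4, e.g. the large
   Neon structure modes) is rejected from the general registers.  */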
/* Implement MODES_TIEABLE_P.  */

bool
arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode1)
          || VALID_NEON_QREG_MODE (mode1)
          || VALID_NEON_STRUCT_MODE (mode1))
      && (VALID_NEON_DREG_MODE (mode2)
          || VALID_NEON_QREG_MODE (mode2)
          || VALID_NEON_STRUCT_MODE (mode2)))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
        return STACK_REG;
      if (regno == CC_REGNUM)
        return CC_REG;
      if (regno < 8)
        return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (   regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
        return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
        return VFP_LO_REGS;
      else
        return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return FPA_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (GET_CODE (addr) != REG)
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

           double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   GET_CODE (insn) == INSN
          && GET_CODE (PATTERN (insn)) == SET
          && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
          && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
          && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
          && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
          && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
          )
        {
          value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
          break;
        }
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
typedef enum {
  T_V8QI,
  T_V4HI,
  T_V2SI,
  T_V2SF,
  T_DI,
  T_V16QI,
  T_V8HI,
  T_V4SI,
  T_V4SF,
  T_V2DI,
  T_TI,
  T_EI,
  T_OI,
  T_MAX  /* Size of enum.  Keep last.  */
} neon_builtin_type_mode;

#define TYPE_MODE_BIT(X) (1 << (X))

#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI)  \
                 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
                 | TYPE_MODE_BIT (T_DI))
#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
                 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
                 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))

#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI
#define ti_UP    T_TI
#define ei_UP    T_EI
#define oi_UP    T_OI

#define UP(X) X##_UP

typedef enum {
  NEON_BINOP,
  NEON_TERNOP,
  NEON_UNOP,
  NEON_GETLANE,
  NEON_SETLANE,
  NEON_CREATE,
  NEON_DUP,
  NEON_DUPLANE,
  NEON_COMBINE,
  NEON_SPLIT,
  NEON_LANEMUL,
  NEON_LANEMULL,
  NEON_LANEMULH,
  NEON_LANEMAC,
  NEON_SCALARMUL,
  NEON_SCALARMULL,
  NEON_SCALARMULH,
  NEON_SCALARMAC,
  NEON_CONVERT,
  NEON_FIXCONV,
  NEON_SELECT,
  NEON_RESULTPAIR,
  NEON_REINTERP,
  NEON_VTBL,
  NEON_VTBX,
  NEON_LOAD1,
  NEON_LOAD1LANE,
  NEON_LOADSTRUCT,
  NEON_LOADSTRUCTLANE,
  NEON_STORE1,
  NEON_STORE1LANE,
  NEON_STORESTRUCT,
  NEON_STORESTRUCTLANE,
  NEON_LOGICBINOP,
  NEON_SHIFTINSERT,
  NEON_SHIFTIMM,
  NEON_SHIFTACC
} neon_itype;

typedef struct {
  const char *name;
  const neon_itype itype;
  const neon_builtin_type_mode mode;
  const enum insn_code code;
  unsigned int fcode;
} neon_builtin_datum;

#define CF(N,X) CODE_FOR_neon_##N##X

#define VAR1(T, N, A) \
  {#N, NEON_##T, UP (A), CF (N, A), 0}
#define VAR2(T, N, A, B) \
  VAR1 (T, N, A), \
  {#N, NEON_##T, UP (B), CF (N, B), 0}
#define VAR3(T, N, A, B, C) \
  VAR2 (T, N, A, B), \
  {#N, NEON_##T, UP (C), CF (N, C), 0}
#define VAR4(T, N, A, B, C, D) \
  VAR3 (T, N, A, B, C), \
  {#N, NEON_##T, UP (D), CF (N, D), 0}
#define VAR5(T, N, A, B, C, D, E) \
  VAR4 (T, N, A, B, C, D), \
  {#N, NEON_##T, UP (E), CF (N, E), 0}
#define VAR6(T, N, A, B, C, D, E, F) \
  VAR5 (T, N, A, B, C, D, E), \
  {#N, NEON_##T, UP (F), CF (N, F), 0}
#define VAR7(T, N, A, B, C, D, E, F, G) \
  VAR6 (T, N, A, B, C, D, E, F), \
  {#N, NEON_##T, UP (G), CF (N, G), 0}
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, A, B, C, D, E, F, G), \
  {#N, NEON_##T, UP (H), CF (N, H), 0}
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, A, B, C, D, E, F, G, H), \
  {#N, NEON_##T, UP (I), CF (N, I), 0}
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
  {#N, NEON_##T, UP (J), CF (N, J), 0}

/* The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not differentiated between though, and
   are all mapped onto the same mode for a given element size.)  The modes
   listed per instruction should be the same as those defined for that
   instruction's pattern in neon.md.  */
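/* As a worked example of the macros above, the table entry

       VAR3 (BINOP, vaddl, v8qi, v4hi, v2si)

   expands, via VAR2, VAR1, UP and CF, to the three initializers

       {"vaddl", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddlv8qi, 0},
       {"vaddl", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddlv4hi, 0},
       {"vaddl", NEON_BINOP, T_V2SI, CODE_FOR_neon_vaddlv2si, 0},

   i.e. one neon_builtin_datum per "key" mode of the instruction.  */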
static neon_builtin_datum neon_builtin_data[] =
{
  VAR10 (BINOP, vadd,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
  VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
  VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
  VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
  VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
  VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
  VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
  VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
  VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
  VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
  VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
  VAR2 (TERNOP, vqdmlal, v4hi, v2si),
  VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
  VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
  VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
  VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
  VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
  VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
  VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
  VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
  VAR2 (BINOP, vqdmull, v4hi, v2si),
  VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
  VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
  VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
  VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
  VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
  VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
  VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
  VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
  VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
  VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
  VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
  VAR10 (BINOP, vsub,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
  VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
  VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
  VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
  VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
  VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
  VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
  VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
  VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
  VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
  VAR2 (BINOP, vcage, v2sf, v4sf),
  VAR2 (BINOP, vcagt, v2sf, v4sf),
  VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
  VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
  VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
  VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
  VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
  VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
  VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
  VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
  VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
  VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
  VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
  VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
  VAR2 (BINOP, vrecps, v2sf, v4sf),
  VAR2 (BINOP, vrsqrts, v2sf, v4sf),
  VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
  VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
  VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
  VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
  VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
  VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
  VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
  VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
  VAR2 (UNOP, vcnt, v8qi, v16qi),
  VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
  VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
  VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
  /* FIXME: vget_lane supports more variants than this!  */
  VAR10 (GETLANE, vget_lane,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR10 (SETLANE, vset_lane,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
  VAR10 (DUP, vdup_n,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR10 (DUPLANE, vdup_lane,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
  VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
  VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
  VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
  VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
  VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
  VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
  VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
  VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
  VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
  VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
  VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
  VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
  VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
  VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
  VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
  VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
  VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
  VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
  VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
  VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
  VAR10 (BINOP, vext,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
  VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
  VAR2 (UNOP, vrev16, v8qi, v16qi),
  VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
  VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
  VAR10 (SELECT, vbsl,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR1 (VTBL, vtbl1, v8qi),
  VAR1 (VTBL, vtbl2, v8qi),
  VAR1 (VTBL, vtbl3, v8qi),
  VAR1 (VTBL, vtbl4, v8qi),
  VAR1 (VTBX, vtbx1, v8qi),
  VAR1 (VTBX, vtbx2, v8qi),
  VAR1 (VTBX, vtbx3, v8qi),
  VAR1 (VTBX, vtbx4, v8qi),
  VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
  VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
  VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
  VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
  VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
  VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
  VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
  VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
  VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
  VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
  VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
  VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
  VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR10 (LOAD1, vld1,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR10 (LOAD1LANE, vld1_lane,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR10 (LOAD1, vld1_dup,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR10 (STORE1, vst1,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR10 (STORE1LANE, vst1_lane,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR9 (LOADSTRUCT,
        vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
  VAR7 (LOADSTRUCTLANE, vld2_lane,
        v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
  VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
  VAR9 (STORESTRUCT, vst2,
        v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
  VAR7 (STORESTRUCTLANE, vst2_lane,
        v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
  VAR9 (LOADSTRUCT,
        vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
  VAR7 (LOADSTRUCTLANE, vld3_lane,
        v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
  VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
  VAR9 (STORESTRUCT, vst3,
        v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
  VAR7 (STORESTRUCTLANE, vst3_lane,
        v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
  VAR9 (LOADSTRUCT, vld4,
        v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
  VAR7 (LOADSTRUCTLANE, vld4_lane,
        v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
  VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
  VAR9 (STORESTRUCT, vst4,
        v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
  VAR7 (STORESTRUCTLANE, vst4_lane,
        v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
  VAR10 (LOGICBINOP, vand,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR10 (LOGICBINOP, vorr,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR10 (BINOP, veor,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR10 (LOGICBINOP, vbic,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
  VAR10 (LOGICBINOP, vorn,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
};
/* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
   symbolic names defined here (which would require too much duplication).
   FIXME?  */
enum arm_builtins
{
  ARM_BUILTIN_GETWCGR0,
  ARM_BUILTIN_GETWCGR1,
  ARM_BUILTIN_GETWCGR2,
  ARM_BUILTIN_GETWCGR3,

  ARM_BUILTIN_SETWCGR0,
  ARM_BUILTIN_SETWCGR1,
  ARM_BUILTIN_SETWCGR2,
  ARM_BUILTIN_SETWCGR3,

  ARM_BUILTIN_WAVG2BR,
  ARM_BUILTIN_WAVG2HR,
  ARM_BUILTIN_WAVG2B,
  ARM_BUILTIN_WAVG2H,

  ARM_BUILTIN_WMACS,
  ARM_BUILTIN_WMACSZ,
  ARM_BUILTIN_WMACU,
  ARM_BUILTIN_WMACUZ,

  ARM_BUILTIN_WSADB,
  ARM_BUILTIN_WSADBZ,
  ARM_BUILTIN_WSADH,
  ARM_BUILTIN_WSADHZ,

  ARM_BUILTIN_WALIGNI,
  ARM_BUILTIN_WALIGNR0,
  ARM_BUILTIN_WALIGNR1,
  ARM_BUILTIN_WALIGNR2,
  ARM_BUILTIN_WALIGNR3,

  ARM_BUILTIN_TMIA,
  ARM_BUILTIN_TMIAPH,
  ARM_BUILTIN_TMIABB,
  ARM_BUILTIN_TMIABT,
  ARM_BUILTIN_TMIATB,
  ARM_BUILTIN_TMIATT,

  ARM_BUILTIN_TMOVMSKB,
  ARM_BUILTIN_TMOVMSKH,
  ARM_BUILTIN_TMOVMSKW,

  ARM_BUILTIN_TBCSTB,
  ARM_BUILTIN_TBCSTH,
  ARM_BUILTIN_TBCSTW,

  ARM_BUILTIN_WMADDS,
  ARM_BUILTIN_WMADDU,

  ARM_BUILTIN_WPACKHSS,
  ARM_BUILTIN_WPACKWSS,
  ARM_BUILTIN_WPACKDSS,
  ARM_BUILTIN_WPACKHUS,
  ARM_BUILTIN_WPACKWUS,
  ARM_BUILTIN_WPACKDUS,

  ARM_BUILTIN_WADDB,
  ARM_BUILTIN_WADDH,
  ARM_BUILTIN_WADDW,
  ARM_BUILTIN_WADDSSB,
  ARM_BUILTIN_WADDSSH,
  ARM_BUILTIN_WADDSSW,
  ARM_BUILTIN_WADDUSB,
  ARM_BUILTIN_WADDUSH,
  ARM_BUILTIN_WADDUSW,
  ARM_BUILTIN_WSUBB,
  ARM_BUILTIN_WSUBH,
  ARM_BUILTIN_WSUBW,
  ARM_BUILTIN_WSUBSSB,
  ARM_BUILTIN_WSUBSSH,
  ARM_BUILTIN_WSUBSSW,
  ARM_BUILTIN_WSUBUSB,
  ARM_BUILTIN_WSUBUSH,
  ARM_BUILTIN_WSUBUSW,

  ARM_BUILTIN_WCMPEQB,
  ARM_BUILTIN_WCMPEQH,
  ARM_BUILTIN_WCMPEQW,
  ARM_BUILTIN_WCMPGTUB,
  ARM_BUILTIN_WCMPGTUH,
  ARM_BUILTIN_WCMPGTUW,
  ARM_BUILTIN_WCMPGTSB,
  ARM_BUILTIN_WCMPGTSH,
  ARM_BUILTIN_WCMPGTSW,

  ARM_BUILTIN_TEXTRMSB,
  ARM_BUILTIN_TEXTRMSH,
  ARM_BUILTIN_TEXTRMSW,
  ARM_BUILTIN_TEXTRMUB,
  ARM_BUILTIN_TEXTRMUH,
  ARM_BUILTIN_TEXTRMUW,
  ARM_BUILTIN_TINSRB,
  ARM_BUILTIN_TINSRH,
  ARM_BUILTIN_TINSRW,

  ARM_BUILTIN_WMAXSW,
  ARM_BUILTIN_WMAXSH,
  ARM_BUILTIN_WMAXSB,
  ARM_BUILTIN_WMAXUW,
  ARM_BUILTIN_WMAXUH,
  ARM_BUILTIN_WMAXUB,
  ARM_BUILTIN_WMINSW,
  ARM_BUILTIN_WMINSH,
  ARM_BUILTIN_WMINSB,
  ARM_BUILTIN_WMINUW,
  ARM_BUILTIN_WMINUH,
  ARM_BUILTIN_WMINUB,

  ARM_BUILTIN_WMULUM,
  ARM_BUILTIN_WMULSM,
  ARM_BUILTIN_WMULUL,

  ARM_BUILTIN_PSADBH,
  ARM_BUILTIN_WSHUFH,

  ARM_BUILTIN_WSLLH,
  ARM_BUILTIN_WSLLW,
  ARM_BUILTIN_WSLLD,
  ARM_BUILTIN_WSRAH,
  ARM_BUILTIN_WSRAW,
  ARM_BUILTIN_WSRAD,
  ARM_BUILTIN_WSRLH,
  ARM_BUILTIN_WSRLW,
  ARM_BUILTIN_WSRLD,
  ARM_BUILTIN_WRORH,
  ARM_BUILTIN_WRORW,
  ARM_BUILTIN_WRORD,
  ARM_BUILTIN_WSLLHI,
  ARM_BUILTIN_WSLLWI,
  ARM_BUILTIN_WSLLDI,
  ARM_BUILTIN_WSRAHI,
  ARM_BUILTIN_WSRAWI,
  ARM_BUILTIN_WSRADI,
  ARM_BUILTIN_WSRLHI,
  ARM_BUILTIN_WSRLWI,
  ARM_BUILTIN_WSRLDI,
  ARM_BUILTIN_WRORHI,
  ARM_BUILTIN_WRORWI,
  ARM_BUILTIN_WRORDI,

  ARM_BUILTIN_WUNPCKIHB,
  ARM_BUILTIN_WUNPCKIHH,
  ARM_BUILTIN_WUNPCKIHW,
  ARM_BUILTIN_WUNPCKILB,
  ARM_BUILTIN_WUNPCKILH,
  ARM_BUILTIN_WUNPCKILW,

  ARM_BUILTIN_WUNPCKEHSB,
  ARM_BUILTIN_WUNPCKEHSH,
  ARM_BUILTIN_WUNPCKEHSW,
  ARM_BUILTIN_WUNPCKEHUB,
  ARM_BUILTIN_WUNPCKEHUH,
  ARM_BUILTIN_WUNPCKEHUW,
  ARM_BUILTIN_WUNPCKELSB,
  ARM_BUILTIN_WUNPCKELSH,
  ARM_BUILTIN_WUNPCKELSW,
  ARM_BUILTIN_WUNPCKELUB,
  ARM_BUILTIN_WUNPCKELUH,
  ARM_BUILTIN_WUNPCKELUW,

  ARM_BUILTIN_WADDSUBHX,
  ARM_BUILTIN_WSUBADDHX,

  ARM_BUILTIN_WABSDIFFB,
  ARM_BUILTIN_WABSDIFFH,
  ARM_BUILTIN_WABSDIFFW,

  ARM_BUILTIN_WADDCH,
  ARM_BUILTIN_WADDCW,

  ARM_BUILTIN_WAVG4,
  ARM_BUILTIN_WAVG4R,

  ARM_BUILTIN_WMADDSX,
  ARM_BUILTIN_WMADDUX,

  ARM_BUILTIN_WMADDSN,
  ARM_BUILTIN_WMADDUN,

  ARM_BUILTIN_WMULWSM,
  ARM_BUILTIN_WMULWUM,

  ARM_BUILTIN_WMULWSMR,
  ARM_BUILTIN_WMULWUMR,

  ARM_BUILTIN_WMULWL,

  ARM_BUILTIN_WMULSMR,
  ARM_BUILTIN_WMULUMR,

  ARM_BUILTIN_WQMULM,
  ARM_BUILTIN_WQMULMR,

  ARM_BUILTIN_WQMULWM,
  ARM_BUILTIN_WQMULWMR,

  ARM_BUILTIN_WADDBHUSM,
  ARM_BUILTIN_WADDBHUSL,

  ARM_BUILTIN_WQMIABB,
  ARM_BUILTIN_WQMIABT,
  ARM_BUILTIN_WQMIATB,
  ARM_BUILTIN_WQMIATT,

  ARM_BUILTIN_WQMIABBN,
  ARM_BUILTIN_WQMIABTN,
  ARM_BUILTIN_WQMIATBN,
  ARM_BUILTIN_WQMIATTN,

  ARM_BUILTIN_WMIABB,
  ARM_BUILTIN_WMIABT,
  ARM_BUILTIN_WMIATB,
  ARM_BUILTIN_WMIATT,

  ARM_BUILTIN_WMIABBN,
  ARM_BUILTIN_WMIABTN,
  ARM_BUILTIN_WMIATBN,
  ARM_BUILTIN_WMIATTN,

  ARM_BUILTIN_WMIAWBB,
  ARM_BUILTIN_WMIAWBT,
  ARM_BUILTIN_WMIAWTB,
  ARM_BUILTIN_WMIAWTT,

  ARM_BUILTIN_WMIAWBBN,
  ARM_BUILTIN_WMIAWBTN,
  ARM_BUILTIN_WMIAWTBN,
  ARM_BUILTIN_WMIAWTTN,

  ARM_BUILTIN_WMERGE,

  ARM_BUILTIN_THREAD_POINTER,

  ARM_BUILTIN_NEON_BASE,

  ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
};

static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19108 arm_init_neon_builtins (void)
19110 unsigned int i
, fcode
;
19113 tree neon_intQI_type_node
;
19114 tree neon_intHI_type_node
;
19115 tree neon_polyQI_type_node
;
19116 tree neon_polyHI_type_node
;
19117 tree neon_intSI_type_node
;
19118 tree neon_intDI_type_node
;
19119 tree neon_float_type_node
;
19121 tree intQI_pointer_node
;
19122 tree intHI_pointer_node
;
19123 tree intSI_pointer_node
;
19124 tree intDI_pointer_node
;
19125 tree float_pointer_node
;
19127 tree const_intQI_node
;
19128 tree const_intHI_node
;
19129 tree const_intSI_node
;
19130 tree const_intDI_node
;
19131 tree const_float_node
;
19133 tree const_intQI_pointer_node
;
19134 tree const_intHI_pointer_node
;
19135 tree const_intSI_pointer_node
;
19136 tree const_intDI_pointer_node
;
19137 tree const_float_pointer_node
;
19139 tree V8QI_type_node
;
19140 tree V4HI_type_node
;
19141 tree V2SI_type_node
;
19142 tree V2SF_type_node
;
19143 tree V16QI_type_node
;
19144 tree V8HI_type_node
;
19145 tree V4SI_type_node
;
19146 tree V4SF_type_node
;
19147 tree V2DI_type_node
;
19149 tree intUQI_type_node
;
19150 tree intUHI_type_node
;
19151 tree intUSI_type_node
;
19152 tree intUDI_type_node
;
19154 tree intEI_type_node
;
19155 tree intOI_type_node
;
19156 tree intCI_type_node
;
19157 tree intXI_type_node
;
19159 tree V8QI_pointer_node
;
19160 tree V4HI_pointer_node
;
19161 tree V2SI_pointer_node
;
19162 tree V2SF_pointer_node
;
19163 tree V16QI_pointer_node
;
19164 tree V8HI_pointer_node
;
19165 tree V4SI_pointer_node
;
19166 tree V4SF_pointer_node
;
19167 tree V2DI_pointer_node
;
19169 tree void_ftype_pv8qi_v8qi_v8qi
;
19170 tree void_ftype_pv4hi_v4hi_v4hi
;
19171 tree void_ftype_pv2si_v2si_v2si
;
19172 tree void_ftype_pv2sf_v2sf_v2sf
;
19173 tree void_ftype_pdi_di_di
;
19174 tree void_ftype_pv16qi_v16qi_v16qi
;
19175 tree void_ftype_pv8hi_v8hi_v8hi
;
19176 tree void_ftype_pv4si_v4si_v4si
;
19177 tree void_ftype_pv4sf_v4sf_v4sf
;
19178 tree void_ftype_pv2di_v2di_v2di
;
19180 tree reinterp_ftype_dreg
[5][5];
19181 tree reinterp_ftype_qreg
[5][5];
19182 tree dreg_types
[5], qreg_types
[5];
19184 /* Create distinguished type nodes for NEON vector element types,
19185 and pointers to values of such types, so we can detect them later. */
19186 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
19187 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
19188 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
19189 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
19190 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
19191 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
19192 neon_float_type_node
= make_node (REAL_TYPE
);
19193 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
19194 layout_type (neon_float_type_node
);
19196 /* Define typedefs which exactly correspond to the modes we are basing vector
19197 types on. If you change these names you'll need to change
19198 the table used by arm_mangle_type too. */
19199 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
19200 "__builtin_neon_qi");
19201 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
19202 "__builtin_neon_hi");
19203 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
19204 "__builtin_neon_si");
19205 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
19206 "__builtin_neon_sf");
19207 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
19208 "__builtin_neon_di");
19209 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
19210 "__builtin_neon_poly8");
19211 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
19212 "__builtin_neon_poly16");
19214 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
19215 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
19216 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
19217 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
19218 float_pointer_node
= build_pointer_type (neon_float_type_node
);
19220 /* Next create constant-qualified versions of the above types. */
19221 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
19223 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
19225 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
19227 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
19229 const_float_node
= build_qualified_type (neon_float_type_node
,
19232 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
19233 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
19234 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
19235 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
19236 const_float_pointer_node
= build_pointer_type (const_float_node
);
19238 /* Now create vector types based on our NEON element types. */
19239 /* 64-bit vectors. */
19241 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
19243 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
19245 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
19247 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
19248 /* 128-bit vectors. */
19250 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
19252 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
19254 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
19256 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
19258 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
19260 /* Unsigned integer types for various mode sizes. */
19261 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
19262 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
19263 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
19264 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
19266 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
19267 "__builtin_neon_uqi");
19268 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
19269 "__builtin_neon_uhi");
19270 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
19271 "__builtin_neon_usi");
19272 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
19273 "__builtin_neon_udi");
19275 /* Opaque integer types for structures of vectors. */
19276 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
19277 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
19278 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
19279 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
19281 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
19282 "__builtin_neon_ti");
19283 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
19284 "__builtin_neon_ei");
19285 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
19286 "__builtin_neon_oi");
19287 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
19288 "__builtin_neon_ci");
19289 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
19290 "__builtin_neon_xi");
19292 /* Pointers to vector types. */
19293 V8QI_pointer_node
= build_pointer_type (V8QI_type_node
);
19294 V4HI_pointer_node
= build_pointer_type (V4HI_type_node
);
19295 V2SI_pointer_node
= build_pointer_type (V2SI_type_node
);
19296 V2SF_pointer_node
= build_pointer_type (V2SF_type_node
);
19297 V16QI_pointer_node
= build_pointer_type (V16QI_type_node
);
19298 V8HI_pointer_node
= build_pointer_type (V8HI_type_node
);
19299 V4SI_pointer_node
= build_pointer_type (V4SI_type_node
);
19300 V4SF_pointer_node
= build_pointer_type (V4SF_type_node
);
19301 V2DI_pointer_node
= build_pointer_type (V2DI_type_node
);
19303 /* Operations which return results as pairs. */
19304 void_ftype_pv8qi_v8qi_v8qi
=
19305 build_function_type_list (void_type_node
, V8QI_pointer_node
, V8QI_type_node
,
19306 V8QI_type_node
, NULL
);
19307 void_ftype_pv4hi_v4hi_v4hi
=
19308 build_function_type_list (void_type_node
, V4HI_pointer_node
, V4HI_type_node
,
19309 V4HI_type_node
, NULL
);
19310 void_ftype_pv2si_v2si_v2si
=
19311 build_function_type_list (void_type_node
, V2SI_pointer_node
, V2SI_type_node
,
19312 V2SI_type_node
, NULL
);
19313 void_ftype_pv2sf_v2sf_v2sf
=
19314 build_function_type_list (void_type_node
, V2SF_pointer_node
, V2SF_type_node
,
19315 V2SF_type_node
, NULL
);
19316 void_ftype_pdi_di_di
=
19317 build_function_type_list (void_type_node
, intDI_pointer_node
,
19318 neon_intDI_type_node
, neon_intDI_type_node
, NULL
);
19319 void_ftype_pv16qi_v16qi_v16qi
=
19320 build_function_type_list (void_type_node
, V16QI_pointer_node
,
19321 V16QI_type_node
, V16QI_type_node
, NULL
);
19322 void_ftype_pv8hi_v8hi_v8hi
=
19323 build_function_type_list (void_type_node
, V8HI_pointer_node
, V8HI_type_node
,
19324 V8HI_type_node
, NULL
);
19325 void_ftype_pv4si_v4si_v4si
=
19326 build_function_type_list (void_type_node
, V4SI_pointer_node
, V4SI_type_node
,
19327 V4SI_type_node
, NULL
);
19328 void_ftype_pv4sf_v4sf_v4sf
=
19329 build_function_type_list (void_type_node
, V4SF_pointer_node
, V4SF_type_node
,
19330 V4SF_type_node
, NULL
);
19331 void_ftype_pv2di_v2di_v2di
=
19332 build_function_type_list (void_type_node
, V2DI_pointer_node
, V2DI_type_node
,
19333 V2DI_type_node
, NULL
);
19335 dreg_types
[0] = V8QI_type_node
;
19336 dreg_types
[1] = V4HI_type_node
;
19337 dreg_types
[2] = V2SI_type_node
;
19338 dreg_types
[3] = V2SF_type_node
;
19339 dreg_types
[4] = neon_intDI_type_node
;
19341 qreg_types
[0] = V16QI_type_node
;
19342 qreg_types
[1] = V8HI_type_node
;
19343 qreg_types
[2] = V4SI_type_node
;
19344 qreg_types
[3] = V4SF_type_node
;
19345 qreg_types
[4] = V2DI_type_node
;
19347 for (i
= 0; i
< 5; i
++)
19350 for (j
= 0; j
< 5; j
++)
19352 reinterp_ftype_dreg
[i
][j
]
19353 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
19354 reinterp_ftype_qreg
[i
][j
]
19355 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
  for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
       i < ARRAY_SIZE (neon_builtin_data);
       i++, fcode++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];

      const char* const modenames[] = {
	"v8qi", "v4hi", "v2si", "v2sf", "di",
	"v16qi", "v8hi", "v4si", "v4sf", "v2di",
	"ti", "ei", "oi"
      };
      char namebuf[60];
      tree ftype = NULL;
      int is_load = 0, is_store = 0;

      gcc_assert (ARRAY_SIZE (modenames) == T_MAX);

      switch (d->itype)
	{
	case NEON_LOAD1LANE:
	case NEON_LOADSTRUCT:
	case NEON_LOADSTRUCTLANE:
	  is_load = 1;
	  /* Fall through.  */
	case NEON_STORE1LANE:
	case NEON_STORESTRUCT:
	case NEON_STORESTRUCTLANE:
	  is_store = 1;
	  /* Fall through.  */
	case NEON_LOGICBINOP:
	case NEON_SHIFTINSERT:
	case NEON_SHIFTIMM:
	case NEON_SHIFTACC:
	case NEON_LANEMULL:
	case NEON_LANEMULH:
	case NEON_SCALARMUL:
	case NEON_SCALARMULL:
	case NEON_SCALARMULH:
	case NEON_SCALARMAC:
	  {
	    int k;
	    tree return_type = void_type_node, args = void_list_node;

	    /* Build a function type directly from the insn_data for
	       this builtin.  The build_function_type() function takes
	       care of removing duplicates for us.  */
	    for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
	      {
		tree eltype;

		if (is_load && k == 1)
		  {
		    /* Neon load patterns always have the memory
		       operand in the operand 1 position.  */
		    gcc_assert (insn_data[d->code].operand[k].predicate
				== neon_struct_operand);

		    switch (d->mode)
		      {
		      case T_V8QI:
		      case T_V16QI:
			eltype = const_intQI_pointer_node;
			break;

		      case T_V4HI:
		      case T_V8HI:
			eltype = const_intHI_pointer_node;
			break;

		      case T_V2SI:
		      case T_V4SI:
			eltype = const_intSI_pointer_node;
			break;

		      case T_V2SF:
		      case T_V4SF:
			eltype = const_float_pointer_node;
			break;

		      case T_DI:
		      case T_V2DI:
			eltype = const_intDI_pointer_node;
			break;

		      default: gcc_unreachable ();
		      }
		  }
		else if (is_store && k == 0)
		  {
		    /* Similarly, Neon store patterns use operand 0 as
		       the memory location to store to.  */
		    gcc_assert (insn_data[d->code].operand[k].predicate
				== neon_struct_operand);

		    switch (d->mode)
		      {
		      case T_V8QI:
		      case T_V16QI:
			eltype = intQI_pointer_node;
			break;

		      case T_V4HI:
		      case T_V8HI:
			eltype = intHI_pointer_node;
			break;

		      case T_V2SI:
		      case T_V4SI:
			eltype = intSI_pointer_node;
			break;

		      case T_V2SF:
		      case T_V4SF:
			eltype = float_pointer_node;
			break;

		      case T_DI:
		      case T_V2DI:
			eltype = intDI_pointer_node;
			break;

		      default: gcc_unreachable ();
		      }
		  }
		else
		  {
		    switch (insn_data[d->code].operand[k].mode)
		      {
		      case VOIDmode: eltype = void_type_node; break;
		      /* Scalars.  */
		      case QImode: eltype = neon_intQI_type_node; break;
		      case HImode: eltype = neon_intHI_type_node; break;
		      case SImode: eltype = neon_intSI_type_node; break;
		      case SFmode: eltype = neon_float_type_node; break;
		      case DImode: eltype = neon_intDI_type_node; break;
		      case TImode: eltype = intTI_type_node; break;
		      case EImode: eltype = intEI_type_node; break;
		      case OImode: eltype = intOI_type_node; break;
		      case CImode: eltype = intCI_type_node; break;
		      case XImode: eltype = intXI_type_node; break;
		      /* 64-bit vectors.  */
		      case V8QImode: eltype = V8QI_type_node; break;
		      case V4HImode: eltype = V4HI_type_node; break;
		      case V2SImode: eltype = V2SI_type_node; break;
		      case V2SFmode: eltype = V2SF_type_node; break;
		      /* 128-bit vectors.  */
		      case V16QImode: eltype = V16QI_type_node; break;
		      case V8HImode: eltype = V8HI_type_node; break;
		      case V4SImode: eltype = V4SI_type_node; break;
		      case V4SFmode: eltype = V4SF_type_node; break;
		      case V2DImode: eltype = V2DI_type_node; break;
		      default: gcc_unreachable ();
		      }
		  }

		if (k == 0 && !is_store)
		  return_type = eltype;
		else
		  args = tree_cons (NULL_TREE, eltype, args);
	      }

	    ftype = build_function_type (return_type, args);
	  }
	  break;

	case NEON_RESULTPAIR:
	  {
	    switch (insn_data[d->code].operand[1].mode)
	      {
	      case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
	      case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
	      case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
	      case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
	      case DImode: ftype = void_ftype_pdi_di_di; break;
	      case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
	      case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
	      case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
	      case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
	      case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
	      default: gcc_unreachable ();
	      }
	  }
	  break;

	case NEON_REINTERP:
	  {
	    /* We iterate over 5 doubleword types, then 5 quadword
	       types.  */
	    int rhs = d->mode % 5;
	    switch (insn_data[d->code].operand[0].mode)
	      {
	      case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
	      case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
	      case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
	      case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
	      case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
	      case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
	      case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
	      case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
	      case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
	      case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
	      default: gcc_unreachable ();
	      }
	  }
	  break;

	default:
	  gcc_unreachable ();
	}

      gcc_assert (ftype != NULL);

      sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);

      decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
				   NULL_TREE);
      arm_builtin_decls[fcode] = decl;
    }
}
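
/* The loop above derives each builtin's name mechanically: for example,
   the "vadd" entry in neon_builtin_data instantiated with mode T_V8QI is
   registered as __builtin_neon_vaddv8qi, which is what arm_neon.h
   ultimately calls.  */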
#define def_mbuiltin(MASK, NAME, TYPE, CODE)			\
  do								\
    {								\
      if ((MASK) & insn_flags)					\
	{							\
	  tree bdecl;						\
	  bdecl = add_builtin_function ((NAME), (TYPE), (CODE),	\
					BUILT_IN_MD, NULL, NULL_TREE); \
	  arm_builtin_decls[CODE] = bdecl;			\
	}							\
    }								\
  while (0)
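
/* def_mbuiltin registers a builtin only when the required FL_* bit is
   present in insn_flags, so e.g. the FL_IWMMXT2-gated entries below are
   not visible when compiling for plain iWMMXt.  */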
struct builtin_description
{
  const unsigned int       mask;
  const enum insn_code     icode;
  const char * const       name;
  const enum arm_builtins  code;
  const enum rtx_code      comparison;
  const unsigned int       flag;
};
static const struct builtin_description bdesc_2arg[] =
{
#define IWMMXT_BUILTIN(code, string, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },

#define IWMMXT2_BUILTIN(code, string, builtin) \
  { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
  IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
  IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
  IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
  IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
  IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
  IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
  IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
  IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
  IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
  IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
  IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
  IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
  IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
  IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
  IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
  IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)

#define IWMMXT_BUILTIN2(code, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

#define IWMMXT2_BUILTIN2(code, builtin) \
  { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
  IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
  IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
};
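
/* Each IWMMXT_BUILTIN invocation above expands to one table entry;
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB), for instance, becomes
   { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
     ARM_BUILTIN_WADDB, UNKNOWN, 0 }.  */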
static const struct builtin_description bdesc_1arg[] =
{
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
  IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
  IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
  IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
  IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
  IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
  IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
};
/* Set up all the iWMMXt builtins.  This is not called if
   TARGET_IWMMXT is zero.  */

static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);

  tree v8qi_ftype_v8qi_v8qi_int
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node,
				integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v2si_ftype_v2si_int
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, integer_type_node, NULL_TREE);
  tree v2si_ftype_di_di
    = build_function_type_list (V2SI_type_node,
				long_long_integer_type_node,
				long_long_integer_type_node,
				NULL_TREE);
  tree di_ftype_di_int
    = build_function_type_list (long_long_integer_type_node,
				long_long_integer_type_node,
				integer_type_node, NULL_TREE);
  tree di_ftype_di_int_int
    = build_function_type_list (long_long_integer_type_node,
				long_long_integer_type_node,
				integer_type_node,
				integer_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node,
				V8QI_type_node, NULL_TREE);
  tree int_ftype_v4hi
    = build_function_type_list (integer_type_node,
				V4HI_type_node, NULL_TREE);
  tree int_ftype_v2si
    = build_function_type_list (integer_type_node,
				V2SI_type_node, NULL_TREE);
  tree int_ftype_v8qi_int
    = build_function_type_list (integer_type_node,
				V8QI_type_node, integer_type_node, NULL_TREE);
  tree int_ftype_v4hi_int
    = build_function_type_list (integer_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree int_ftype_v2si_int
    = build_function_type_list (integer_type_node,
				V2SI_type_node, integer_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_int_int
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v2si_int_int
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v8qi_ftype_v4hi_v8qi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V8QI_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v8qi_v8qi
    = build_function_type_list (V2SI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree di_ftype_void
    = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
  tree int_ftype_void
    = build_function_type_list (integer_type_node, NULL_TREE);
  tree di_ftype_v8qi
    = build_function_type_list (long_long_integer_type_node,
				V8QI_type_node, NULL_TREE);
  tree di_ftype_v4hi
    = build_function_type_list (long_long_integer_type_node,
				V4HI_type_node, NULL_TREE);
  tree di_ftype_v2si
    = build_function_type_list (long_long_integer_type_node,
				V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v8qi
    = build_function_type_list (V4HI_type_node,
				V8QI_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, NULL_TREE);

  tree di_ftype_di_v4hi_v4hi
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				V4HI_type_node, V4HI_type_node,
				NULL_TREE);

  tree di_ftype_v4hi_v4hi
    = build_function_type_list (long_long_unsigned_type_node,
				V4HI_type_node, V4HI_type_node,
				NULL_TREE);

  tree v2si_ftype_v2si_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V4HI_type_node,
				V4HI_type_node, NULL_TREE);

  tree v2si_ftype_v2si_v8qi_v8qi
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V8QI_type_node,
				V8QI_type_node, NULL_TREE);

  tree di_ftype_di_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				V2SI_type_node, V2SI_type_node,
				NULL_TREE);

  tree di_ftype_di_di_int
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				integer_type_node, NULL_TREE);

  tree void_ftype_int
    = build_function_type_list (void_type_node,
				integer_type_node, NULL_TREE);

  tree v8qi_ftype_char
    = build_function_type_list (V8QI_type_node,
				signed_char_type_node, NULL_TREE);

  tree v4hi_ftype_short
    = build_function_type_list (V4HI_type_node,
				short_integer_type_node, NULL_TREE);

  tree v2si_ftype_int
    = build_function_type_list (V2SI_type_node,
				integer_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				NULL_TREE);

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_mbuiltin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.  */
#define iwmmx_mbuiltin(NAME, TYPE, CODE)			\
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE),	\
		ARM_BUILTIN_ ## CODE)

#define iwmmx2_mbuiltin(NAME, TYPE, CODE)			\
  def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE),	\
		ARM_BUILTIN_ ## CODE)

  iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
  iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
  iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
  iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
  iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
  iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
  iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
  iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
  iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);

  iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
  iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
  iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
  iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
  iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
  iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);

  iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
  iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
  iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
  iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
  iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
  iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);

  iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
  iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
  iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
  iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
  iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
  iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);

  iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
  iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
  iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
  iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
  iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
  iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);

  iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);

  iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
  iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
  iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
  iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
  iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
  iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
  iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
  iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
  iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
  iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);

  iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
  iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
  iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
  iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
  iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
  iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
  iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
  iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
  iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);

  iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
  iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
  iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);

  iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
  iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
  iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);

  iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
  iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);

  iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
  iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
  iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
  iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
  iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
  iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);

  iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
  iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
  iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
  iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
  iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
  iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
  iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
  iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
  iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
  iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
  iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
  iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);

  iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
  iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
  iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
  iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);

  iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
  iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
  iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
  iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
  iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
  iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
  iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);

  iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
  iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
  iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);

  iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
  iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
  iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
  iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);

  iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
  iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
  iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
  iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);

  iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
  iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
  iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
  iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);

  iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
  iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
  iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
  iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);

  iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
  iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
  iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
  iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);

  iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
  iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
  iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
  iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);

  iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);

  iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
  iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
  iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);

#undef iwmmx_mbuiltin
#undef iwmmx2_mbuiltin
}
static void
arm_init_tls_builtins (void)
{
  tree ftype, decl;

  ftype = build_function_type (ptr_type_node, void_list_node);
  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
			       "__builtin_thread_pointer", NULL_TREE);
  TREE_NOTHROW (decl) = 1;
  TREE_READONLY (decl) = 1;
  arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
}
static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}
static void
arm_init_builtins (void)
{
  arm_init_tls_builtins ();

  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();
}
/* Return the ARM builtin for CODE.  */

static tree
arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= ARM_BUILTIN_MAX)
    return error_mark_node;

  return arm_builtin_decls[code];
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics: conversions between
   __fp16 and double (or wider) must go through an intermediate
   conversion to float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
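
/* With the hook above, a conversion such as (double) x for X of type
   __fp16 is expanded as (double) (float) x, and symmetrically in the
   narrowing direction; conversions between __fp16 and float need no
   intermediate step.  */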
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
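
/* Note the idiom used above and in the expanders that follow: every
   operand is tested against the instruction's operand predicate and,
   if it fails, is copied into a fresh register first, so that GEN_FCN
   is always handed operands the named pattern can accept.  */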
/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_MEMORY,
  NEON_ARG_STOP
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5
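
/* These codes tell arm_expand_neon_args below how to treat each
   argument of a Neon builtin: force it into a register, require an
   immediate, expand it as a memory reference, or terminate the
   variadic argument list.  */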
/* EXP is a pointer argument to a Neon load or store intrinsic.  Derive
   and return an expression for the accessed memory.

   The intrinsic function operates on a block of registers that has
   mode REG_MODE.  This block contains vectors of type TYPE_MODE.  The
   function references the memory at EXP of type TYPE and in mode
   MEM_MODE; this mode may be BLKmode if no more suitable mode is
   available.  */

static tree
neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
			  enum machine_mode reg_mode,
			  neon_builtin_type_mode type_mode)
{
  HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
  tree elem_type, upper_bound, array_type;

  /* Work out the size of the register block in bytes.  */
  reg_size = GET_MODE_SIZE (reg_mode);

  /* Work out the size of each vector in bytes.  */
  gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
  vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);

  /* Work out how many vectors there are.  */
  gcc_assert (reg_size % vector_size == 0);
  nvectors = reg_size / vector_size;

  /* Work out the type of each element.  */
  gcc_assert (POINTER_TYPE_P (type));
  elem_type = TREE_TYPE (type);

  /* Work out how many elements are being loaded or stored.
     MEM_MODE == REG_MODE implies a one-to-one mapping between register
     and memory elements; anything else implies a lane load or store.  */
  if (mem_mode == reg_mode)
    nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
  else
    nelems = 1;

  /* Create a type that describes the full access.  */
  upper_bound = build_int_cst (size_type_node, nelems - 1);
  array_type = build_array_type (elem_type, build_index_type (upper_bound));

  /* Dereference EXP using that type.  */
  return fold_build2 (MEM_REF, array_type, exp,
		      build_int_cst (build_pointer_type (array_type), 0));
}
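
/* As a worked example of the calculation above: a two-register load of
   8-bit elements has REG_MODE == TImode (16 bytes) and a doubleword
   TYPE_MODE, giving vector_size == 8 and nvectors == 2; with one-byte
   elements and MEM_MODE == REG_MODE the access is described as an
   array of 16 elements.  */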
/* Expand a Neon builtin.  */
static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
		      neon_builtin_type_mode type_mode,
		      tree exp, int fcode, ...)
{
  va_list ap;
  rtx pat;
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  tree arg_type;
  tree formals;
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode other_mode;
  int argc = 0;
  int opno;

  if (have_retval
      && (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);

  va_start (ap, fcode);

  formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
	break;
      else
	{
	  opno = argc + have_retval;
	  mode[argc] = insn_data[icode].operand[opno].mode;
	  arg[argc] = CALL_EXPR_ARG (exp, argc);
	  arg_type = TREE_VALUE (formals);
	  if (thisarg == NEON_ARG_MEMORY)
	    {
	      other_mode = insn_data[icode].operand[1 - opno].mode;
	      arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
						    mode[argc], other_mode,
						    type_mode);
	    }

	  op[argc] = expand_normal (arg[argc]);

	  switch (thisarg)
	    {
	    case NEON_ARG_COPY_TO_REG:
	      /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
	      if (!(*insn_data[icode].operand[opno].predicate)
		     (op[argc], mode[argc]))
		op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
	      break;

	    case NEON_ARG_CONSTANT:
	      /* FIXME: This error message is somewhat unhelpful.  */
	      if (!(*insn_data[icode].operand[opno].predicate)
		     (op[argc], mode[argc]))
		error ("argument must be a constant");
	      break;

	    case NEON_ARG_MEMORY:
	      gcc_assert (MEM_P (op[argc]));
	      PUT_MODE (op[argc], mode[argc]);
	      /* ??? arm_neon.h uses the same built-in functions for signed
		 and unsigned accesses, casting where necessary.  This isn't
		 alias safe.  */
	      set_mem_alias_set (op[argc], 0);
	      if (!(*insn_data[icode].operand[opno].predicate)
		     (op[argc], mode[argc]))
		op[argc] = (replace_equiv_address
			    (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
	      break;

	    case NEON_ARG_STOP:
	      gcc_unreachable ();
	    }

	  argc++;
	  formals = TREE_CHAIN (formals);
	}
    }

  va_end (ap);

  if (have_retval)
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (target, op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (target, op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }
  else
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }

  if (!pat)
    return 0;

  emit_insn (pat);

  return target;
}
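
/* A typical call, for a two-input builtin that also takes a magic
   constant, is

     arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
			   NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
			   NEON_ARG_CONSTANT, NEON_ARG_STOP);

   as in the dispatch routine below; NEON_ARG_STOP terminates the
   variadic argument list, and the third argument says whether the
   insn produces a value.  */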
/* Expand a Neon builtin.  These are "special" because they don't have
   symbolic constants defined per-instruction or per instruction-variant.
   Instead, the required info is looked up in the table neon_builtin_data.  */
static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
  neon_itype itype = d->itype;
  enum insn_code icode = d->code;
  neon_builtin_type_mode type_mode = d->mode;

  switch (itype)
    {
    case NEON_UNOP:
    case NEON_CONVERT:
    case NEON_DUPLANE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_BINOP:
    case NEON_SETLANE:
    case NEON_SCALARMUL:
    case NEON_SCALARMULL:
    case NEON_SCALARMULH:
    case NEON_SHIFTINSERT:
    case NEON_LOGICBINOP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_TERNOP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_GETLANE:
    case NEON_FIXCONV:
    case NEON_SHIFTIMM:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_CREATE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_DUP:
    case NEON_SPLIT:
    case NEON_REINTERP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_COMBINE:
    case NEON_VTBL:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_RESULTPAIR:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LANEMUL:
    case NEON_LANEMULL:
    case NEON_LANEMULH:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_LANEMAC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTACC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMAC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SELECT:
    case NEON_VTBX:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LOAD1:
    case NEON_LOADSTRUCT:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_STOP);

    case NEON_LOAD1LANE:
    case NEON_LOADSTRUCTLANE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_STORE1:
    case NEON_STORESTRUCT:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_STORE1LANE:
    case NEON_STORESTRUCTLANE:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);
    }

  gcc_unreachable ();
}
/* Emit code to reinterpret one Neon type as another, without altering bits.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */
void
neon_emit_pair_result_insn (enum machine_mode mode,
			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
			    rtx op1, rtx op2)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, op2, tmp2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
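
/* For example, with COUNT == 2 and a destination that overlaps the
   source at a higher register number, the components are copied as
   dest[1] = src[1] and then dest[0] = src[0], so no source register is
   clobbered before it has been read.  */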
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
      rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
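
/* For instance, a vcombine of d0 and d1 into q0 (which occupies d0-d1)
   is already in place and collapses to the deleted-insn note above,
   while the exactly-swapped layout is handled with a single VSWP.  */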
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
arm_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description * d;
  enum insn_code    icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0;
  tree arg1;
  tree arg2;
  rtx op0;
  rtx op1;
  rtx op2;
  rtx pat;
  int fcode = DECL_FUNCTION_CODE (fndecl);
  size_t i;
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;
  int opint;
  int selector;
  int mask;
  int imm;

  if (fcode >= ARM_BUILTIN_NEON_BASE)
    return arm_expand_neon_builtin (fcode, exp, target);

  switch (fcode)
    {
    case ARM_BUILTIN_TEXTRMSB:
    case ARM_BUILTIN_TEXTRMUB:
    case ARM_BUILTIN_TEXTRMSH:
    case ARM_BUILTIN_TEXTRMUH:
    case ARM_BUILTIN_TEXTRMSW:
    case ARM_BUILTIN_TEXTRMUW:
      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
	       : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
	       : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
	       : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
	       : CODE_FOR_iwmmxt_textrmw);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}

      opint = INTVAL (op1);
      if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
	{
	  if (opint > 7 || opint < 0)
	    error ("the range of selector should be in 0 to 7");
	}
      else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
	{
	  if (opint > 3 || opint < 0)
	    error ("the range of selector should be in 0 to 3");
	}
      else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW.  */
	{
	  if (opint > 1 || opint < 0)
	    error ("the range of selector should be in 0 to 1");
	}

      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
:
20868 /* If op2 is immediate, call walighi, else call walighr. */
20869 arg0
= CALL_EXPR_ARG (exp
, 0);
20870 arg1
= CALL_EXPR_ARG (exp
, 1);
20871 arg2
= CALL_EXPR_ARG (exp
, 2);
20872 op0
= expand_normal (arg0
);
20873 op1
= expand_normal (arg1
);
20874 op2
= expand_normal (arg2
);
20875 if (GET_CODE (op2
) == CONST_INT
)
20877 icode
= CODE_FOR_iwmmxt_waligni
;
20878 tmode
= insn_data
[icode
].operand
[0].mode
;
20879 mode0
= insn_data
[icode
].operand
[1].mode
;
20880 mode1
= insn_data
[icode
].operand
[2].mode
;
20881 mode2
= insn_data
[icode
].operand
[3].mode
;
20882 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20883 op0
= copy_to_mode_reg (mode0
, op0
);
20884 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
20885 op1
= copy_to_mode_reg (mode1
, op1
);
20886 gcc_assert ((*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
));
20887 selector
= INTVAL (op2
);
20888 if (selector
> 7 || selector
< 0)
20889 error ("the range of selector should be in 0 to 7");
20893 icode
= CODE_FOR_iwmmxt_walignr
;
20894 tmode
= insn_data
[icode
].operand
[0].mode
;
20895 mode0
= insn_data
[icode
].operand
[1].mode
;
20896 mode1
= insn_data
[icode
].operand
[2].mode
;
20897 mode2
= insn_data
[icode
].operand
[3].mode
;
20898 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20899 op0
= copy_to_mode_reg (mode0
, op0
);
20900 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
20901 op1
= copy_to_mode_reg (mode1
, op1
);
20902 if (!(*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
20903 op2
= copy_to_mode_reg (mode2
, op2
);
20906 || GET_MODE (target
) != tmode
20907 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20908 target
= gen_reg_rtx (tmode
);
20909 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
    case ARM_BUILTIN_TINSRB:
    case ARM_BUILTIN_TINSRH:
    case ARM_BUILTIN_TINSRW:
    case ARM_BUILTIN_WMERGE:
      icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
	       : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
	       : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
	       : CODE_FOR_iwmmxt_tinsrw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (icode == CODE_FOR_iwmmxt_wmerge)
	{
	  selector = INTVAL (op2);
	  if (selector > 7 || selector < 0)
	    error ("the range of selector should be in 0 to 7");
	}
      if ((icode == CODE_FOR_iwmmxt_tinsrb)
	  || (icode == CODE_FOR_iwmmxt_tinsrh)
	  || (icode == CODE_FOR_iwmmxt_tinsrw))
	{
	  mask = 0x01;
	  selector = INTVAL (op2);
	  if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
	    error ("the range of selector should be in 0 to 7");
	  else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
	    error ("the range of selector should be in 0 to 3");
	  else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
	    error ("the range of selector should be in 0 to 1");
	  mask <<= selector;
	  op2 = GEN_INT (mask);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_SETWCGR0:
    case ARM_BUILTIN_SETWCGR1:
    case ARM_BUILTIN_SETWCGR2:
    case ARM_BUILTIN_SETWCGR3:
      icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
	       : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
	       : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
	       : CODE_FOR_iwmmxt_setwcgr3);
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      pat = GEN_FCN (icode) (op0);
      if (!pat)
	return 0;
      emit_insn (pat);
      return 0;
:
20994 case ARM_BUILTIN_GETWCGR1
:
20995 case ARM_BUILTIN_GETWCGR2
:
20996 case ARM_BUILTIN_GETWCGR3
:
20997 icode
= (fcode
== ARM_BUILTIN_GETWCGR0
? CODE_FOR_iwmmxt_getwcgr0
20998 : fcode
== ARM_BUILTIN_GETWCGR1
? CODE_FOR_iwmmxt_getwcgr1
20999 : fcode
== ARM_BUILTIN_GETWCGR2
? CODE_FOR_iwmmxt_getwcgr2
21000 : CODE_FOR_iwmmxt_getwcgr3
);
21001 tmode
= insn_data
[icode
].operand
[0].mode
;
21003 || GET_MODE (target
) != tmode
21004 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21005 target
= gen_reg_rtx (tmode
);
21006 pat
= GEN_FCN (icode
) (target
);
21012 case ARM_BUILTIN_WSHUFH
:
21013 icode
= CODE_FOR_iwmmxt_wshufh
;
21014 arg0
= CALL_EXPR_ARG (exp
, 0);
21015 arg1
= CALL_EXPR_ARG (exp
, 1);
21016 op0
= expand_normal (arg0
);
21017 op1
= expand_normal (arg1
);
21018 tmode
= insn_data
[icode
].operand
[0].mode
;
21019 mode1
= insn_data
[icode
].operand
[1].mode
;
21020 mode2
= insn_data
[icode
].operand
[2].mode
;
21022 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
21023 op0
= copy_to_mode_reg (mode1
, op0
);
21024 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
21026 error ("mask must be an immediate");
21029 selector
= INTVAL (op1
);
21030 if (selector
< 0 || selector
> 255)
21031 error ("the range of mask should be in 0 to 255");
21033 || GET_MODE (target
) != tmode
21034 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
21035 target
= gen_reg_rtx (tmode
);
21036 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
21042 case ARM_BUILTIN_WMADDS
:
21043 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds
, exp
, target
);
21044 case ARM_BUILTIN_WMADDSX
:
21045 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx
, exp
, target
);
21046 case ARM_BUILTIN_WMADDSN
:
21047 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn
, exp
, target
);
21048 case ARM_BUILTIN_WMADDU
:
21049 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu
, exp
, target
);
21050 case ARM_BUILTIN_WMADDUX
:
21051 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux
, exp
, target
);
21052 case ARM_BUILTIN_WMADDUN
:
21053 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun
, exp
, target
);
21054 case ARM_BUILTIN_WSADBZ
:
21055 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
21056 case ARM_BUILTIN_WSADHZ
:
21057 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
      /* Several three-argument builtins.  */
    case ARM_BUILTIN_WMACS:
    case ARM_BUILTIN_WMACU:
    case ARM_BUILTIN_TMIA:
    case ARM_BUILTIN_TMIAPH:
    case ARM_BUILTIN_TMIATT:
    case ARM_BUILTIN_TMIATB:
    case ARM_BUILTIN_TMIABT:
    case ARM_BUILTIN_TMIABB:
    case ARM_BUILTIN_WQMIABB:
    case ARM_BUILTIN_WQMIABT:
    case ARM_BUILTIN_WQMIATB:
    case ARM_BUILTIN_WQMIATT:
    case ARM_BUILTIN_WQMIABBN:
    case ARM_BUILTIN_WQMIABTN:
    case ARM_BUILTIN_WQMIATBN:
    case ARM_BUILTIN_WQMIATTN:
    case ARM_BUILTIN_WMIABB:
    case ARM_BUILTIN_WMIABT:
    case ARM_BUILTIN_WMIATB:
    case ARM_BUILTIN_WMIATT:
    case ARM_BUILTIN_WMIABBN:
    case ARM_BUILTIN_WMIABTN:
    case ARM_BUILTIN_WMIATBN:
    case ARM_BUILTIN_WMIATTN:
    case ARM_BUILTIN_WMIAWBB:
    case ARM_BUILTIN_WMIAWBT:
    case ARM_BUILTIN_WMIAWTB:
    case ARM_BUILTIN_WMIAWTT:
    case ARM_BUILTIN_WMIAWBBN:
    case ARM_BUILTIN_WMIAWBTN:
    case ARM_BUILTIN_WMIAWTBN:
    case ARM_BUILTIN_WMIAWTTN:
    case ARM_BUILTIN_WSADB:
    case ARM_BUILTIN_WSADH:
      icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
	       : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
	       : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
	       : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
	       : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
	       : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
	       : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
	       : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
	       : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
	       : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
	       : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
	       : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
	       : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
	       : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
	       : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
	       : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
	       : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
	       : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
	       : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
	       : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
	       : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
	       : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
	       : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
	       : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
	       : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
	       : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
	       : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
	       : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
	       : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
	       : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
	       : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
	       : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
	       : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
	       : CODE_FOR_iwmmxt_wsadh);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
:
21156 target
= gen_reg_rtx (DImode
);
21157 emit_insn (gen_iwmmxt_clrdi (target
));
21160 case ARM_BUILTIN_WSRLHI
:
21161 case ARM_BUILTIN_WSRLWI
:
21162 case ARM_BUILTIN_WSRLDI
:
21163 case ARM_BUILTIN_WSLLHI
:
21164 case ARM_BUILTIN_WSLLWI
:
21165 case ARM_BUILTIN_WSLLDI
:
21166 case ARM_BUILTIN_WSRAHI
:
21167 case ARM_BUILTIN_WSRAWI
:
21168 case ARM_BUILTIN_WSRADI
:
21169 case ARM_BUILTIN_WRORHI
:
21170 case ARM_BUILTIN_WRORWI
:
21171 case ARM_BUILTIN_WRORDI
:
21172 case ARM_BUILTIN_WSRLH
:
21173 case ARM_BUILTIN_WSRLW
:
21174 case ARM_BUILTIN_WSRLD
:
21175 case ARM_BUILTIN_WSLLH
:
21176 case ARM_BUILTIN_WSLLW
:
21177 case ARM_BUILTIN_WSLLD
:
21178 case ARM_BUILTIN_WSRAH
:
21179 case ARM_BUILTIN_WSRAW
:
21180 case ARM_BUILTIN_WSRAD
:
21181 case ARM_BUILTIN_WRORH
:
21182 case ARM_BUILTIN_WRORW
:
21183 case ARM_BUILTIN_WRORD
:
21184 icode
= (fcode
== ARM_BUILTIN_WSRLHI
? CODE_FOR_lshrv4hi3_iwmmxt
21185 : fcode
== ARM_BUILTIN_WSRLWI
? CODE_FOR_lshrv2si3_iwmmxt
21186 : fcode
== ARM_BUILTIN_WSRLDI
? CODE_FOR_lshrdi3_iwmmxt
21187 : fcode
== ARM_BUILTIN_WSLLHI
? CODE_FOR_ashlv4hi3_iwmmxt
21188 : fcode
== ARM_BUILTIN_WSLLWI
? CODE_FOR_ashlv2si3_iwmmxt
21189 : fcode
== ARM_BUILTIN_WSLLDI
? CODE_FOR_ashldi3_iwmmxt
21190 : fcode
== ARM_BUILTIN_WSRAHI
? CODE_FOR_ashrv4hi3_iwmmxt
21191 : fcode
== ARM_BUILTIN_WSRAWI
? CODE_FOR_ashrv2si3_iwmmxt
21192 : fcode
== ARM_BUILTIN_WSRADI
? CODE_FOR_ashrdi3_iwmmxt
21193 : fcode
== ARM_BUILTIN_WRORHI
? CODE_FOR_rorv4hi3
21194 : fcode
== ARM_BUILTIN_WRORWI
? CODE_FOR_rorv2si3
21195 : fcode
== ARM_BUILTIN_WRORDI
? CODE_FOR_rordi3
21196 : fcode
== ARM_BUILTIN_WSRLH
? CODE_FOR_lshrv4hi3_di
21197 : fcode
== ARM_BUILTIN_WSRLW
? CODE_FOR_lshrv2si3_di
21198 : fcode
== ARM_BUILTIN_WSRLD
? CODE_FOR_lshrdi3_di
21199 : fcode
== ARM_BUILTIN_WSLLH
? CODE_FOR_ashlv4hi3_di
21200 : fcode
== ARM_BUILTIN_WSLLW
? CODE_FOR_ashlv2si3_di
21201 : fcode
== ARM_BUILTIN_WSLLD
? CODE_FOR_ashldi3_di
21202 : fcode
== ARM_BUILTIN_WSRAH
? CODE_FOR_ashrv4hi3_di
21203 : fcode
== ARM_BUILTIN_WSRAW
? CODE_FOR_ashrv2si3_di
21204 : fcode
== ARM_BUILTIN_WSRAD
? CODE_FOR_ashrdi3_di
21205 : fcode
== ARM_BUILTIN_WRORH
? CODE_FOR_rorv4hi3_di
21206 : fcode
== ARM_BUILTIN_WRORW
? CODE_FOR_rorv2si3_di
21207 : fcode
== ARM_BUILTIN_WRORD
? CODE_FOR_rordi3_di
21208 : CODE_FOR_nothing
);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op1 = expand_normal (arg1);
      if (GET_MODE (op1) == VOIDmode)
	{
	  imm = INTVAL (op1);
	  if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
	       || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
	      && (imm < 0 || imm > 32))
	    {
	      if (fcode == ARM_BUILTIN_WRORHI)
		error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
	      else if (fcode == ARM_BUILTIN_WRORWI)
		error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
	      else if (fcode == ARM_BUILTIN_WRORH)
		error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
	      else
		error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
	    }
	  else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
		   && (imm < 0 || imm > 64))
	    {
	      if (fcode == ARM_BUILTIN_WRORDI)
		error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
	      else
		error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
	    }
	  else if (imm < 0)
	    {
	      if (fcode == ARM_BUILTIN_WSRLHI)
		error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
	      else if (fcode == ARM_BUILTIN_WSRLWI)
		error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
	      else if (fcode == ARM_BUILTIN_WSRLDI)
		error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
	      else if (fcode == ARM_BUILTIN_WSLLHI)
		error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
	      else if (fcode == ARM_BUILTIN_WSLLWI)
		error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
	      else if (fcode == ARM_BUILTIN_WSLLDI)
		error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
	      else if (fcode == ARM_BUILTIN_WSRAHI)
		error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
	      else if (fcode == ARM_BUILTIN_WSRAWI)
		error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
	      else if (fcode == ARM_BUILTIN_WSRADI)
		error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
	      else if (fcode == ARM_BUILTIN_WSRLH)
		error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
	      else if (fcode == ARM_BUILTIN_WSRLW)
		error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
	      else if (fcode == ARM_BUILTIN_WSRLD)
		error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
	      else if (fcode == ARM_BUILTIN_WSLLH)
		error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
	      else if (fcode == ARM_BUILTIN_WSLLW)
		error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
	      else if (fcode == ARM_BUILTIN_WSLLD)
		error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
	      else if (fcode == ARM_BUILTIN_WSRAH)
		error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
	      else if (fcode == ARM_BUILTIN_WSRAW)
		error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
	      else
		error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
	    }
	}
      return arm_expand_binop_builtin (icode, exp, target);
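
      /* Usage sketch (illustrative only, not part of the compiler; assumes
	 an iWMMXt target and GCC's mmintrin.h): the checks above reject bad
	 rotate counts at expand time, so with

		#include <mmintrin.h>
		__m64 ok (__m64 x) { return _mm_rori_pi16 (x, 8); }

	 the count 8 is accepted (0 <= 8 <= 32), while a count of 40 in the
	 same call would produce the _mm_rori_pi16 diagnostic emitted
	 above.  */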
    case ARM_BUILTIN_THREAD_POINTER:
      return arm_load_tp (target);
    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_binop_builtin (d->icode, exp, target);

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_unop_builtin (d->icode, exp, target, 0);

  /* @@@ Should really do something sensible here.  */
  return NULL_RTX;
}

/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

inline static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
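
/* Worked example (illustrative only): a mask containing r3 and r5 is
   (1 << 3) | (1 << 5) == 0x28, and number_of_first_bit_set (0x28)
   returns 3, the lowest-numbered register in the mask.  Because POP
   assigns stack slots to registers in ascending register order, the
   first value popped always lands in this lowest-numbered register.  */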
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg, insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (VOIDmode, tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
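
/* A minimal, self-contained sketch (not compiled into the backend) of
   the mask iteration used above: "mask &= mask - 1" clears the lowest
   set bit each trip, so the loop visits each register exactly once,
   from lowest to highest.  __builtin_ctzl stands in for ctz_hwi.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned long mask = 0x0d;			/* r0, r2 and r3 */
  while (mask)
    {
      int regno = __builtin_ctzl (mask);	/* lowest set bit */
      printf ("r%d\n", regno);			/* prints r0, r2, r3 */
      mask &= mask - 1;				/* clear that bit */
    }
  return 0;
}
#endif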
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
	  || crtl->calls_eh_return)
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  int mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to     = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
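
/* Illustrative sequence (assumed, not taken from a real compile): for a
   void interworked function whose return address is on the stack, the
   argument registers r0-r2 are all corruptible, the extras are pruned
   away, and the code above typically emits

	pop	{r0}
	bx	r0

   i.e. the return address is popped into the lowest available register
   and BX performs the (possibly mode-changing) return.  */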
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NOOVmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
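
/* Worked example: 0x0003fc00 is 0xff shifted left by 10, so the loop
   matches at i == 10 and the function returns 1; such a constant can be
   synthesized as "mov rN, #255" followed by a left shift of 10.  By
   contrast 0x101 has set bits more than 8 apart, never fits under a
   single shifted 0xff mask, and yields 0.  */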
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx insn;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == JUMP_INSN
	  /* Ignore tablejump patterns.  */
	  && GET_CODE (PATTERN (insn)) != ADDR_VEC
	  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
	  && get_attr_far_jump (insn) == FAR_JUMP_YES
	  )
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
/* Return nonzero if FUNC must be entered in ARM mode.  */
int
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return TRUE;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return FALSE;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
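
/* Worked example: with AMOUNT == 516 and two usable registers
   (n_free == 2), the first return above triggers: 516 >= 512 and
   516 - 2*4 == 508 < 512, so we push (516 - 508) / 4 == 2 extra
   registers and the remaining adjustment of 508 bytes fits a single
   Thumb-1 SP-relative add/sub immediate.  */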
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
	 returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
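
/* Example with hypothetical frame layout values: if saved_args == 0,
   saved_regs == 8, soft_frame == 8, locals_base == 24 and
   outgoing_args == 40, then eliminating ARG_POINTER_REGNUM to
   STACK_POINTER_REGNUM returns 40, while eliminating it to
   THUMB_HARD_FRAME_POINTER_REGNUM returns 24.  Every case above is a
   plain difference of two precomputed frame offsets.  */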
/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx insn;

  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, x));
	}
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
	 The code looks like this:

	 0   .align 2
	 0   func:
	 0     sub   SP, #16         Reserve space for 4 registers.
	 2     push  {R7}            Push low registers.
	 4     add   R7, SP, #20     Get the stack pointer before the push.
	 6     str   R7, [SP, #8]    Store the stack pointer
					(before reserving the space).
	 8     mov   R7, PC          Get hold of the start of this code + 12.
	10     str   R7, [SP, #16]   Store it.
	12     mov   R7, FP          Get hold of the current frame pointer.
	14     str   R7, [SP, #4]    Store it.
	16     mov   R7, LR          Get hold of the current return address.
	18     str   R7, [SP, #12]   Store it.
	20     add   R7, SP, #16     Point at the start of the
					backtrace structure.
	22     mov   FP, R7          Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
	{
	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
	 prevent the scheduler from doing anything weird.  Failing that
	 we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
	{
	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}
      else
	{
	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization: If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && l_mask))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      pushable_regs = l_mask & 0xff;

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;

	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed--;
		  real_regs_mask |= (1 << next_hi_reg);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg--)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    {
		      pushable_regs &= ~((1 << regno) - 1);
		      break;
		    }
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (l_mask == (1 << LR_REGNUM))
	    {
	      pushable_regs |= l_mask;
	      real_regs_mask |= l_mask;
	      l_mask = 0;
	    }

	  insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert(regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if single register is to be
   returned, or multiple register return.  */
void
thumb2_expand_return (void)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (saved_regs_mask)
    {
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |=  (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    {
      emit_jump_insn (simple_return_rtx);
    }
}
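
/* For instance, a function that saved only LR returns through the
   single-register path above with "pop {pc}", while one that saved
   {r4, r5, lr} rewrites LR to PC in the mask and returns with
   "pop {r4, r5, pc}" via arm_emit_multi_reg_pop.  */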
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_prologue_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame = offsets->saved_args - offsets->frame;

  /* Compute how many core registers saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      int start_reg;

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  floats_from_frame += saved_size;
	  emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
				 hard_frame_pointer_rtx,
				 GEN_INT (-floats_from_frame)));
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode,
						     hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP, which holds the old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent
     registers, we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      /* Unwind the stack to just below the saved registers.  */
      emit_insn (gen_addsi3 (stack_pointer_rtx,
			     hard_frame_pointer_rtx,
			     GEN_INT (- 4 * num_regs)));
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   GEN_INT (ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogues.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, frame pointer points to first saved register.
	     Restore stack pointer to last saved register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  emit_insn (gen_addsi3 (stack_pointer_rtx,
				 hard_frame_pointer_rtx,
				 GEN_INT (amount)));

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_prologue_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				   hard_frame_pointer_rtx,
				   GEN_INT (amount)));

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_prologue_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  emit_insn (gen_addsi3 (stack_pointer_rtx,
				 stack_pointer_rtx,
				 GEN_INT (amount)));
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_prologue_use (stack_pointer_rtx));
	}
    }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_regs[i])
	{
	  rtx insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	}

  if (saved_regs_mask)
    {
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  rtx insn;

	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
							gen_rtx_REG (SImode, i),
							addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    insn = emit_jump_insn (insn);
		  }
		else
		  {
		    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
						 addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		  }
	      }
	}
      else
	arm_emit_multi_reg_pop (saved_regs_mask);

      if (return_in_pc == true)
	return;
    }

  if (crtl->args.pretend_args_size)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   GEN_INT (crtl->args.pretend_args_size)));

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (GET_CODE (operands[0]) == REG);
  gcc_assert (GET_CODE (operands[1]) == MEM);

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (GET_CODE (base) == REG);

      /* Catch the case of <address> = <reg> + <reg> */
      if (GET_CODE (offset) == REG)
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  rtx tmp;

  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      if (REGNO (operands[5]) > REGNO (operands[6]))
	{
	  tmp = operands[5];
	  operands[5] = operands[6];
	  operands[6] = tmp;
	}
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}

/* Output a call-via instruction for thumb state.  */

const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
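
/* Illustratively (a sketch, not verbatim compiler output): a call through
   r4 becomes "bl .Ln", where the per-compilation-unit label .Ln is later
   defined by arm_file_end (or per-section via cfun->machine->call_via) as:

   .Ln:	bx	r4  */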

/* Routines for generating rtl.  */

void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}
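
/* Worked example (illustrative): a 23-byte copy is decomposed by the
   code above as 12 + 8 + 2 + 1 bytes, i.e. one movmem12b, one movmem8b,
   one HImode move at offset 0 and one QImode move at offset 2 (offsets
   are relative to the auto-incremented pointers).  */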

void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */

void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}

/* Return the length of a function name prefix
    that starts with the character 'c'.  */

static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */

const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */

void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}

/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (Eg gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}

static void
arm_file_start (void)
{
  int val;

  if (TARGET_UNIFIED_ASM)
    asm_fprintf (asm_out_file, "\t.syntax unified\n");

  if (TARGET_BPABI)
    {
      const char *fpu_name;
      if (arm_selected_arch)
	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
      else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
      else
	asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);

      if (TARGET_SOFT_FLOAT)
	fpu_name = "softvfp";
      else
	{
	  fpu_name = arm_fpu_desc->name;
	  if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
	    {
	      if (TARGET_HARD_FLOAT)
		arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
	      if (TARGET_HARD_FLOAT_ABI)
		arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	    }
	}
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}
      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}

static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}

#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */

static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}

/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
		     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
		    ? 1 : 0);

  if (mi_delta < 0)
    mi_delta = - mi_delta;

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in ARM mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	fputs ("\tldr\tr12, ", file);
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%s\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
		     mi_op, this_regno, this_regno,
		     mi_delta);
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-7-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }
}
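
/* As an illustrative sketch (not verbatim output), a 32-bit non-PIC
   thunk with a small positive delta reduces to:

	add	r0, r0, #<delta>
	b	<function>(PLT)

   where r0 holds the `this' pointer (r1 when the return value is
   passed by reference).  */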

bool
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return true;
}

/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */

void
arm_emit_fp16_const (rtx c)
{
  REAL_VALUE_TYPE r;
  long bits;

  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
  bits = real_to_target (NULL, &r, HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}

const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (GET_CODE (operands[1]) != MEM
      || GET_CODE (sum = XEXP (operands[1], 0)) != PLUS
      || GET_CODE (reg = XEXP (sum, 0)) != REG
      || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}

/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    enum machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
	nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}

/* Return nonzero if the CONSUMER instruction (a store) does not need
   PRODUCER's value to calculate the address.  */

int
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx addr = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (addr) == COND_EXEC)
    addr = COND_EXEC_CODE (addr);
  if (GET_CODE (addr) == PARALLEL)
    addr = XVECEXP (addr, 0, 0);
  addr = XEXP (addr, 0);

  return !reg_overlap_mentioned_p (value, addr);
}

/* Return nonzero if the CONSUMER instruction (a store) does need
   PRODUCER's value to calculate the address.  */

int
arm_early_store_addr_dep (rtx producer, rtx consumer)
{
  return !arm_no_early_store_addr_dep (producer, consumer);
}

/* Return nonzero if the CONSUMER instruction (a load) does need
   PRODUCER's value to calculate the address.  */

int
arm_early_load_addr_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx addr = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (addr) == COND_EXEC)
    addr = COND_EXEC_CODE (addr);
  if (GET_CODE (addr) == PARALLEL)
    {
      if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN)
	addr = XVECEXP (addr, 0, 1);
      else
	addr = XVECEXP (addr, 0, 0);
    }
  addr = XEXP (addr, 1);

  return reg_overlap_mentioned_p (value, addr);
}

/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value or amount dependency on the
   result of PRODUCER.  */

int
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);
  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the whole shift
     operation.  */
  if (GET_CODE (early_op) == REG)
    early_op = op;

  return !reg_overlap_mentioned_p (value, early_op);
}

/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value dependency on the result of
   PRODUCER.  */

int
arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);

  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the value being
     shifted, in either case.  */
  if (GET_CODE (early_op) != REG)
    early_op = XEXP (early_op, 0);

  return !reg_overlap_mentioned_p (value, early_op);
}

/* Return nonzero if the CONSUMER (a mul or mac op) does not
   have an early register mult dependency on the result of
   PRODUCER.  */

int
arm_no_early_mul_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
    {
      if (GET_CODE (XEXP (op, 0)) == MULT)
	return !reg_overlap_mentioned_p (value, XEXP (op, 0));
      else
	return !reg_overlap_mentioned_p (value, XEXP (op, 1));
    }

  return 0;
}

/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static enum machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}


/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}

/* Return non-zero if the consumer (a multiply-accumulate instruction)
   has an accumulator dependency on the result of the producer (a
   multiplication instruction) and no other dependency on that result.  */

int
arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
{
  rtx mul = PATTERN (producer);
  rtx mac = PATTERN (consumer);
  rtx mul_result;
  rtx mac_op0, mac_op1, mac_acc;

  if (GET_CODE (mul) == COND_EXEC)
    mul = COND_EXEC_CODE (mul);
  if (GET_CODE (mac) == COND_EXEC)
    mac = COND_EXEC_CODE (mac);

  /* Check that mul is of the form (set (...) (mult ...))
     and mla is of the form (set (...) (plus (mult ...) (...))).  */
  if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
      || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
	  || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
    return 0;

  mul_result = XEXP (mul, 0);
  mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
  mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
  mac_acc = XEXP (XEXP (mac, 1), 1);

  return (reg_overlap_mentioned_p (mul_result, mac_acc)
	  && !reg_overlap_mentioned_p (mul_result, mac_op0)
	  && !reg_overlap_mentioned_p (mul_result, mac_op1));
}

/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}

void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}
      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
}

void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}

/* Implements target hook vector_mode_supported_p.  */

bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}

/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (enum machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static enum machine_mode
arm_preferred_simd_mode (enum machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case SImode:
	return V2SImode;
      case HImode:
	return V4HImode;
      case QImode:
	return V8QImode;

      default:;
      }

  return word_mode;
}

/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */

static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */

bool
arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
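
/* For example, an SImode shift by a register amount of 260 behaves like
   a shift by 260 % 256 == 4, because the hardware uses only the least
   significant byte of the shift count; DImode shifts make no such
   promise, hence the 0 mask.  */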

/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
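
/* Illustrative examples of the mapping above: s5 (a single-precision
   VFP register) maps to 64 + 5 = 69, while d20, which has no S-register
   alias, maps to 256 + 20 = 276.  */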

/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */

static rtx
arm_dwarf_register_span (rtx rtl)
{
  unsigned regno;
  int nregs;
  int i;
  rtx p;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    return NULL_RTX;

  nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
  p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
  regno = (regno - FIRST_VFP_REGNUM) / 2;
  for (i = 0; i < nregs; i++)
    XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);

  return p;
}

#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  rtx e;

  e = XVECEXP (p, 0, 0);
  if (GET_CODE (e) != SET)
    abort ();

  /* First insn will adjust the stack pointer.  */
  if (GET_CODE (e) != SET
      || GET_CODE (XEXP (e, 0)) != REG
      || REGNO (XEXP (e, 0)) != SP_REGNUM
      || GET_CODE (XEXP (e, 1)) != PLUS)
    abort ();

  offset = -INTVAL (XEXP (XEXP (e, 1), 1));
  nregs = XVECLEN (p, 0) - 1;

  reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  if (reg < 16)
    {
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      if (nregs * 4 == offset - 4)
	{
	  fprintf (asm_out_file, "\t.pad #4\n");
	  offset -= 4;
	}
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    abort ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  if (offset != nregs * reg_size)
    abort ();

  offset = 0;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      if (GET_CODE (e) != SET
	  || GET_CODE (XEXP (e, 0)) != MEM
	  || GET_CODE (XEXP (e, 1)) != REG)
	abort ();

      reg = REGNO (XEXP (e, 1));
      if (reg < lastreg)
	abort ();

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (XEXP (e, 0), 0);
      if (GET_CODE (e) == PLUS)
	{
	  offset += reg_size;
	  if (GET_CODE (XEXP (e, 0)) != REG
	      || REGNO (XEXP (e, 0)) != SP_REGNUM
	      || GET_CODE (XEXP (e, 1)) != CONST_INT
	      || offset != INTVAL (XEXP (e, 1)))
	    abort ();
	}
      else if (i != 1
	       || GET_CODE (e) != REG
	       || REGNO (e) != SP_REGNUM)
	abort ();
#endif
    }
  fprintf (asm_out_file, "}\n");
}

/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(asm_out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || GET_CODE (XEXP (e1, 0)) != REG
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || GET_CODE (XEXP (e1, 1)) != CONST_INT)
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (GET_CODE (XEXP (e1, 0)) != REG
		  || GET_CODE (XEXP (e1, 1)) != CONST_INT)
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (GET_CODE (e1) == REG)
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && GET_CODE (XEXP (e1, 0)) == REG
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && GET_CODE (XEXP (e1, 1)) == CONST_INT)
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
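
/* Taken together, the annotations produced by the routines above for a
   typical prologue look like this (an illustrative sketch only):

	.save	{r4, r5, lr}
	.pad	#16
	.setfp	fp, sp, #8  */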

/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      pat = XEXP (note, 0);
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  goto found;

	case REG_CFA_REGISTER:
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx);
	    reg = REGNO (dest);
	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			 reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	case REG_CFA_DEF_CFA:
	case REG_CFA_EXPRESSION:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}

/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
}
#endif /* ARM_UNWIND_INFO */

/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}

static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputs (")", fp);
      break;
    default:
      break;
    }

  return TRUE;
}

/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}

/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}

/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */

const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op(operands[3], &val);
      if (shift)
	{
	  if (val != -1)
	    operands[2] = GEN_INT(val);
	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
	}
      else
	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}

/* Output assembly for a WMMX immediate shift instruction.  */

const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  enum machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
	{
	  sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	  output_asm_insn (templ, operands);
	  if (opmode == DImode)
	    {
	      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	      output_asm_insn (templ, operands);
	    }
	}
      else
	{
	  /* The destination register will contain all zeros.  */
	  sprintf (templ, "wzero\t%%0");
	  output_asm_insn (templ, operands);
	}
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}

/* Output assembly for a WMMX tinsr instruction.  */

const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];

  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	break;

      mask >>= 1;
    }
  gcc_assert (i < units);

  switch (GET_MODE (operands[0]))
    {
    case V8QImode:
      sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
      break;
    case V4HImode:
      sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
      break;
    case V2SImode:
      sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
      break;
    default:
      gcc_unreachable ();
      break;
    }
  output_asm_insn (templ, operands);

  return "";
}

/* Output a Thumb-1 casesi dispatch sequence.  */

const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[0]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}

/* Output a Thumb-2 casesi instruction.  */

const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
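
/* An illustrative QImode dispatch sequence as emitted above (a sketch,
   not verbatim output):

	cmp	r0, #<case count - 1>
	bhi	.Ldefault
	tbb	[pc, r0]
	@ table of byte offsets follows  */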

/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */

static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexa15:
      return 3;

    case cortexr4:
    case cortexr4f:
    case cortexr5:
    case genericv7a:
    case cortexa5:
    case cortexa8:
    case cortexa9:
    case fa726te:
      return 2;

    default:
      return 1;
    }
}

/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */

typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};

const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi && !in_system_header)
	{
	  warned = true;
	  inform (input_location,
		  "the mangling of %<va_list%> has changed in GCC 4.4");
	}
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
		      pos->element_type_name))
	return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}

/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}

/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}

/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */

static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}

/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}

static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (enum machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}

static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM;
	   regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}

static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}

/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */

int
arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}

/* Compute the number of instructions emitted by output_move_double.  */

int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}

int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (GET_CODE (operand) != CONST_DOUBLE)
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_inverse (DFmode, &r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ( (value & (value - 1)) == 0))
	    return int_log2 (value);
	}
    }
  return 0;
}
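
/* Worked example (illustrative): for the constant 0.125 the exact real
   inverse is 8.0, which truncates exactly to the integer 8, a power of
   two, so the function returns int_log2 (8) = 3: the operand encodes a
   fixed-point value with 3 fractional bits.  */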

/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}

/* Emit the load-exclusive and store-exclusive instructions.  */

static void
arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
{
  rtx (*gen) (rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_arm_load_exclusiveqi; break;
    case HImode: gen = gen_arm_load_exclusivehi; break;
    case SImode: gen = gen_arm_load_exclusivesi; break;
    case DImode: gen = gen_arm_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_arm_store_exclusiveqi; break;
    case HImode: gen = gen_arm_store_exclusivehi; break;
    case SImode: gen = gen_arm_store_exclusivesi; break;
    case DImode: gen = gen_arm_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem));
}

/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);

  insn = emit_jump_insn (insn);
  add_reg_note (insn, REG_BR_PROB, very_unlikely);
}

/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    case DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  */
  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  mod_s = (enum memmodel) INTVAL (operands[5]);
  mod_f = (enum memmodel) INTVAL (operands[6]);
  scratch = operands[7];
  mode = GET_MODE (mem);

  arm_pre_atomic_barrier (mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem);

  cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  arm_emit_store_exclusive (mode, scratch, mem, newval);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  cond = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, cond, x));

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }

  if (mod_f != MEMMODEL_RELAXED)
    emit_label (label2);

  arm_post_atomic_barrier (mod_s);

  if (mod_f == MEMMODEL_RELAXED)
    emit_label (label2);
}
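
/* Illustrative sketch (not exact output) of the sequence split out
   above for a strong SImode compare-and-swap, ignoring barriers:

	1:	ldrex	rval, [mem]
		cmp	rval, oldval
		bne	2f
		strex	scratch, newval, [mem]
		cmp	scratch, #0
		bne	1b
	2:
   On exit the Z flag signals success, matching the EQ test arranged
   by the expander above.  */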
void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  arm_post_atomic_barrier (model);
}
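
/* Illustrative sketch of what the splitter above produces for an
   SImode atomic OR, ignoring barriers (exact code depends on the
   memory model and operands):

	1:	ldrex	old_out, [mem]
		orr	new_out, old_out, value
		strex	cond, new_out, [mem]
		cmp	cond, #0
		bne	1b
   */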
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
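
/* Illustrative example: for a two-vector V8QI permutation the mask
   built above is {15,15,...}, so a variable selector element of, say,
   17 is reduced to 17 & 15 == 1 before the VTBL lookup, giving the
   modulo behaviour that VEC_PERM_EXPR requires.  */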
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
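
/* Illustrative example: for two V8QI operands, the even-element
   selector {0, 2, 4, 6, 8, 10, 12, 14} satisfies the test above
   (elt == i * 2 + odd with odd == 0) and is matched to a single VUZP.  */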
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
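
/* Illustrative example: for two V8QI operands, the low-half interleave
   {0, 8, 1, 9, 2, 10, 3, 11} passes the test above with high == 0 and
   is matched to a single VZIP.  */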
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen)(rtx, rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev64v16qi; break;
	case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev32v16qi; break;
	case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev16v16qi; break;
	case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
	case V4SImode:  gen = gen_neon_vrev64v4si;  break;
	case V2SImode:  gen = gen_neon_vrev64v2si;  break;
	case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
	case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  /* ??? The third operand is an artifact of the builtin infrastructure
     and is ignored by the actual instruction.  */
  emit_insn (gen (d->target, d->op0, const0_rtx));
  return true;
}
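
/* Illustrative example: on a single V8QI operand the selector
   {1, 0, 3, 2, 5, 4, 7, 6} gives diff == 1 and satisfies
   perm[i + j] == i + diff - j, so it is matched to a single VREV16.  */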
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
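
/* Illustrative example: for two V8QI operands the selector
   {0, 8, 2, 10, 4, 12, 6, 14} satisfies the checks above with
   odd == 0 and is matched to a single VTRN.  */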
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	break;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;

	  next = 0;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vextv16qi; break;
    case V8QImode: gen = gen_neon_vextv8qi; break;
    case V4HImode: gen = gen_neon_vextv4hi; break;
    case V8HImode: gen = gen_neon_vextv8hi; break;
    case V2SImode: gen = gen_neon_vextv2si; break;
    case V4SImode: gen = gen_neon_vextv4si; break;
    case V2SFmode: gen = gen_neon_vextv2sf; break;
    case V4SFmode: gen = gen_neon_vextv4sf; break;
    case V2DImode: gen = gen_neon_vextv2di; break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
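
/* Illustrative example: for two V8QI operands the selector
   {3, 4, 5, 6, 7, 8, 9, 10} increases by one from location 3, so it
   is matched to a VEXT with an offset of 3.  */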
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
bool
arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and the mode fits in a single word, or we
     have ldrd, then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
	 instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	{
	  if (code != ARM_PRE_DEC)
	    return true;
	  else
	    return false;
	}
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;

      return true;

    default:
      return false;
    }

  return false;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  Shift by a constant
      less than 32 in Thumb2 mode requires SCRATCH1 only.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
	    gen_addsi3_compare0 ((DEST), (SRC), \
				 GEN_INT (-32))
  #define SET(DEST,SRC) \
	    gen_rtx_SET (SImode, (DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			    SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			    SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
	    gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
	    gen_arm_cond_branch ((LABEL), \
				 gen_rtx_ ## COND (CCmode, cc_reg, \
						   const0_rtx), \
				 cc_reg)

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result that an ARM instruction
	 in a shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behaviour", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Emit code like this:

	 ASHIFT:
	    out_down = in_down << amount;
	    out_down = (in_up << (amount - 32)) | out_down;
	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	    out_up = in_up << amount;

	 ASHIFTRT:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount < 32)
	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 LSHIFTRT:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount < 32)
	      out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
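
/* Illustrative example of the constant-shift path above: for a left
   shift (x << 40) the amount is >= 32, so with adj_amount == 8 the
   whole operation reduces to two moves and no branches:

	out_high = in_low << 8;
	out_low  = 0;
   */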
/* Returns true if the comparison is a valid comparison operation, and
   puts the operands into a form that is valid.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  enum rtx_code canonical_code;
  enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  canonical_code = arm_canonicalize_comparison (code, op1, op2);
  PUT_CODE (*comparison, canonical_code);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case SFmode:
    case DFmode:
      if (!arm_float_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_float_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}

#include "gt-arm.h"