1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
26 #include "hash-table.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
40 #include "insn-attr.h"
46 #include "diagnostic-core.h"
53 #include "sched-int.h"
54 #include "target-def.h"
56 #include "langhooks.h"
63 #include "gimple-expr.h"
/* Forward definitions of types.  */

/* Nodes and fixups for the minipool (constant-pool) machinery defined
   later in this file.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* Hook installed by the language front end to emit language-specific
   object attributes; may be NULL.  NOTE(review): only the declaration
   is visible in this chunk -- confirm semantics at the call site.  */
void (*arm_lang_output_object_attributes_hook)(void);
77 /* Forward function declarations. */
78 static bool arm_const_not_ok_for_debug_p (rtx
);
79 static bool arm_lra_p (void);
80 static bool arm_needs_doubleword_align (enum machine_mode
, const_tree
);
81 static int arm_compute_static_chain_stack_bytes (void);
82 static arm_stack_offsets
*arm_get_frame_offsets (void);
83 static void arm_add_gc_roots (void);
84 static int arm_gen_constant (enum rtx_code
, enum machine_mode
, rtx
,
85 HOST_WIDE_INT
, rtx
, rtx
, int, int);
86 static unsigned bit_count (unsigned long);
87 static int arm_address_register_rtx_p (rtx
, int);
88 static int arm_legitimate_index_p (enum machine_mode
, rtx
, RTX_CODE
, int);
89 static int thumb2_legitimate_index_p (enum machine_mode
, rtx
, int);
90 static int thumb1_base_register_rtx_p (rtx
, enum machine_mode
, int);
91 static rtx
arm_legitimize_address (rtx
, rtx
, enum machine_mode
);
92 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
93 static rtx
thumb_legitimize_address (rtx
, rtx
, enum machine_mode
);
94 inline static int thumb1_index_register_rtx_p (rtx
, int);
95 static int thumb_far_jump_used_p (void);
96 static bool thumb_force_lr_save (void);
97 static unsigned arm_size_return_regs (void);
98 static bool arm_assemble_integer (rtx
, unsigned int, int);
99 static void arm_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
);
100 static void arm_print_operand (FILE *, rtx
, int);
101 static void arm_print_operand_address (FILE *, rtx
);
102 static bool arm_print_operand_punct_valid_p (unsigned char code
);
103 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
104 static arm_cc
get_arm_condition_code (rtx
);
105 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
106 static const char *output_multi_immediate (rtx
*, const char *, const char *,
108 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
109 static struct machine_function
*arm_init_machine_status (void);
110 static void thumb_exit (FILE *, int);
111 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
112 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
113 static Mnode
*add_minipool_forward_ref (Mfix
*);
114 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
115 static Mnode
*add_minipool_backward_ref (Mfix
*);
116 static void assign_minipool_offsets (Mfix
*);
117 static void arm_print_value (FILE *, rtx
);
118 static void dump_minipool (rtx
);
119 static int arm_barrier_cost (rtx
);
120 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
121 static void push_minipool_barrier (rtx
, HOST_WIDE_INT
);
122 static void push_minipool_fix (rtx
, HOST_WIDE_INT
, rtx
*, enum machine_mode
,
124 static void arm_reorg (void);
125 static void note_invalid_constants (rtx
, HOST_WIDE_INT
, int);
126 static unsigned long arm_compute_save_reg0_reg12_mask (void);
127 static unsigned long arm_compute_save_reg_mask (void);
128 static unsigned long arm_isr_value (tree
);
129 static unsigned long arm_compute_func_type (void);
130 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
131 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
132 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
133 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
134 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
136 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
137 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
138 static int arm_comp_type_attributes (const_tree
, const_tree
);
139 static void arm_set_default_type_attributes (tree
);
140 static int arm_adjust_cost (rtx
, rtx
, rtx
, int);
141 static int arm_sched_reorder (FILE *, int, rtx
*, int *, int);
142 static int optimal_immediate_sequence (enum rtx_code code
,
143 unsigned HOST_WIDE_INT val
,
144 struct four_ints
*return_sequence
);
145 static int optimal_immediate_sequence_1 (enum rtx_code code
,
146 unsigned HOST_WIDE_INT val
,
147 struct four_ints
*return_sequence
,
149 static int arm_get_strip_length (int);
150 static bool arm_function_ok_for_sibcall (tree
, tree
);
151 static enum machine_mode
arm_promote_function_mode (const_tree
,
152 enum machine_mode
, int *,
154 static bool arm_return_in_memory (const_tree
, const_tree
);
155 static rtx
arm_function_value (const_tree
, const_tree
, bool);
156 static rtx
arm_libcall_value_1 (enum machine_mode
);
157 static rtx
arm_libcall_value (enum machine_mode
, const_rtx
);
158 static bool arm_function_value_regno_p (const unsigned int);
159 static void arm_internal_label (FILE *, const char *, unsigned long);
160 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
162 static bool arm_have_conditional_execution (void);
163 static bool arm_cannot_force_const_mem (enum machine_mode
, rtx
);
164 static bool arm_legitimate_constant_p (enum machine_mode
, rtx
);
165 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
166 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
167 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
168 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
169 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
170 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
171 static bool arm_rtx_costs (rtx
, int, int, int, int *, bool);
172 static int arm_address_cost (rtx
, enum machine_mode
, addr_space_t
, bool);
173 static int arm_register_move_cost (enum machine_mode
, reg_class_t
, reg_class_t
);
174 static int arm_memory_move_cost (enum machine_mode
, reg_class_t
, bool);
175 static void arm_init_builtins (void);
176 static void arm_init_iwmmxt_builtins (void);
177 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
178 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
179 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
180 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
181 static tree
arm_builtin_decl (unsigned, bool);
182 static void emit_constant_insn (rtx cond
, rtx pattern
);
183 static rtx
emit_set_insn (rtx
, rtx
);
184 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
185 static int arm_arg_partial_bytes (cumulative_args_t
, enum machine_mode
,
187 static rtx
arm_function_arg (cumulative_args_t
, enum machine_mode
,
189 static void arm_function_arg_advance (cumulative_args_t
, enum machine_mode
,
191 static unsigned int arm_function_arg_boundary (enum machine_mode
, const_tree
);
192 static rtx
aapcs_allocate_return_reg (enum machine_mode
, const_tree
,
194 static rtx
aapcs_libcall_value (enum machine_mode
);
195 static int aapcs_select_return_coproc (const_tree
, const_tree
);
197 #ifdef OBJECT_FORMAT_ELF
198 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
199 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
202 static void arm_encode_section_info (tree
, rtx
, int);
205 static void arm_file_end (void);
206 static void arm_file_start (void);
208 static void arm_setup_incoming_varargs (cumulative_args_t
, enum machine_mode
,
210 static bool arm_pass_by_reference (cumulative_args_t
,
211 enum machine_mode
, const_tree
, bool);
212 static bool arm_promote_prototypes (const_tree
);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree
);
216 static bool arm_must_pass_in_stack (enum machine_mode
, const_tree
);
217 static bool arm_return_in_memory (const_tree
, const_tree
);
219 static void arm_unwind_emit (FILE *, rtx
);
220 static bool arm_output_ttype (rtx
);
221 static void arm_asm_emit_except_personality (rtx
);
222 static void arm_asm_init_sections (void);
224 static rtx
arm_dwarf_register_span (rtx
);
226 static tree
arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree
arm_get_cookie_size (tree
);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree
);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree
arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree
, rtx
);
238 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
239 static void arm_option_override (void);
240 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode
);
241 static bool arm_cannot_copy_insn_p (rtx
);
242 static int arm_issue_rate (void);
243 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
244 static bool arm_output_addr_const_extra (FILE *, rtx
);
245 static bool arm_allocate_stack_slots_for_args (void);
246 static bool arm_warn_func_return (tree
);
247 static const char *arm_invalid_parameter_type (const_tree t
);
248 static const char *arm_invalid_return_type (const_tree t
);
249 static tree
arm_promoted_type (const_tree t
);
250 static tree
arm_convert_to_type (tree type
, tree expr
);
251 static bool arm_scalar_mode_supported_p (enum machine_mode
);
252 static bool arm_frame_pointer_required (void);
253 static bool arm_can_eliminate (const int, const int);
254 static void arm_asm_trampoline_template (FILE *);
255 static void arm_trampoline_init (rtx
, tree
, rtx
);
256 static rtx
arm_trampoline_adjust_address (rtx
);
257 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
258 static bool cortex_a9_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
259 static bool xscale_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
260 static bool fa726te_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
261 static bool arm_array_mode_supported_p (enum machine_mode
,
262 unsigned HOST_WIDE_INT
);
263 static enum machine_mode
arm_preferred_simd_mode (enum machine_mode
);
264 static bool arm_class_likely_spilled_p (reg_class_t
);
265 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
266 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
267 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode
,
271 static void arm_conditional_register_usage (void);
272 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
273 static unsigned int arm_autovectorize_vector_sizes (void);
274 static int arm_default_branch_cost (bool, bool);
275 static int arm_cortex_a5_branch_cost (bool, bool);
276 static int arm_cortex_m_branch_cost (bool, bool);
278 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
279 const unsigned char *sel
);
281 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
283 int misalign ATTRIBUTE_UNUSED
);
284 static unsigned arm_add_stmt_cost (void *data
, int count
,
285 enum vect_cost_for_stmt kind
,
286 struct _stmt_vec_info
*stmt_info
,
288 enum vect_cost_model_location where
);
290 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
291 bool op0_preserve_value
);
292 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
294 /* Table of machine attributes. */
295 static const struct attribute_spec arm_attribute_table
[] =
297 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
298 affects_type_identity } */
299 /* Function calls made to this symbol must be done indirectly, because
300 it may lie outside of the 26 bit addressing range of a normal function
302 { "long_call", 0, 0, false, true, true, NULL
, false },
303 /* Whereas these functions are always known to reside within the 26 bit
305 { "short_call", 0, 0, false, true, true, NULL
, false },
306 /* Specify the procedure call conventions for a function. */
307 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
309 /* Interrupt Service Routines have special prologue and epilogue requirements. */
310 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
312 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
314 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
317 /* ARM/PE has three new attributes:
319 dllexport - for exporting a function/variable that will live in a dll
320 dllimport - for importing a function/variable from a dll
322 Microsoft allows multiple declspecs in one __declspec, separating
323 them with spaces. We do NOT support this. Instead, use __declspec
326 { "dllimport", 0, 0, true, false, false, NULL
, false },
327 { "dllexport", 0, 0, true, false, false, NULL
, false },
328 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
330 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
331 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
332 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
333 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
336 { NULL
, 0, 0, false, false, false, NULL
, false }
339 /* Initialize the GCC target structure. */
340 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
341 #undef TARGET_MERGE_DECL_ATTRIBUTES
342 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
345 #undef TARGET_LEGITIMIZE_ADDRESS
346 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
349 #define TARGET_LRA_P arm_lra_p
351 #undef TARGET_ATTRIBUTE_TABLE
352 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
354 #undef TARGET_ASM_FILE_START
355 #define TARGET_ASM_FILE_START arm_file_start
356 #undef TARGET_ASM_FILE_END
357 #define TARGET_ASM_FILE_END arm_file_end
359 #undef TARGET_ASM_ALIGNED_SI_OP
360 #define TARGET_ASM_ALIGNED_SI_OP NULL
361 #undef TARGET_ASM_INTEGER
362 #define TARGET_ASM_INTEGER arm_assemble_integer
364 #undef TARGET_PRINT_OPERAND
365 #define TARGET_PRINT_OPERAND arm_print_operand
366 #undef TARGET_PRINT_OPERAND_ADDRESS
367 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
368 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
369 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
371 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
372 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
374 #undef TARGET_ASM_FUNCTION_PROLOGUE
375 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
377 #undef TARGET_ASM_FUNCTION_EPILOGUE
378 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
380 #undef TARGET_OPTION_OVERRIDE
381 #define TARGET_OPTION_OVERRIDE arm_option_override
383 #undef TARGET_COMP_TYPE_ATTRIBUTES
384 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
386 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
387 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
389 #undef TARGET_SCHED_ADJUST_COST
390 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
392 #undef TARGET_SCHED_REORDER
393 #define TARGET_SCHED_REORDER arm_sched_reorder
395 #undef TARGET_REGISTER_MOVE_COST
396 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
398 #undef TARGET_MEMORY_MOVE_COST
399 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
401 #undef TARGET_ENCODE_SECTION_INFO
403 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
405 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
408 #undef TARGET_STRIP_NAME_ENCODING
409 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
411 #undef TARGET_ASM_INTERNAL_LABEL
412 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
414 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
415 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
417 #undef TARGET_FUNCTION_VALUE
418 #define TARGET_FUNCTION_VALUE arm_function_value
420 #undef TARGET_LIBCALL_VALUE
421 #define TARGET_LIBCALL_VALUE arm_libcall_value
423 #undef TARGET_FUNCTION_VALUE_REGNO_P
424 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
426 #undef TARGET_ASM_OUTPUT_MI_THUNK
427 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
428 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
429 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
431 #undef TARGET_RTX_COSTS
432 #define TARGET_RTX_COSTS arm_rtx_costs
433 #undef TARGET_ADDRESS_COST
434 #define TARGET_ADDRESS_COST arm_address_cost
436 #undef TARGET_SHIFT_TRUNCATION_MASK
437 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
438 #undef TARGET_VECTOR_MODE_SUPPORTED_P
439 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
440 #undef TARGET_ARRAY_MODE_SUPPORTED_P
441 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
442 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
443 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
444 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
445 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
446 arm_autovectorize_vector_sizes
448 #undef TARGET_MACHINE_DEPENDENT_REORG
449 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
451 #undef TARGET_INIT_BUILTINS
452 #define TARGET_INIT_BUILTINS arm_init_builtins
453 #undef TARGET_EXPAND_BUILTIN
454 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
455 #undef TARGET_BUILTIN_DECL
456 #define TARGET_BUILTIN_DECL arm_builtin_decl
458 #undef TARGET_INIT_LIBFUNCS
459 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
461 #undef TARGET_PROMOTE_FUNCTION_MODE
462 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
463 #undef TARGET_PROMOTE_PROTOTYPES
464 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
465 #undef TARGET_PASS_BY_REFERENCE
466 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
467 #undef TARGET_ARG_PARTIAL_BYTES
468 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
469 #undef TARGET_FUNCTION_ARG
470 #define TARGET_FUNCTION_ARG arm_function_arg
471 #undef TARGET_FUNCTION_ARG_ADVANCE
472 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
473 #undef TARGET_FUNCTION_ARG_BOUNDARY
474 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
476 #undef TARGET_SETUP_INCOMING_VARARGS
477 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
479 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
480 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
482 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
483 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
484 #undef TARGET_TRAMPOLINE_INIT
485 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
486 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
487 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
489 #undef TARGET_WARN_FUNC_RETURN
490 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
492 #undef TARGET_DEFAULT_SHORT_ENUMS
493 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
495 #undef TARGET_ALIGN_ANON_BITFIELD
496 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
498 #undef TARGET_NARROW_VOLATILE_BITFIELD
499 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
501 #undef TARGET_CXX_GUARD_TYPE
502 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
504 #undef TARGET_CXX_GUARD_MASK_BIT
505 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
507 #undef TARGET_CXX_GET_COOKIE_SIZE
508 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
510 #undef TARGET_CXX_COOKIE_HAS_SIZE
511 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
513 #undef TARGET_CXX_CDTOR_RETURNS_THIS
514 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
516 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
517 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
519 #undef TARGET_CXX_USE_AEABI_ATEXIT
520 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
522 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
523 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
524 arm_cxx_determine_class_data_visibility
526 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
527 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
529 #undef TARGET_RETURN_IN_MSB
530 #define TARGET_RETURN_IN_MSB arm_return_in_msb
532 #undef TARGET_RETURN_IN_MEMORY
533 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
535 #undef TARGET_MUST_PASS_IN_STACK
536 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
539 #undef TARGET_ASM_UNWIND_EMIT
540 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
542 /* EABI unwinding tables use a different format for the typeinfo tables. */
543 #undef TARGET_ASM_TTYPE
544 #define TARGET_ASM_TTYPE arm_output_ttype
546 #undef TARGET_ARM_EABI_UNWINDER
547 #define TARGET_ARM_EABI_UNWINDER true
549 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
550 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
552 #undef TARGET_ASM_INIT_SECTIONS
553 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
554 #endif /* ARM_UNWIND_INFO */
556 #undef TARGET_DWARF_REGISTER_SPAN
557 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
559 #undef TARGET_CANNOT_COPY_INSN_P
560 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
563 #undef TARGET_HAVE_TLS
564 #define TARGET_HAVE_TLS true
567 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
568 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
570 #undef TARGET_LEGITIMATE_CONSTANT_P
571 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
573 #undef TARGET_CANNOT_FORCE_CONST_MEM
574 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
576 #undef TARGET_MAX_ANCHOR_OFFSET
577 #define TARGET_MAX_ANCHOR_OFFSET 4095
579 /* The minimum is set such that the total size of the block
580 for a particular anchor is -4088 + 1 + 4095 bytes, which is
581 divisible by eight, ensuring natural spacing of anchors. */
582 #undef TARGET_MIN_ANCHOR_OFFSET
583 #define TARGET_MIN_ANCHOR_OFFSET -4088
585 #undef TARGET_SCHED_ISSUE_RATE
586 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
588 #undef TARGET_MANGLE_TYPE
589 #define TARGET_MANGLE_TYPE arm_mangle_type
591 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
592 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
594 #undef TARGET_BUILD_BUILTIN_VA_LIST
595 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
596 #undef TARGET_EXPAND_BUILTIN_VA_START
597 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
598 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
599 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
602 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
603 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
606 #undef TARGET_LEGITIMATE_ADDRESS_P
607 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
609 #undef TARGET_PREFERRED_RELOAD_CLASS
610 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
612 #undef TARGET_INVALID_PARAMETER_TYPE
613 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
615 #undef TARGET_INVALID_RETURN_TYPE
616 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
618 #undef TARGET_PROMOTED_TYPE
619 #define TARGET_PROMOTED_TYPE arm_promoted_type
621 #undef TARGET_CONVERT_TO_TYPE
622 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
624 #undef TARGET_SCALAR_MODE_SUPPORTED_P
625 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
627 #undef TARGET_FRAME_POINTER_REQUIRED
628 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
630 #undef TARGET_CAN_ELIMINATE
631 #define TARGET_CAN_ELIMINATE arm_can_eliminate
633 #undef TARGET_CONDITIONAL_REGISTER_USAGE
634 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
636 #undef TARGET_CLASS_LIKELY_SPILLED_P
637 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
639 #undef TARGET_VECTORIZE_BUILTINS
640 #define TARGET_VECTORIZE_BUILTINS
642 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
643 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
644 arm_builtin_vectorized_function
646 #undef TARGET_VECTOR_ALIGNMENT
647 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
649 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
650 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
651 arm_vector_alignment_reachable
653 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
654 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
655 arm_builtin_support_vector_misalignment
657 #undef TARGET_PREFERRED_RENAME_CLASS
658 #define TARGET_PREFERRED_RENAME_CLASS \
659 arm_preferred_rename_class
661 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
662 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
663 arm_vectorize_vec_perm_const_ok
665 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
666 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
667 arm_builtin_vectorization_cost
668 #undef TARGET_VECTORIZE_ADD_STMT_COST
669 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
671 #undef TARGET_CANONICALIZE_COMPARISON
672 #define TARGET_CANONICALIZE_COMPARISON \
673 arm_canonicalize_comparison
675 #undef TARGET_ASAN_SHADOW_OFFSET
676 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
678 #undef MAX_INSN_PER_IT_BLOCK
679 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
681 #undef TARGET_CAN_USE_DOLOOP_P
682 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
684 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
685 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
687 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
688 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
/* The target hook vector for this backend.  Every TARGET_* macro
   defined above overrides the corresponding default supplied by
   TARGET_INITIALIZER.  */
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

/* Assembly output stream, owned by the common compiler driver.  */
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.
   NOTE(review): the variable this comment describes is elided from
   this chunk.  */

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  One label per low register
   that may hold a call target.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
721 /* Bit values used to identify processor capabilities. */
722 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
723 #define FL_ARCH3M (1 << 1) /* Extended multiply */
724 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
725 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
726 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
727 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
728 #define FL_THUMB (1 << 6) /* Thumb aware */
729 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
730 #define FL_STRONG (1 << 8) /* StrongARM */
731 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
732 #define FL_XSCALE (1 << 10) /* XScale */
733 /* spare (1 << 11) */
734 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
735 media instructions. */
736 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
737 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
738 Note: ARM6 & 7 derivatives only. */
739 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
740 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
741 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
743 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
744 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
745 #define FL_NEON (1 << 20) /* Neon instructions. */
746 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
748 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
749 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
750 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
751 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
753 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
754 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
756 /* Flags that only effect tuning, not available instructions. */
757 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
/* Canonical feature-flag set for each architecture revision.  Each
   entry is built incrementally from its predecessor by OR-ing in the
   capabilities that revision adds (or masking out ones it lacks).  */
#define FL_FOR_ARCH2	FL_NOTM
#define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
/* v6-M lacks the instructions reserved to non-'M' profiles, hence the
   mask rather than an OR.  */
#define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7VE	(FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
#define FL_FOR_ARCH8A	(FL_FOR_ARCH7VE | FL_ARCH8)
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options
   should be used.  */
static unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  NOTE(review): the original comment continuation is elided
   from this chunk; wording reconstructed.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.

   NOTE(review): several of the `int arm_arch*` definitions that the
   comments below describe are elided from this chunk; the orphan
   comments are retained in place.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */

/* Nonzero if this chip supports the ARM 6K extensions.  */

/* Nonzero if instructions present in ARMv6-M can be used.  */

/* Nonzero if this chip supports the ARM 7 extensions.  */

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */

/* Nonzero if instructions present in ARMv8 can be used.  */

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */

/* Nonzero if generating Thumb-1 instructions.  */

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */

/* Nonzero if chip supports integer division instruction (ARM mode and
   Thumb mode respectively).  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;
888 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
889 we must report the mode of the memory reference from
890 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
891 enum machine_mode output_memory_reference_mode
;
893 /* The register number to be used for the PIC offset register. */
894 unsigned arm_pic_register
= INVALID_REGNUM
;
896 enum arm_pcs arm_pcs_default
;
898 /* For an explanation of these variables, see final_prescan_insn below. */
900 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
901 enum arm_cond_code arm_current_cc
;
904 int arm_target_label
;
905 /* The number of conditionally executed insns, including the current insn. */
906 int arm_condexec_count
= 0;
907 /* A bitmask specifying the patterns for the IT block.
908 Zero means do not output an IT block before this insn. */
909 int arm_condexec_mask
= 0;
910 /* The number of bits used in arm_condexec_mask. */
911 int arm_condexec_masklen
= 0;
913 /* Nonzero if chip supports the ARMv8 CRC instructions. */
914 int arm_arch_crc
= 0;
916 /* The condition codes of the ARM, and the inverse function. */
917 static const char * const arm_condition_codes
[] =
919 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
920 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
923 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
924 int arm_regs_in_sequence
[] =
926 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
929 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
930 #define streq(string1, string2) (strcmp (string1, string2) == 0)
932 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
933 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
934 | (1 << PIC_OFFSET_TABLE_REGNUM)))
936 /* Initialization code. */
940 const char *const name
;
941 enum processor_type core
;
943 enum base_architecture base_arch
;
944 const unsigned long flags
;
945 const struct tune_params
*const tune
;
949 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
950 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
955 /* arm generic vectorizer costs. */
957 struct cpu_vec_costs arm_default_vec_cost
= {
958 1, /* scalar_stmt_cost. */
959 1, /* scalar load_cost. */
960 1, /* scalar_store_cost. */
961 1, /* vec_stmt_cost. */
962 1, /* vec_to_scalar_cost. */
963 1, /* scalar_to_vec_cost. */
964 1, /* vec_align_load_cost. */
965 1, /* vec_unalign_load_cost. */
966 1, /* vec_unalign_store_cost. */
967 1, /* vec_store_cost. */
968 3, /* cond_taken_branch_cost. */
969 1, /* cond_not_taken_branch_cost. */
972 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
973 #include "aarch-cost-tables.h"
977 const struct cpu_cost_table cortexa9_extra_costs
=
984 COSTS_N_INSNS (1), /* shift_reg. */
985 COSTS_N_INSNS (1), /* arith_shift. */
986 COSTS_N_INSNS (2), /* arith_shift_reg. */
988 COSTS_N_INSNS (1), /* log_shift_reg. */
989 COSTS_N_INSNS (1), /* extend. */
990 COSTS_N_INSNS (2), /* extend_arith. */
991 COSTS_N_INSNS (1), /* bfi. */
992 COSTS_N_INSNS (1), /* bfx. */
996 true /* non_exec_costs_exec. */
1001 COSTS_N_INSNS (3), /* simple. */
1002 COSTS_N_INSNS (3), /* flag_setting. */
1003 COSTS_N_INSNS (2), /* extend. */
1004 COSTS_N_INSNS (3), /* add. */
1005 COSTS_N_INSNS (2), /* extend_add. */
1006 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1010 0, /* simple (N/A). */
1011 0, /* flag_setting (N/A). */
1012 COSTS_N_INSNS (4), /* extend. */
1014 COSTS_N_INSNS (4), /* extend_add. */
1020 COSTS_N_INSNS (2), /* load. */
1021 COSTS_N_INSNS (2), /* load_sign_extend. */
1022 COSTS_N_INSNS (2), /* ldrd. */
1023 COSTS_N_INSNS (2), /* ldm_1st. */
1024 1, /* ldm_regs_per_insn_1st. */
1025 2, /* ldm_regs_per_insn_subsequent. */
1026 COSTS_N_INSNS (5), /* loadf. */
1027 COSTS_N_INSNS (5), /* loadd. */
1028 COSTS_N_INSNS (1), /* load_unaligned. */
1029 COSTS_N_INSNS (2), /* store. */
1030 COSTS_N_INSNS (2), /* strd. */
1031 COSTS_N_INSNS (2), /* stm_1st. */
1032 1, /* stm_regs_per_insn_1st. */
1033 2, /* stm_regs_per_insn_subsequent. */
1034 COSTS_N_INSNS (1), /* storef. */
1035 COSTS_N_INSNS (1), /* stored. */
1036 COSTS_N_INSNS (1) /* store_unaligned. */
1041 COSTS_N_INSNS (14), /* div. */
1042 COSTS_N_INSNS (4), /* mult. */
1043 COSTS_N_INSNS (7), /* mult_addsub. */
1044 COSTS_N_INSNS (30), /* fma. */
1045 COSTS_N_INSNS (3), /* addsub. */
1046 COSTS_N_INSNS (1), /* fpconst. */
1047 COSTS_N_INSNS (1), /* neg. */
1048 COSTS_N_INSNS (3), /* compare. */
1049 COSTS_N_INSNS (3), /* widen. */
1050 COSTS_N_INSNS (3), /* narrow. */
1051 COSTS_N_INSNS (3), /* toint. */
1052 COSTS_N_INSNS (3), /* fromint. */
1053 COSTS_N_INSNS (3) /* roundint. */
1057 COSTS_N_INSNS (24), /* div. */
1058 COSTS_N_INSNS (5), /* mult. */
1059 COSTS_N_INSNS (8), /* mult_addsub. */
1060 COSTS_N_INSNS (30), /* fma. */
1061 COSTS_N_INSNS (3), /* addsub. */
1062 COSTS_N_INSNS (1), /* fpconst. */
1063 COSTS_N_INSNS (1), /* neg. */
1064 COSTS_N_INSNS (3), /* compare. */
1065 COSTS_N_INSNS (3), /* widen. */
1066 COSTS_N_INSNS (3), /* narrow. */
1067 COSTS_N_INSNS (3), /* toint. */
1068 COSTS_N_INSNS (3), /* fromint. */
1069 COSTS_N_INSNS (3) /* roundint. */
1074 COSTS_N_INSNS (1) /* alu. */
1078 const struct cpu_cost_table cortexa8_extra_costs
=
1084 COSTS_N_INSNS (1), /* shift. */
1086 COSTS_N_INSNS (1), /* arith_shift. */
1087 0, /* arith_shift_reg. */
1088 COSTS_N_INSNS (1), /* log_shift. */
1089 0, /* log_shift_reg. */
1091 0, /* extend_arith. */
1097 true /* non_exec_costs_exec. */
1102 COSTS_N_INSNS (1), /* simple. */
1103 COSTS_N_INSNS (1), /* flag_setting. */
1104 COSTS_N_INSNS (1), /* extend. */
1105 COSTS_N_INSNS (1), /* add. */
1106 COSTS_N_INSNS (1), /* extend_add. */
1107 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1111 0, /* simple (N/A). */
1112 0, /* flag_setting (N/A). */
1113 COSTS_N_INSNS (2), /* extend. */
1115 COSTS_N_INSNS (2), /* extend_add. */
1121 COSTS_N_INSNS (1), /* load. */
1122 COSTS_N_INSNS (1), /* load_sign_extend. */
1123 COSTS_N_INSNS (1), /* ldrd. */
1124 COSTS_N_INSNS (1), /* ldm_1st. */
1125 1, /* ldm_regs_per_insn_1st. */
1126 2, /* ldm_regs_per_insn_subsequent. */
1127 COSTS_N_INSNS (1), /* loadf. */
1128 COSTS_N_INSNS (1), /* loadd. */
1129 COSTS_N_INSNS (1), /* load_unaligned. */
1130 COSTS_N_INSNS (1), /* store. */
1131 COSTS_N_INSNS (1), /* strd. */
1132 COSTS_N_INSNS (1), /* stm_1st. */
1133 1, /* stm_regs_per_insn_1st. */
1134 2, /* stm_regs_per_insn_subsequent. */
1135 COSTS_N_INSNS (1), /* storef. */
1136 COSTS_N_INSNS (1), /* stored. */
1137 COSTS_N_INSNS (1) /* store_unaligned. */
1142 COSTS_N_INSNS (36), /* div. */
1143 COSTS_N_INSNS (11), /* mult. */
1144 COSTS_N_INSNS (20), /* mult_addsub. */
1145 COSTS_N_INSNS (30), /* fma. */
1146 COSTS_N_INSNS (9), /* addsub. */
1147 COSTS_N_INSNS (3), /* fpconst. */
1148 COSTS_N_INSNS (3), /* neg. */
1149 COSTS_N_INSNS (6), /* compare. */
1150 COSTS_N_INSNS (4), /* widen. */
1151 COSTS_N_INSNS (4), /* narrow. */
1152 COSTS_N_INSNS (8), /* toint. */
1153 COSTS_N_INSNS (8), /* fromint. */
1154 COSTS_N_INSNS (8) /* roundint. */
1158 COSTS_N_INSNS (64), /* div. */
1159 COSTS_N_INSNS (16), /* mult. */
1160 COSTS_N_INSNS (25), /* mult_addsub. */
1161 COSTS_N_INSNS (30), /* fma. */
1162 COSTS_N_INSNS (9), /* addsub. */
1163 COSTS_N_INSNS (3), /* fpconst. */
1164 COSTS_N_INSNS (3), /* neg. */
1165 COSTS_N_INSNS (6), /* compare. */
1166 COSTS_N_INSNS (6), /* widen. */
1167 COSTS_N_INSNS (6), /* narrow. */
1168 COSTS_N_INSNS (8), /* toint. */
1169 COSTS_N_INSNS (8), /* fromint. */
1170 COSTS_N_INSNS (8) /* roundint. */
1175 COSTS_N_INSNS (1) /* alu. */
1179 const struct cpu_cost_table cortexa5_extra_costs
=
1185 COSTS_N_INSNS (1), /* shift. */
1186 COSTS_N_INSNS (1), /* shift_reg. */
1187 COSTS_N_INSNS (1), /* arith_shift. */
1188 COSTS_N_INSNS (1), /* arith_shift_reg. */
1189 COSTS_N_INSNS (1), /* log_shift. */
1190 COSTS_N_INSNS (1), /* log_shift_reg. */
1191 COSTS_N_INSNS (1), /* extend. */
1192 COSTS_N_INSNS (1), /* extend_arith. */
1193 COSTS_N_INSNS (1), /* bfi. */
1194 COSTS_N_INSNS (1), /* bfx. */
1195 COSTS_N_INSNS (1), /* clz. */
1196 COSTS_N_INSNS (1), /* rev. */
1198 true /* non_exec_costs_exec. */
1205 COSTS_N_INSNS (1), /* flag_setting. */
1206 COSTS_N_INSNS (1), /* extend. */
1207 COSTS_N_INSNS (1), /* add. */
1208 COSTS_N_INSNS (1), /* extend_add. */
1209 COSTS_N_INSNS (7) /* idiv. */
1213 0, /* simple (N/A). */
1214 0, /* flag_setting (N/A). */
1215 COSTS_N_INSNS (1), /* extend. */
1217 COSTS_N_INSNS (2), /* extend_add. */
1223 COSTS_N_INSNS (1), /* load. */
1224 COSTS_N_INSNS (1), /* load_sign_extend. */
1225 COSTS_N_INSNS (6), /* ldrd. */
1226 COSTS_N_INSNS (1), /* ldm_1st. */
1227 1, /* ldm_regs_per_insn_1st. */
1228 2, /* ldm_regs_per_insn_subsequent. */
1229 COSTS_N_INSNS (2), /* loadf. */
1230 COSTS_N_INSNS (4), /* loadd. */
1231 COSTS_N_INSNS (1), /* load_unaligned. */
1232 COSTS_N_INSNS (1), /* store. */
1233 COSTS_N_INSNS (3), /* strd. */
1234 COSTS_N_INSNS (1), /* stm_1st. */
1235 1, /* stm_regs_per_insn_1st. */
1236 2, /* stm_regs_per_insn_subsequent. */
1237 COSTS_N_INSNS (2), /* storef. */
1238 COSTS_N_INSNS (2), /* stored. */
1239 COSTS_N_INSNS (1) /* store_unaligned. */
1244 COSTS_N_INSNS (15), /* div. */
1245 COSTS_N_INSNS (3), /* mult. */
1246 COSTS_N_INSNS (7), /* mult_addsub. */
1247 COSTS_N_INSNS (7), /* fma. */
1248 COSTS_N_INSNS (3), /* addsub. */
1249 COSTS_N_INSNS (3), /* fpconst. */
1250 COSTS_N_INSNS (3), /* neg. */
1251 COSTS_N_INSNS (3), /* compare. */
1252 COSTS_N_INSNS (3), /* widen. */
1253 COSTS_N_INSNS (3), /* narrow. */
1254 COSTS_N_INSNS (3), /* toint. */
1255 COSTS_N_INSNS (3), /* fromint. */
1256 COSTS_N_INSNS (3) /* roundint. */
1260 COSTS_N_INSNS (30), /* div. */
1261 COSTS_N_INSNS (6), /* mult. */
1262 COSTS_N_INSNS (10), /* mult_addsub. */
1263 COSTS_N_INSNS (7), /* fma. */
1264 COSTS_N_INSNS (3), /* addsub. */
1265 COSTS_N_INSNS (3), /* fpconst. */
1266 COSTS_N_INSNS (3), /* neg. */
1267 COSTS_N_INSNS (3), /* compare. */
1268 COSTS_N_INSNS (3), /* widen. */
1269 COSTS_N_INSNS (3), /* narrow. */
1270 COSTS_N_INSNS (3), /* toint. */
1271 COSTS_N_INSNS (3), /* fromint. */
1272 COSTS_N_INSNS (3) /* roundint. */
1277 COSTS_N_INSNS (1) /* alu. */
1282 const struct cpu_cost_table cortexa7_extra_costs
=
1288 COSTS_N_INSNS (1), /* shift. */
1289 COSTS_N_INSNS (1), /* shift_reg. */
1290 COSTS_N_INSNS (1), /* arith_shift. */
1291 COSTS_N_INSNS (1), /* arith_shift_reg. */
1292 COSTS_N_INSNS (1), /* log_shift. */
1293 COSTS_N_INSNS (1), /* log_shift_reg. */
1294 COSTS_N_INSNS (1), /* extend. */
1295 COSTS_N_INSNS (1), /* extend_arith. */
1296 COSTS_N_INSNS (1), /* bfi. */
1297 COSTS_N_INSNS (1), /* bfx. */
1298 COSTS_N_INSNS (1), /* clz. */
1299 COSTS_N_INSNS (1), /* rev. */
1301 true /* non_exec_costs_exec. */
1308 COSTS_N_INSNS (1), /* flag_setting. */
1309 COSTS_N_INSNS (1), /* extend. */
1310 COSTS_N_INSNS (1), /* add. */
1311 COSTS_N_INSNS (1), /* extend_add. */
1312 COSTS_N_INSNS (7) /* idiv. */
1316 0, /* simple (N/A). */
1317 0, /* flag_setting (N/A). */
1318 COSTS_N_INSNS (1), /* extend. */
1320 COSTS_N_INSNS (2), /* extend_add. */
1326 COSTS_N_INSNS (1), /* load. */
1327 COSTS_N_INSNS (1), /* load_sign_extend. */
1328 COSTS_N_INSNS (3), /* ldrd. */
1329 COSTS_N_INSNS (1), /* ldm_1st. */
1330 1, /* ldm_regs_per_insn_1st. */
1331 2, /* ldm_regs_per_insn_subsequent. */
1332 COSTS_N_INSNS (2), /* loadf. */
1333 COSTS_N_INSNS (2), /* loadd. */
1334 COSTS_N_INSNS (1), /* load_unaligned. */
1335 COSTS_N_INSNS (1), /* store. */
1336 COSTS_N_INSNS (3), /* strd. */
1337 COSTS_N_INSNS (1), /* stm_1st. */
1338 1, /* stm_regs_per_insn_1st. */
1339 2, /* stm_regs_per_insn_subsequent. */
1340 COSTS_N_INSNS (2), /* storef. */
1341 COSTS_N_INSNS (2), /* stored. */
1342 COSTS_N_INSNS (1) /* store_unaligned. */
1347 COSTS_N_INSNS (15), /* div. */
1348 COSTS_N_INSNS (3), /* mult. */
1349 COSTS_N_INSNS (7), /* mult_addsub. */
1350 COSTS_N_INSNS (7), /* fma. */
1351 COSTS_N_INSNS (3), /* addsub. */
1352 COSTS_N_INSNS (3), /* fpconst. */
1353 COSTS_N_INSNS (3), /* neg. */
1354 COSTS_N_INSNS (3), /* compare. */
1355 COSTS_N_INSNS (3), /* widen. */
1356 COSTS_N_INSNS (3), /* narrow. */
1357 COSTS_N_INSNS (3), /* toint. */
1358 COSTS_N_INSNS (3), /* fromint. */
1359 COSTS_N_INSNS (3) /* roundint. */
1363 COSTS_N_INSNS (30), /* div. */
1364 COSTS_N_INSNS (6), /* mult. */
1365 COSTS_N_INSNS (10), /* mult_addsub. */
1366 COSTS_N_INSNS (7), /* fma. */
1367 COSTS_N_INSNS (3), /* addsub. */
1368 COSTS_N_INSNS (3), /* fpconst. */
1369 COSTS_N_INSNS (3), /* neg. */
1370 COSTS_N_INSNS (3), /* compare. */
1371 COSTS_N_INSNS (3), /* widen. */
1372 COSTS_N_INSNS (3), /* narrow. */
1373 COSTS_N_INSNS (3), /* toint. */
1374 COSTS_N_INSNS (3), /* fromint. */
1375 COSTS_N_INSNS (3) /* roundint. */
1380 COSTS_N_INSNS (1) /* alu. */
1384 const struct cpu_cost_table cortexa12_extra_costs
=
1391 COSTS_N_INSNS (1), /* shift_reg. */
1392 COSTS_N_INSNS (1), /* arith_shift. */
1393 COSTS_N_INSNS (1), /* arith_shift_reg. */
1394 COSTS_N_INSNS (1), /* log_shift. */
1395 COSTS_N_INSNS (1), /* log_shift_reg. */
1397 COSTS_N_INSNS (1), /* extend_arith. */
1399 COSTS_N_INSNS (1), /* bfx. */
1400 COSTS_N_INSNS (1), /* clz. */
1401 COSTS_N_INSNS (1), /* rev. */
1403 true /* non_exec_costs_exec. */
1408 COSTS_N_INSNS (2), /* simple. */
1409 COSTS_N_INSNS (3), /* flag_setting. */
1410 COSTS_N_INSNS (2), /* extend. */
1411 COSTS_N_INSNS (3), /* add. */
1412 COSTS_N_INSNS (2), /* extend_add. */
1413 COSTS_N_INSNS (18) /* idiv. */
1417 0, /* simple (N/A). */
1418 0, /* flag_setting (N/A). */
1419 COSTS_N_INSNS (3), /* extend. */
1421 COSTS_N_INSNS (3), /* extend_add. */
1427 COSTS_N_INSNS (3), /* load. */
1428 COSTS_N_INSNS (3), /* load_sign_extend. */
1429 COSTS_N_INSNS (3), /* ldrd. */
1430 COSTS_N_INSNS (3), /* ldm_1st. */
1431 1, /* ldm_regs_per_insn_1st. */
1432 2, /* ldm_regs_per_insn_subsequent. */
1433 COSTS_N_INSNS (3), /* loadf. */
1434 COSTS_N_INSNS (3), /* loadd. */
1435 0, /* load_unaligned. */
1439 1, /* stm_regs_per_insn_1st. */
1440 2, /* stm_regs_per_insn_subsequent. */
1441 COSTS_N_INSNS (2), /* storef. */
1442 COSTS_N_INSNS (2), /* stored. */
1443 0 /* store_unaligned. */
1448 COSTS_N_INSNS (17), /* div. */
1449 COSTS_N_INSNS (4), /* mult. */
1450 COSTS_N_INSNS (8), /* mult_addsub. */
1451 COSTS_N_INSNS (8), /* fma. */
1452 COSTS_N_INSNS (4), /* addsub. */
1453 COSTS_N_INSNS (2), /* fpconst. */
1454 COSTS_N_INSNS (2), /* neg. */
1455 COSTS_N_INSNS (2), /* compare. */
1456 COSTS_N_INSNS (4), /* widen. */
1457 COSTS_N_INSNS (4), /* narrow. */
1458 COSTS_N_INSNS (4), /* toint. */
1459 COSTS_N_INSNS (4), /* fromint. */
1460 COSTS_N_INSNS (4) /* roundint. */
1464 COSTS_N_INSNS (31), /* div. */
1465 COSTS_N_INSNS (4), /* mult. */
1466 COSTS_N_INSNS (8), /* mult_addsub. */
1467 COSTS_N_INSNS (8), /* fma. */
1468 COSTS_N_INSNS (4), /* addsub. */
1469 COSTS_N_INSNS (2), /* fpconst. */
1470 COSTS_N_INSNS (2), /* neg. */
1471 COSTS_N_INSNS (2), /* compare. */
1472 COSTS_N_INSNS (4), /* widen. */
1473 COSTS_N_INSNS (4), /* narrow. */
1474 COSTS_N_INSNS (4), /* toint. */
1475 COSTS_N_INSNS (4), /* fromint. */
1476 COSTS_N_INSNS (4) /* roundint. */
1481 COSTS_N_INSNS (1) /* alu. */
1485 const struct cpu_cost_table cortexa15_extra_costs
=
1493 COSTS_N_INSNS (1), /* arith_shift. */
1494 COSTS_N_INSNS (1), /* arith_shift_reg. */
1495 COSTS_N_INSNS (1), /* log_shift. */
1496 COSTS_N_INSNS (1), /* log_shift_reg. */
1498 COSTS_N_INSNS (1), /* extend_arith. */
1499 COSTS_N_INSNS (1), /* bfi. */
1504 true /* non_exec_costs_exec. */
1509 COSTS_N_INSNS (2), /* simple. */
1510 COSTS_N_INSNS (3), /* flag_setting. */
1511 COSTS_N_INSNS (2), /* extend. */
1512 COSTS_N_INSNS (2), /* add. */
1513 COSTS_N_INSNS (2), /* extend_add. */
1514 COSTS_N_INSNS (18) /* idiv. */
1518 0, /* simple (N/A). */
1519 0, /* flag_setting (N/A). */
1520 COSTS_N_INSNS (3), /* extend. */
1522 COSTS_N_INSNS (3), /* extend_add. */
1528 COSTS_N_INSNS (3), /* load. */
1529 COSTS_N_INSNS (3), /* load_sign_extend. */
1530 COSTS_N_INSNS (3), /* ldrd. */
1531 COSTS_N_INSNS (4), /* ldm_1st. */
1532 1, /* ldm_regs_per_insn_1st. */
1533 2, /* ldm_regs_per_insn_subsequent. */
1534 COSTS_N_INSNS (4), /* loadf. */
1535 COSTS_N_INSNS (4), /* loadd. */
1536 0, /* load_unaligned. */
1539 COSTS_N_INSNS (1), /* stm_1st. */
1540 1, /* stm_regs_per_insn_1st. */
1541 2, /* stm_regs_per_insn_subsequent. */
1544 0 /* store_unaligned. */
1549 COSTS_N_INSNS (17), /* div. */
1550 COSTS_N_INSNS (4), /* mult. */
1551 COSTS_N_INSNS (8), /* mult_addsub. */
1552 COSTS_N_INSNS (8), /* fma. */
1553 COSTS_N_INSNS (4), /* addsub. */
1554 COSTS_N_INSNS (2), /* fpconst. */
1555 COSTS_N_INSNS (2), /* neg. */
1556 COSTS_N_INSNS (5), /* compare. */
1557 COSTS_N_INSNS (4), /* widen. */
1558 COSTS_N_INSNS (4), /* narrow. */
1559 COSTS_N_INSNS (4), /* toint. */
1560 COSTS_N_INSNS (4), /* fromint. */
1561 COSTS_N_INSNS (4) /* roundint. */
1565 COSTS_N_INSNS (31), /* div. */
1566 COSTS_N_INSNS (4), /* mult. */
1567 COSTS_N_INSNS (8), /* mult_addsub. */
1568 COSTS_N_INSNS (8), /* fma. */
1569 COSTS_N_INSNS (4), /* addsub. */
1570 COSTS_N_INSNS (2), /* fpconst. */
1571 COSTS_N_INSNS (2), /* neg. */
1572 COSTS_N_INSNS (2), /* compare. */
1573 COSTS_N_INSNS (4), /* widen. */
1574 COSTS_N_INSNS (4), /* narrow. */
1575 COSTS_N_INSNS (4), /* toint. */
1576 COSTS_N_INSNS (4), /* fromint. */
1577 COSTS_N_INSNS (4) /* roundint. */
1582 COSTS_N_INSNS (1) /* alu. */
1586 const struct cpu_cost_table v7m_extra_costs
=
1594 0, /* arith_shift. */
1595 COSTS_N_INSNS (1), /* arith_shift_reg. */
1597 COSTS_N_INSNS (1), /* log_shift_reg. */
1599 COSTS_N_INSNS (1), /* extend_arith. */
1604 COSTS_N_INSNS (1), /* non_exec. */
1605 false /* non_exec_costs_exec. */
1610 COSTS_N_INSNS (1), /* simple. */
1611 COSTS_N_INSNS (1), /* flag_setting. */
1612 COSTS_N_INSNS (2), /* extend. */
1613 COSTS_N_INSNS (1), /* add. */
1614 COSTS_N_INSNS (3), /* extend_add. */
1615 COSTS_N_INSNS (8) /* idiv. */
1619 0, /* simple (N/A). */
1620 0, /* flag_setting (N/A). */
1621 COSTS_N_INSNS (2), /* extend. */
1623 COSTS_N_INSNS (3), /* extend_add. */
1629 COSTS_N_INSNS (2), /* load. */
1630 0, /* load_sign_extend. */
1631 COSTS_N_INSNS (3), /* ldrd. */
1632 COSTS_N_INSNS (2), /* ldm_1st. */
1633 1, /* ldm_regs_per_insn_1st. */
1634 1, /* ldm_regs_per_insn_subsequent. */
1635 COSTS_N_INSNS (2), /* loadf. */
1636 COSTS_N_INSNS (3), /* loadd. */
1637 COSTS_N_INSNS (1), /* load_unaligned. */
1638 COSTS_N_INSNS (2), /* store. */
1639 COSTS_N_INSNS (3), /* strd. */
1640 COSTS_N_INSNS (2), /* stm_1st. */
1641 1, /* stm_regs_per_insn_1st. */
1642 1, /* stm_regs_per_insn_subsequent. */
1643 COSTS_N_INSNS (2), /* storef. */
1644 COSTS_N_INSNS (3), /* stored. */
1645 COSTS_N_INSNS (1) /* store_unaligned. */
1650 COSTS_N_INSNS (7), /* div. */
1651 COSTS_N_INSNS (2), /* mult. */
1652 COSTS_N_INSNS (5), /* mult_addsub. */
1653 COSTS_N_INSNS (3), /* fma. */
1654 COSTS_N_INSNS (1), /* addsub. */
1666 COSTS_N_INSNS (15), /* div. */
1667 COSTS_N_INSNS (5), /* mult. */
1668 COSTS_N_INSNS (7), /* mult_addsub. */
1669 COSTS_N_INSNS (7), /* fma. */
1670 COSTS_N_INSNS (3), /* addsub. */
1683 COSTS_N_INSNS (1) /* alu. */
1687 const struct tune_params arm_slowmul_tune
=
1689 arm_slowmul_rtx_costs
,
1691 NULL
, /* Sched adj cost. */
1692 3, /* Constant limit. */
1693 5, /* Max cond insns. */
1694 ARM_PREFETCH_NOT_BENEFICIAL
,
1695 true, /* Prefer constant pool. */
1696 arm_default_branch_cost
,
1697 false, /* Prefer LDRD/STRD. */
1698 {true, true}, /* Prefer non short circuit. */
1699 &arm_default_vec_cost
, /* Vectorizer costs. */
1700 false, /* Prefer Neon for 64-bits bitops. */
1701 false, false /* Prefer 32-bit encodings. */
1704 const struct tune_params arm_fastmul_tune
=
1706 arm_fastmul_rtx_costs
,
1708 NULL
, /* Sched adj cost. */
1709 1, /* Constant limit. */
1710 5, /* Max cond insns. */
1711 ARM_PREFETCH_NOT_BENEFICIAL
,
1712 true, /* Prefer constant pool. */
1713 arm_default_branch_cost
,
1714 false, /* Prefer LDRD/STRD. */
1715 {true, true}, /* Prefer non short circuit. */
1716 &arm_default_vec_cost
, /* Vectorizer costs. */
1717 false, /* Prefer Neon for 64-bits bitops. */
1718 false, false /* Prefer 32-bit encodings. */
1721 /* StrongARM has early execution of branches, so a sequence that is worth
1722 skipping is shorter. Set max_insns_skipped to a lower value. */
1724 const struct tune_params arm_strongarm_tune
=
1726 arm_fastmul_rtx_costs
,
1728 NULL
, /* Sched adj cost. */
1729 1, /* Constant limit. */
1730 3, /* Max cond insns. */
1731 ARM_PREFETCH_NOT_BENEFICIAL
,
1732 true, /* Prefer constant pool. */
1733 arm_default_branch_cost
,
1734 false, /* Prefer LDRD/STRD. */
1735 {true, true}, /* Prefer non short circuit. */
1736 &arm_default_vec_cost
, /* Vectorizer costs. */
1737 false, /* Prefer Neon for 64-bits bitops. */
1738 false, false /* Prefer 32-bit encodings. */
1741 const struct tune_params arm_xscale_tune
=
1743 arm_xscale_rtx_costs
,
1745 xscale_sched_adjust_cost
,
1746 2, /* Constant limit. */
1747 3, /* Max cond insns. */
1748 ARM_PREFETCH_NOT_BENEFICIAL
,
1749 true, /* Prefer constant pool. */
1750 arm_default_branch_cost
,
1751 false, /* Prefer LDRD/STRD. */
1752 {true, true}, /* Prefer non short circuit. */
1753 &arm_default_vec_cost
, /* Vectorizer costs. */
1754 false, /* Prefer Neon for 64-bits bitops. */
1755 false, false /* Prefer 32-bit encodings. */
1758 const struct tune_params arm_9e_tune
=
1762 NULL
, /* Sched adj cost. */
1763 1, /* Constant limit. */
1764 5, /* Max cond insns. */
1765 ARM_PREFETCH_NOT_BENEFICIAL
,
1766 true, /* Prefer constant pool. */
1767 arm_default_branch_cost
,
1768 false, /* Prefer LDRD/STRD. */
1769 {true, true}, /* Prefer non short circuit. */
1770 &arm_default_vec_cost
, /* Vectorizer costs. */
1771 false, /* Prefer Neon for 64-bits bitops. */
1772 false, false /* Prefer 32-bit encodings. */
1775 const struct tune_params arm_v6t2_tune
=
1779 NULL
, /* Sched adj cost. */
1780 1, /* Constant limit. */
1781 5, /* Max cond insns. */
1782 ARM_PREFETCH_NOT_BENEFICIAL
,
1783 false, /* Prefer constant pool. */
1784 arm_default_branch_cost
,
1785 false, /* Prefer LDRD/STRD. */
1786 {true, true}, /* Prefer non short circuit. */
1787 &arm_default_vec_cost
, /* Vectorizer costs. */
1788 false, /* Prefer Neon for 64-bits bitops. */
1789 false, false /* Prefer 32-bit encodings. */
1792 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1793 const struct tune_params arm_cortex_tune
=
1796 &generic_extra_costs
,
1797 NULL
, /* Sched adj cost. */
1798 1, /* Constant limit. */
1799 5, /* Max cond insns. */
1800 ARM_PREFETCH_NOT_BENEFICIAL
,
1801 false, /* Prefer constant pool. */
1802 arm_default_branch_cost
,
1803 false, /* Prefer LDRD/STRD. */
1804 {true, true}, /* Prefer non short circuit. */
1805 &arm_default_vec_cost
, /* Vectorizer costs. */
1806 false, /* Prefer Neon for 64-bits bitops. */
1807 false, false /* Prefer 32-bit encodings. */
1810 const struct tune_params arm_cortex_a8_tune
=
1813 &cortexa8_extra_costs
,
1814 NULL
, /* Sched adj cost. */
1815 1, /* Constant limit. */
1816 5, /* Max cond insns. */
1817 ARM_PREFETCH_NOT_BENEFICIAL
,
1818 false, /* Prefer constant pool. */
1819 arm_default_branch_cost
,
1820 false, /* Prefer LDRD/STRD. */
1821 {true, true}, /* Prefer non short circuit. */
1822 &arm_default_vec_cost
, /* Vectorizer costs. */
1823 false, /* Prefer Neon for 64-bits bitops. */
1824 false, false /* Prefer 32-bit encodings. */
1827 const struct tune_params arm_cortex_a7_tune
=
1830 &cortexa7_extra_costs
,
1832 1, /* Constant limit. */
1833 5, /* Max cond insns. */
1834 ARM_PREFETCH_NOT_BENEFICIAL
,
1835 false, /* Prefer constant pool. */
1836 arm_default_branch_cost
,
1837 false, /* Prefer LDRD/STRD. */
1838 {true, true}, /* Prefer non short circuit. */
1839 &arm_default_vec_cost
, /* Vectorizer costs. */
1840 false, /* Prefer Neon for 64-bits bitops. */
1841 false, false /* Prefer 32-bit encodings. */
1844 const struct tune_params arm_cortex_a15_tune
=
1847 &cortexa15_extra_costs
,
1848 NULL
, /* Sched adj cost. */
1849 1, /* Constant limit. */
1850 2, /* Max cond insns. */
1851 ARM_PREFETCH_NOT_BENEFICIAL
,
1852 false, /* Prefer constant pool. */
1853 arm_default_branch_cost
,
1854 true, /* Prefer LDRD/STRD. */
1855 {true, true}, /* Prefer non short circuit. */
1856 &arm_default_vec_cost
, /* Vectorizer costs. */
1857 false, /* Prefer Neon for 64-bits bitops. */
1858 true, true /* Prefer 32-bit encodings. */
1861 const struct tune_params arm_cortex_a53_tune
=
1864 &cortexa53_extra_costs
,
1865 NULL
, /* Scheduler cost adjustment. */
1866 1, /* Constant limit. */
1867 5, /* Max cond insns. */
1868 ARM_PREFETCH_NOT_BENEFICIAL
,
1869 false, /* Prefer constant pool. */
1870 arm_default_branch_cost
,
1871 false, /* Prefer LDRD/STRD. */
1872 {true, true}, /* Prefer non short circuit. */
1873 &arm_default_vec_cost
, /* Vectorizer costs. */
1874 false, /* Prefer Neon for 64-bits bitops. */
1875 false, false /* Prefer 32-bit encodings. */
1878 const struct tune_params arm_cortex_a57_tune
=
1881 &cortexa57_extra_costs
,
1882 NULL
, /* Scheduler cost adjustment. */
1883 1, /* Constant limit. */
1884 2, /* Max cond insns. */
1885 ARM_PREFETCH_NOT_BENEFICIAL
,
1886 false, /* Prefer constant pool. */
1887 arm_default_branch_cost
,
1888 true, /* Prefer LDRD/STRD. */
1889 {true, true}, /* Prefer non short circuit. */
1890 &arm_default_vec_cost
, /* Vectorizer costs. */
1891 false, /* Prefer Neon for 64-bits bitops. */
1892 true, true /* Prefer 32-bit encodings. */
1895 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1896 less appealing. Set max_insns_skipped to a low value. */
1898 const struct tune_params arm_cortex_a5_tune
=
1901 &cortexa5_extra_costs
,
1902 NULL
, /* Sched adj cost. */
1903 1, /* Constant limit. */
1904 1, /* Max cond insns. */
1905 ARM_PREFETCH_NOT_BENEFICIAL
,
1906 false, /* Prefer constant pool. */
1907 arm_cortex_a5_branch_cost
,
1908 false, /* Prefer LDRD/STRD. */
1909 {false, false}, /* Prefer non short circuit. */
1910 &arm_default_vec_cost
, /* Vectorizer costs. */
1911 false, /* Prefer Neon for 64-bits bitops. */
1912 false, false /* Prefer 32-bit encodings. */
1915 const struct tune_params arm_cortex_a9_tune
=
1918 &cortexa9_extra_costs
,
1919 cortex_a9_sched_adjust_cost
,
1920 1, /* Constant limit. */
1921 5, /* Max cond insns. */
1922 ARM_PREFETCH_BENEFICIAL(4,32,32),
1923 false, /* Prefer constant pool. */
1924 arm_default_branch_cost
,
1925 false, /* Prefer LDRD/STRD. */
1926 {true, true}, /* Prefer non short circuit. */
1927 &arm_default_vec_cost
, /* Vectorizer costs. */
1928 false, /* Prefer Neon for 64-bits bitops. */
1929 false, false /* Prefer 32-bit encodings. */
1932 const struct tune_params arm_cortex_a12_tune
=
1935 &cortexa12_extra_costs
,
1937 1, /* Constant limit. */
1938 5, /* Max cond insns. */
1939 ARM_PREFETCH_BENEFICIAL(4,32,32),
1940 false, /* Prefer constant pool. */
1941 arm_default_branch_cost
,
1942 true, /* Prefer LDRD/STRD. */
1943 {true, true}, /* Prefer non short circuit. */
1944 &arm_default_vec_cost
, /* Vectorizer costs. */
1945 false, /* Prefer Neon for 64-bits bitops. */
1946 false, false /* Prefer 32-bit encodings. */
1949 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
1950 cycle to execute each. An LDR from the constant pool also takes two cycles
1951 to execute, but mildly increases pipelining opportunity (consecutive
1952 loads/stores can be pipelined together, saving one cycle), and may also
1953 improve icache utilisation. Hence we prefer the constant pool for such
1956 const struct tune_params arm_v7m_tune
=
1960 NULL
, /* Sched adj cost. */
1961 1, /* Constant limit. */
1962 2, /* Max cond insns. */
1963 ARM_PREFETCH_NOT_BENEFICIAL
,
1964 true, /* Prefer constant pool. */
1965 arm_cortex_m_branch_cost
,
1966 false, /* Prefer LDRD/STRD. */
1967 {false, false}, /* Prefer non short circuit. */
1968 &arm_default_vec_cost
, /* Vectorizer costs. */
1969 false, /* Prefer Neon for 64-bits bitops. */
1970 false, false /* Prefer 32-bit encodings. */
1973 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1974 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1975 const struct tune_params arm_v6m_tune
=
1979 NULL
, /* Sched adj cost. */
1980 1, /* Constant limit. */
1981 5, /* Max cond insns. */
1982 ARM_PREFETCH_NOT_BENEFICIAL
,
1983 false, /* Prefer constant pool. */
1984 arm_default_branch_cost
,
1985 false, /* Prefer LDRD/STRD. */
1986 {false, false}, /* Prefer non short circuit. */
1987 &arm_default_vec_cost
, /* Vectorizer costs. */
1988 false, /* Prefer Neon for 64-bits bitops. */
1989 false, false /* Prefer 32-bit encodings. */
1992 const struct tune_params arm_fa726te_tune
=
1996 fa726te_sched_adjust_cost
,
1997 1, /* Constant limit. */
1998 5, /* Max cond insns. */
1999 ARM_PREFETCH_NOT_BENEFICIAL
,
2000 true, /* Prefer constant pool. */
2001 arm_default_branch_cost
,
2002 false, /* Prefer LDRD/STRD. */
2003 {true, true}, /* Prefer non short circuit. */
2004 &arm_default_vec_cost
, /* Vectorizer costs. */
2005 false, /* Prefer Neon for 64-bits bitops. */
2006 false, false /* Prefer 32-bit encodings. */
2010 /* Not all of these give usefully different compilation alternatives,
2011 but there is no simple way of generalizing them. */
2012 static const struct processors all_cores
[] =
2015 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2016 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2017 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2018 #include "arm-cores.def"
2020 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
2023 static const struct processors all_architectures
[] =
2025 /* ARM Architectures */
2026 /* We don't specify tuning costs here as it will be figured out
2029 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2030 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2031 #include "arm-arches.def"
2033 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
2037 /* These are populated as commandline arguments are processed, or NULL
2038 if not specified. */
/* Architecture chosen by -march (entry in all_architectures).  */
2039 static const struct processors
*arm_selected_arch
;
/* CPU chosen by -mcpu (entry in all_cores).  */
2040 static const struct processors
*arm_selected_cpu
;
/* Tuning target chosen by -mtune (or derived from -mcpu).  */
2041 static const struct processors
*arm_selected_tune
;
2043 /* The name of the preprocessor macro to define for this architecture. */
/* Placeholder value; arm_option_override overwrites it with
   sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch).  */
2045 char arm_arch_name
[] = "__ARM_ARCH_0UNK__";
2047 /* Available values for -mfpu=. */
/* Table of FPU descriptors expanded from arm-fpus.def; indexed later by
   arm_fpu_index in arm_option_override.  */
2049 static const struct arm_fpu_desc all_fpus
[] =
2051 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2052 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2053 #include "arm-fpus.def"
2058 /* Supported TLS relocations. */
/* NOTE(review): the enum header and the earlier enumerators (original
   lines 2059-2065) were dropped by the extraction; only the final
   TLS descriptor-sequence enumerator is visible -- verify against
   upstream gcc/config/arm/arm.c.  */
2066 TLS_DESCSEQ
/* GNU scheme */
2069 /* The maximum number of insns to be used when loading a constant. */
2071 arm_constant_limit (bool size_p
)
2073 return size_p
? 1 : current_tune
->constant_limit
;
2076 /* Emit an insn that's a simple single-set. Both the operands must be known
2079 emit_set_insn (rtx x
, rtx y
)
2081 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long nbits;

  /* Kernighan's trick: "value &= value - 1" clears exactly the
     least-significant set bit, so the loop iterates once per set bit
     rather than once per bit position.  */
  for (nbits = 0; value != 0; nbits++)
    value &= value - 1;

  return nbits;
}
/* NOTE(review): the opening "typedef struct {" and at least one field
   (presumably "const char *name;", since .name is used alongside .mode
   by the fixed-point libfunc loops below) were dropped by the
   extraction -- verify against upstream gcc/config/arm/arm.c.  */
2101 enum machine_mode mode
;
2103 } arm_fixed_mode_set
;
2105 /* A small helper for setting fixed-point library libfuncs. */
2108 arm_set_fixed_optab_libfunc (optab optable
, enum machine_mode mode
,
2109 const char *funcname
, const char *modename
,
2114 if (num_suffix
== 0)
2115 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2117 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2119 set_optab_libfunc (optable
, mode
, buffer
);
2123 arm_set_fixed_conv_libfunc (convert_optab optable
, enum machine_mode to
,
2124 enum machine_mode from
, const char *funcname
,
2125 const char *toname
, const char *fromname
)
2128 const char *maybe_suffix_2
= "";
2130 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2131 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2132 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2133 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2134 maybe_suffix_2
= "2";
2136 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2139 set_conv_libfunc (optable
, to
, from
, buffer
);
2142 /* Set up library functions unique to ARM. */
2145 arm_init_libfuncs (void)
/* NOTE(review): the extraction dropped interior lines of this function
   (the "static void" header, braces, and the early return for
   non-AAPCS targets at original lines ~2150-2155).  The __aeabi_*
   names below come from the "Run-time ABI for the ARM Architecture"
   (RTABI), Section 4; verify elided parts against upstream
   gcc/config/arm/arm.c.  */
2147 /* For Linux, we have access to kernel support for atomic operations. */
2148 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2149 init_sync_libfuncs (2 * UNITS_PER_WORD
);
2151 /* There are no special library functions unless we are using the
2156 /* The functions below are described in Section 4 of the "Run-Time
2157 ABI for the ARM architecture", Version 1.0. */
2159 /* Double-precision floating-point arithmetic. Table 2. */
2160 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2161 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2162 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2163 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2164 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2166 /* Double-precision comparisons. Table 3. */
2167 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2168 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2169 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2170 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2171 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2172 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2173 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2175 /* Single-precision floating-point arithmetic. Table 4. */
2176 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2177 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2178 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2179 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2180 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2182 /* Single-precision comparisons. Table 5. */
2183 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2184 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2185 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2186 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2187 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2188 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2189 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2191 /* Floating-point to integer conversions. Table 6. */
2192 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2193 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2194 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2195 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2196 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2197 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2198 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2199 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2201 /* Conversions between floating types. Table 7. */
2202 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2203 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2205 /* Integer to floating-point conversions. Table 8. */
2206 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2207 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2208 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2209 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2210 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2211 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2212 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2213 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2215 /* Long long. Table 9. */
2216 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2217 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2218 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2219 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2220 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2221 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2222 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2223 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2225 /* Integer (32/32->32) division. \S 4.3.1. */
2226 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2227 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2229 /* The divmod functions are designed so that they can be used for
2230 plain division, even though they return both the quotient and the
2231 remainder. The quotient is returned in the usual location (i.e.,
2232 r0 for SImode, {r0, r1} for DImode), just as would be expected
2233 for an ordinary division routine. Because the AAPCS calling
2234 conventions specify that all of { r0, r1, r2, r3 } are
2235 callee-saved registers, there is no need to tell the compiler
2236 explicitly that those registers are clobbered by these
2238 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2239 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2241 /* For SImode division the ABI provides div-without-mod routines,
2242 which are faster. */
2243 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2244 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2246 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2247 divmod libcalls instead. */
2248 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2249 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2250 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2251 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2253 /* Half-precision float operations. The compiler handles all operations
2254 with NULL libfuncs by converting the SFmode. */
2255 switch (arm_fp16_format
)
2257 case ARM_FP16_FORMAT_IEEE
:
2258 case ARM_FP16_FORMAT_ALTERNATIVE
:
2261 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2262 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2264 : "__gnu_f2h_alternative"));
2265 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2266 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2268 : "__gnu_h2f_alternative"));
2271 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2272 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2273 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2274 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2275 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2278 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2279 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2280 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2281 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2282 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2283 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2284 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2291 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
/* NOTE(review): the entries of the two mode tables below (original
   lines 2294-2343) were lost in extraction -- verify against upstream.  */
2293 const arm_fixed_mode_set fixed_arith_modes
[] =
2314 const arm_fixed_mode_set fixed_conv_modes
[] =
2344 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2346 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2347 "add", fixed_arith_modes
[i
].name
, 3);
2348 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2349 "ssadd", fixed_arith_modes
[i
].name
, 3);
2350 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2351 "usadd", fixed_arith_modes
[i
].name
, 3);
2352 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2353 "sub", fixed_arith_modes
[i
].name
, 3);
2354 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2355 "sssub", fixed_arith_modes
[i
].name
, 3);
2356 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2357 "ussub", fixed_arith_modes
[i
].name
, 3);
2358 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2359 "mul", fixed_arith_modes
[i
].name
, 3);
2360 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2361 "ssmul", fixed_arith_modes
[i
].name
, 3);
2362 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2363 "usmul", fixed_arith_modes
[i
].name
, 3);
2364 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2365 "div", fixed_arith_modes
[i
].name
, 3);
2366 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2367 "udiv", fixed_arith_modes
[i
].name
, 3);
2368 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2369 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2370 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2371 "usdiv", fixed_arith_modes
[i
].name
, 3);
2372 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2373 "neg", fixed_arith_modes
[i
].name
, 2);
2374 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2375 "ssneg", fixed_arith_modes
[i
].name
, 2);
2376 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2377 "usneg", fixed_arith_modes
[i
].name
, 2);
2378 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2379 "ashl", fixed_arith_modes
[i
].name
, 3);
2380 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2381 "ashr", fixed_arith_modes
[i
].name
, 3);
2382 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2383 "lshr", fixed_arith_modes
[i
].name
, 3);
2384 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2385 "ssashl", fixed_arith_modes
[i
].name
, 3);
2386 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2387 "usashl", fixed_arith_modes
[i
].name
, 3);
2388 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2389 "cmp", fixed_arith_modes
[i
].name
, 2);
2392 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2393 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2396 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2397 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2400 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2401 fixed_conv_modes
[j
].mode
, "fract",
2402 fixed_conv_modes
[i
].name
,
2403 fixed_conv_modes
[j
].name
);
2404 arm_set_fixed_conv_libfunc (satfract_optab
,
2405 fixed_conv_modes
[i
].mode
,
2406 fixed_conv_modes
[j
].mode
, "satfract",
2407 fixed_conv_modes
[i
].name
,
2408 fixed_conv_modes
[j
].name
);
2409 arm_set_fixed_conv_libfunc (fractuns_optab
,
2410 fixed_conv_modes
[i
].mode
,
2411 fixed_conv_modes
[j
].mode
, "fractuns",
2412 fixed_conv_modes
[i
].name
,
2413 fixed_conv_modes
[j
].name
);
2414 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2415 fixed_conv_modes
[i
].mode
,
2416 fixed_conv_modes
[j
].mode
, "satfractuns",
2417 fixed_conv_modes
[i
].name
,
2418 fixed_conv_modes
[j
].name
);
2422 if (TARGET_AAPCS_BASED
)
2423 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2426 /* On AAPCS systems, this is the "struct __va_list". */
/* Cached va_list record type, built lazily by arm_build_builtin_va_list;
   GTY(()) registers it as a GC root so it survives collections.  */
2427 static GTY(()) tree va_list_type
;
2429 /* Return the type to use as __builtin_va_list. */
2431 arm_build_builtin_va_list (void)
2436 if (!TARGET_AAPCS_BASED
)
2437 return std_build_builtin_va_list ();
2439 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2447 The C Library ABI further reinforces this definition in \S
2450 We must follow this definition exactly. The structure tag
2451 name is visible in C++ mangled names, and thus forms a part
2452 of the ABI. The field name may be used by people who
2453 #include <stdarg.h>. */
2454 /* Create the type. */
2455 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2456 /* Give it the required name. */
2457 va_list_name
= build_decl (BUILTINS_LOCATION
,
2459 get_identifier ("__va_list"),
2461 DECL_ARTIFICIAL (va_list_name
) = 1;
2462 TYPE_NAME (va_list_type
) = va_list_name
;
2463 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2464 /* Create the __ap field. */
2465 ap_field
= build_decl (BUILTINS_LOCATION
,
2467 get_identifier ("__ap"),
2469 DECL_ARTIFICIAL (ap_field
) = 1;
2470 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2471 TYPE_FIELDS (va_list_type
) = ap_field
;
2472 /* Compute its layout. */
2473 layout_type (va_list_type
);
2475 return va_list_type
;
2478 /* Return an expression of type "void *" pointing to the next
2479 available argument in a variable-argument list. VALIST is the
2480 user-level va_list object, of type __builtin_va_list. */
2482 arm_extract_valist_ptr (tree valist
)
2484 if (TREE_TYPE (valist
) == error_mark_node
)
2485 return error_mark_node
;
2487 /* On an AAPCS target, the pointer is stored within "struct
2489 if (TARGET_AAPCS_BASED
)
2491 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2492 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2493 valist
, ap_field
, NULL_TREE
);
2499 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2501 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2503 valist
= arm_extract_valist_ptr (valist
);
2504 std_expand_builtin_va_start (valist
, nextarg
);
2507 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2509 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2512 valist
= arm_extract_valist_ptr (valist
);
2513 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2516 /* Fix up any incompatible options that the user has specified. */
2518 arm_option_override (void)
/* NOTE(review): many interior lines of this function were dropped by the
   extraction (the "static void" header, braces, else-arms and several
   statements) -- the text below is a partial view.  Verify against
   upstream gcc/config/arm/arm.c before editing.  Overall flow: resolve
   -march/-mcpu/-mtune into arm_selected_{arch,cpu,tune}, derive the
   insn/tune feature-flag booleans, then validate and adjust the
   remaining command-line options.  */
2520 if (global_options_set
.x_arm_arch_option
)
2521 arm_selected_arch
= &all_architectures
[arm_arch_option
];
2523 if (global_options_set
.x_arm_cpu_option
)
2525 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
2526 arm_selected_tune
= &all_cores
[(int) arm_cpu_option
];
2529 if (global_options_set
.x_arm_tune_option
)
2530 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
2532 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2533 SUBTARGET_OVERRIDE_OPTIONS
;
2536 if (arm_selected_arch
)
2538 if (arm_selected_cpu
)
2540 /* Check for conflict between mcpu and march. */
2541 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
2543 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2544 arm_selected_cpu
->name
, arm_selected_arch
->name
);
2545 /* -march wins for code generation.
2546 -mcpu wins for default tuning. */
2547 if (!arm_selected_tune
)
2548 arm_selected_tune
= arm_selected_cpu
;
2550 arm_selected_cpu
= arm_selected_arch
;
2554 arm_selected_arch
= NULL
;
2557 /* Pick a CPU based on the architecture. */
2558 arm_selected_cpu
= arm_selected_arch
;
2561 /* If the user did not specify a processor, choose one for them. */
2562 if (!arm_selected_cpu
)
2564 const struct processors
* sel
;
2565 unsigned int sought
;
2567 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
2568 if (!arm_selected_cpu
->name
)
2570 #ifdef SUBTARGET_CPU_DEFAULT
2571 /* Use the subtarget default CPU if none was specified by
2573 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
2575 /* Default to ARM6. */
2576 if (!arm_selected_cpu
->name
)
2577 arm_selected_cpu
= &all_cores
[arm6
];
2580 sel
= arm_selected_cpu
;
2581 insn_flags
= sel
->flags
;
2583 /* Now check to see if the user has specified some command line
2584 switch that require certain abilities from the cpu. */
2587 if (TARGET_INTERWORK
|| TARGET_THUMB
)
2589 sought
|= (FL_THUMB
| FL_MODE32
);
2591 /* There are no ARM processors that support both APCS-26 and
2592 interworking. Therefore we force FL_MODE26 to be removed
2593 from insn_flags here (if it was set), so that the search
2594 below will always be able to find a compatible processor. */
2595 insn_flags
&= ~FL_MODE26
;
2598 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
2600 /* Try to locate a CPU type that supports all of the abilities
2601 of the default CPU, plus the extra abilities requested by
2603 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2604 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
2607 if (sel
->name
== NULL
)
2609 unsigned current_bit_count
= 0;
2610 const struct processors
* best_fit
= NULL
;
2612 /* Ideally we would like to issue an error message here
2613 saying that it was not possible to find a CPU compatible
2614 with the default CPU, but which also supports the command
2615 line options specified by the programmer, and so they
2616 ought to use the -mcpu=<name> command line option to
2617 override the default CPU type.
2619 If we cannot find a cpu that has both the
2620 characteristics of the default cpu and the given
2621 command line options we scan the array again looking
2622 for a best match. */
2623 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2624 if ((sel
->flags
& sought
) == sought
)
2628 count
= bit_count (sel
->flags
& insn_flags
);
2630 if (count
>= current_bit_count
)
2633 current_bit_count
= count
;
2637 gcc_assert (best_fit
);
2641 arm_selected_cpu
= sel
;
2645 gcc_assert (arm_selected_cpu
);
2646 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2647 if (!arm_selected_tune
)
2648 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
2650 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
2651 insn_flags
= arm_selected_cpu
->flags
;
2652 arm_base_arch
= arm_selected_cpu
->base_arch
;
2654 arm_tune
= arm_selected_tune
->core
;
2655 tune_flags
= arm_selected_tune
->flags
;
2656 current_tune
= arm_selected_tune
->tune
;
2658 /* Make sure that the processor choice does not conflict with any of the
2659 other command line choices. */
2660 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
2661 error ("target CPU does not support ARM mode");
2663 /* BPABI targets use linker tricks to allow interworking on cores
2664 without thumb support. */
2665 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
2667 warning (0, "target CPU does not support interworking" );
2668 target_flags
&= ~MASK_INTERWORK
;
2671 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
2673 warning (0, "target CPU does not support THUMB instructions");
2674 target_flags
&= ~MASK_THUMB
;
2677 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
2679 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2680 target_flags
&= ~MASK_APCS_FRAME
;
2683 /* Callee super interworking implies thumb interworking. Adding
2684 this to the flags here simplifies the logic elsewhere. */
2685 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
2686 target_flags
|= MASK_INTERWORK
;
2688 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2689 from here where no function is being compiled currently. */
2690 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
2691 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2693 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
2694 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2696 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
2698 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2699 target_flags
|= MASK_APCS_FRAME
;
2702 if (TARGET_POKE_FUNCTION_NAME
)
2703 target_flags
|= MASK_APCS_FRAME
;
2705 if (TARGET_APCS_REENT
&& flag_pic
)
2706 error ("-fpic and -mapcs-reent are incompatible");
2708 if (TARGET_APCS_REENT
)
2709 warning (0, "APCS reentrant code not supported. Ignored");
2711 /* If this target is normally configured to use APCS frames, warn if they
2712 are turned off and debugging is turned on. */
2714 && write_symbols
!= NO_DEBUG
2715 && !TARGET_APCS_FRAME
2716 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2717 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2719 if (TARGET_APCS_FLOAT
)
2720 warning (0, "passing floating point arguments in fp regs not yet supported");
2722 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2723 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
2724 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
2725 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
2726 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
2727 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
2728 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
2729 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
2730 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
2731 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
2732 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
2733 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
2734 arm_arch8
= (insn_flags
& FL_ARCH8
) != 0;
2735 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
2736 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
2738 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
2739 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
2740 thumb_code
= TARGET_ARM
== 0;
2741 thumb1_code
= TARGET_THUMB1
!= 0;
2742 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
2743 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
2744 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
2745 arm_arch_iwmmxt2
= (insn_flags
& FL_IWMMXT2
) != 0;
2746 arm_arch_thumb_hwdiv
= (insn_flags
& FL_THUMB_DIV
) != 0;
2747 arm_arch_arm_hwdiv
= (insn_flags
& FL_ARM_DIV
) != 0;
2748 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
2749 arm_arch_crc
= (insn_flags
& FL_CRC32
) != 0;
2750 if (arm_restrict_it
== 2)
2751 arm_restrict_it
= arm_arch8
&& TARGET_THUMB2
;
2754 arm_restrict_it
= 0;
2756 /* If we are not using the default (ARM mode) section anchor offset
2757 ranges, then set the correct ranges now. */
2760 /* Thumb-1 LDR instructions cannot have negative offsets.
2761 Permissible positive offset ranges are 5-bit (for byte loads),
2762 6-bit (for halfword loads), or 7-bit (for word loads).
2763 Empirical results suggest a 7-bit anchor range gives the best
2764 overall code size. */
2765 targetm
.min_anchor_offset
= 0;
2766 targetm
.max_anchor_offset
= 127;
2768 else if (TARGET_THUMB2
)
2770 /* The minimum is set such that the total size of the block
2771 for a particular anchor is 248 + 1 + 4095 bytes, which is
2772 divisible by eight, ensuring natural spacing of anchors. */
2773 targetm
.min_anchor_offset
= -248;
2774 targetm
.max_anchor_offset
= 4095;
2777 /* V5 code we generate is completely interworking capable, so we turn off
2778 TARGET_INTERWORK here to avoid many tests later on. */
2780 /* XXX However, we must pass the right pre-processor defines to CPP
2781 or GLD can get confused. This is a hack. */
2782 if (TARGET_INTERWORK
)
2783 arm_cpp_interwork
= 1;
2786 target_flags
&= ~MASK_INTERWORK
;
2788 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
2789 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2791 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
2792 error ("iwmmxt abi requires an iwmmxt capable cpu");
2794 if (!global_options_set
.x_arm_fpu_index
)
2796 const char *target_fpu_name
;
2799 #ifdef FPUTYPE_DEFAULT
2800 target_fpu_name
= FPUTYPE_DEFAULT
;
2802 target_fpu_name
= "vfp";
2805 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
2810 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
2812 switch (arm_fpu_desc
->model
)
2814 case ARM_FP_MODEL_VFP
:
2815 arm_fpu_attr
= FPU_VFP
;
2822 if (TARGET_AAPCS_BASED
)
2824 if (TARGET_CALLER_INTERWORKING
)
2825 error ("AAPCS does not support -mcaller-super-interworking");
2827 if (TARGET_CALLEE_INTERWORKING
)
2828 error ("AAPCS does not support -mcallee-super-interworking");
2831 /* iWMMXt and NEON are incompatible. */
2832 if (TARGET_IWMMXT
&& TARGET_NEON
)
2833 error ("iWMMXt and NEON are incompatible");
2835 /* iWMMXt unsupported under Thumb mode. */
2836 if (TARGET_THUMB
&& TARGET_IWMMXT
)
2837 error ("iWMMXt unsupported under Thumb mode");
2839 /* __fp16 support currently assumes the core has ldrh. */
2840 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
2841 sorry ("__fp16 and no ldrh");
2843 /* If soft-float is specified then don't use FPU. */
2844 if (TARGET_SOFT_FLOAT
)
2845 arm_fpu_attr
= FPU_NONE
;
2847 if (TARGET_AAPCS_BASED
)
2849 if (arm_abi
== ARM_ABI_IWMMXT
)
2850 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
2851 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
2852 && TARGET_HARD_FLOAT
2854 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
2856 arm_pcs_default
= ARM_PCS_AAPCS
;
2860 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
2861 sorry ("-mfloat-abi=hard and VFP");
2863 if (arm_abi
== ARM_ABI_APCS
)
2864 arm_pcs_default
= ARM_PCS_APCS
;
2866 arm_pcs_default
= ARM_PCS_ATPCS
;
2869 /* For arm2/3 there is no need to do any scheduling if we are doing
2870 software floating-point. */
2871 if (TARGET_SOFT_FLOAT
&& (tune_flags
& FL_MODE32
) == 0)
2872 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
2874 /* Use the cp15 method if it is available. */
2875 if (target_thread_pointer
== TP_AUTO
)
2877 if (arm_arch6k
&& !TARGET_THUMB1
)
2878 target_thread_pointer
= TP_CP15
;
2880 target_thread_pointer
= TP_SOFT
;
2883 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
2884 error ("can not use -mtp=cp15 with 16-bit Thumb");
2886 /* Override the default structure alignment for AAPCS ABI. */
2887 if (!global_options_set
.x_arm_structure_size_boundary
)
2889 if (TARGET_AAPCS_BASED
)
2890 arm_structure_size_boundary
= 8;
2894 if (arm_structure_size_boundary
!= 8
2895 && arm_structure_size_boundary
!= 32
2896 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
2898 if (ARM_DOUBLEWORD_ALIGN
)
2900 "structure size boundary can only be set to 8, 32 or 64");
2902 warning (0, "structure size boundary can only be set to 8 or 32");
2903 arm_structure_size_boundary
2904 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
2908 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
2910 error ("RTP PIC is incompatible with Thumb");
2914 /* If stack checking is disabled, we can use r10 as the PIC register,
2915 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2916 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
2918 if (TARGET_VXWORKS_RTP
)
2919 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2920 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
2923 if (flag_pic
&& TARGET_VXWORKS_RTP
)
2924 arm_pic_register
= 9;
2926 if (arm_pic_register_string
!= NULL
)
2928 int pic_register
= decode_reg_name (arm_pic_register_string
);
2931 warning (0, "-mpic-register= is useless without -fpic");
2933 /* Prevent the user from choosing an obviously stupid PIC register. */
2934 else if (pic_register
< 0 || call_used_regs
[pic_register
]
2935 || pic_register
== HARD_FRAME_POINTER_REGNUM
2936 || pic_register
== STACK_POINTER_REGNUM
2937 || pic_register
>= PC_REGNUM
2938 || (TARGET_VXWORKS_RTP
2939 && (unsigned int) pic_register
!= arm_pic_register
))
2940 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
2942 arm_pic_register
= pic_register
;
2945 if (TARGET_VXWORKS_RTP
2946 && !global_options_set
.x_arm_pic_data_is_text_relative
)
2947 arm_pic_data_is_text_relative
= 0;
2949 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2950 if (fix_cm3_ldrd
== 2)
2952 if (arm_selected_cpu
->core
== cortexm3
)
2958 /* Enable -munaligned-access by default for
2959 - all ARMv6 architecture-based processors
2960 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2961 - ARMv8 architecture-base processors.
2963 Disable -munaligned-access by default for
2964 - all pre-ARMv6 architecture-based processors
2965 - ARMv6-M architecture-based processors. */
2967 if (unaligned_access
== 2)
2969 if (arm_arch6
&& (arm_arch_notm
|| arm_arch7
))
2970 unaligned_access
= 1;
2972 unaligned_access
= 0;
2974 else if (unaligned_access
== 1
2975 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2977 warning (0, "target CPU does not support unaligned accesses");
2978 unaligned_access
= 0;
2981 if (TARGET_THUMB1
&& flag_schedule_insns
)
2983 /* Don't warn since it's on by default in -O2. */
2984 flag_schedule_insns
= 0;
2989 /* If optimizing for size, bump the number of instructions that we
2990 are prepared to conditionally execute (even on a StrongARM). */
2991 max_insns_skipped
= 6;
2993 /* For THUMB2, we limit the conditional sequence to one IT block. */
2995 max_insns_skipped
= MAX_INSN_PER_IT_BLOCK
;
2998 max_insns_skipped
= current_tune
->max_insns_skipped
;
3000 /* Hot/Cold partitioning is not currently supported, since we can't
3001 handle literal pool placement in that case. */
3002 if (flag_reorder_blocks_and_partition
)
3004 inform (input_location
,
3005 "-freorder-blocks-and-partition not supported on this architecture");
3006 flag_reorder_blocks_and_partition
= 0;
3007 flag_reorder_blocks
= 1;
3011 /* Hoisting PIC address calculations more aggressively provides a small,
3012 but measurable, size reduction for PIC code. Therefore, we decrease
3013 the bar for unrestricted expression hoisting to the cost of PIC address
3014 calculation, which is 2 instructions. */
3015 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3016 global_options
.x_param_values
,
3017 global_options_set
.x_param_values
);
3019 /* ARM EABI defaults to strict volatile bitfields. */
3020 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3021 && abi_version_at_least(2))
3022 flag_strict_volatile_bitfields
= 1;
3024 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
3025 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
3026 if (flag_prefetch_loop_arrays
< 0
3029 && current_tune
->num_prefetch_slots
> 0)
3030 flag_prefetch_loop_arrays
= 1;
3032 /* Set up parameters to be used in prefetching algorithm. Do not override the
3033 defaults unless we are tuning for a core we have researched values for. */
3034 if (current_tune
->num_prefetch_slots
> 0)
3035 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3036 current_tune
->num_prefetch_slots
,
3037 global_options
.x_param_values
,
3038 global_options_set
.x_param_values
);
3039 if (current_tune
->l1_cache_line_size
>= 0)
3040 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3041 current_tune
->l1_cache_line_size
,
3042 global_options
.x_param_values
,
3043 global_options_set
.x_param_values
);
3044 if (current_tune
->l1_cache_size
>= 0)
3045 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3046 current_tune
->l1_cache_size
,
3047 global_options
.x_param_values
,
3048 global_options_set
.x_param_values
);
3050 /* Use Neon to perform 64-bits operations rather than core
3052 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3053 if (use_neon_for_64bits
== 1)
3054 prefer_neon_for_64bits
= true;
3056 /* Use the alternative scheduling-pressure algorithm by default. */
3057 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3058 global_options
.x_param_values
,
3059 global_options_set
.x_param_values
);
3061 /* Disable shrink-wrap when optimizing function for size, since it tends to
3062 generate additional returns. */
3063 if (optimize_function_for_size_p (cfun
) && TARGET_THUMB2
)
3064 flag_shrink_wrap
= false;
3065 /* TBD: Dwarf info for apcs frame is not handled yet. */
3066 if (TARGET_APCS_FRAME
)
3067 flag_shrink_wrap
= false;
3069 /* We only support -mslow-flash-data on armv7-m targets. */
3070 if (target_slow_flash_data
3071 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
3072 || (TARGET_THUMB1
|| flag_pic
|| TARGET_NEON
)))
3073 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3075 /* Currently, for slow flash data, we just disable literal pools. */
3076 if (target_slow_flash_data
)
3077 arm_disable_literal_pool
= true;
3079 /* Register global variables with the garbage collector. */
3080 arm_add_gc_roots ();
/* Set up the obstack used to hold minipool (constant-pool) data and record
   its starting object so it can be released later.
   NOTE(review): this chunk is text-mangled -- statements are split across
   lines, brace/blank lines and the `static void' header line are missing.
   Only comments were added; code left byte-identical.  */
3084 arm_add_gc_roots (void)
3086 gcc_obstack_init(&minipool_obstack
);
3087 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3090 /* A table of known ARM exception types.
3091 For use with the interrupt function attribute. */
/* Each entry maps an attribute-argument string to the ARM_FT_* value
   returned by arm_isr_value; the table is terminated by a NULL `arg'.
   NOTE(review): the struct's typedef header is missing from this mangled
   chunk -- the two fields below presumably belong to `isr_attribute_arg';
   confirm against upstream.  */
3095 const char *const arg
;
3096 const unsigned long return_value
;
3100 static const isr_attribute_arg isr_attribute_args
[] =
3102 { "IRQ", ARM_FT_ISR
},
3103 { "irq", ARM_FT_ISR
},
3104 { "FIQ", ARM_FT_FIQ
},
3105 { "fiq", ARM_FT_FIQ
},
3106 { "ABORT", ARM_FT_ISR
},
3107 { "abort", ARM_FT_ISR
},
/* NOTE(review): "ABORT"/"abort" appear twice; the linear scan in
   arm_isr_value returns at the first streq match, so the duplicate
   entries below are redundant but harmless.  */
3108 { "ABORT", ARM_FT_ISR
},
3109 { "abort", ARM_FT_ISR
},
3110 { "UNDEF", ARM_FT_EXCEPTION
},
3111 { "undef", ARM_FT_EXCEPTION
},
3112 { "SWI", ARM_FT_EXCEPTION
},
3113 { "swi", ARM_FT_EXCEPTION
},
/* Sentinel: end of table.  */
3114 { NULL
, ARM_FT_NORMAL
}
3117 /* Returns the (interrupt) function type of the current
3118 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3120 static unsigned long
3121 arm_isr_value (tree argument
)
3123 const isr_attribute_arg
* ptr
;
/* NOTE(review): lines are missing from this mangled chunk -- the guard
   controlling the early return below, and the declaration of `arg', are
   not visible.  Confirm against the upstream source before relying on
   the exact control flow.  */
3127 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3129 /* No argument - default to IRQ. */
3130 if (argument
== NULL_TREE
)
3133 /* Get the value of the argument. */
3134 if (TREE_VALUE (argument
) == NULL_TREE
3135 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3136 return ARM_FT_UNKNOWN
;
3138 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3140 /* Check it against the list of known arguments. */
3141 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3142 if (streq (arg
, ptr
->arg
))
3143 return ptr
->return_value
;
3145 /* An unrecognized interrupt type. */
3146 return ARM_FT_UNKNOWN
;
3149 /* Computes the type of the current function. */
/* Builds an ARM_FT_* bitmask for current_function_decl from its
   tree flags (volatility, nested static chain) and its "naked",
   "isr"/"interrupt" attributes.  NOTE(review): several lines of this
   mangled chunk are missing (declarations of `attr'/`a', the conditions
   guarding the NAKED and interworked cases, and the final return);
   comments only -- code left byte-identical.  */
3151 static unsigned long
3152 arm_compute_func_type (void)
3154 unsigned long type
= ARM_FT_UNKNOWN
;
3158 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3160 /* Decide if the current function is volatile. Such functions
3161 never return, and many memory cycles can be saved by not storing
3162 register values that will never be needed again. This optimization
3163 was added to speed up context switching in a kernel application. */
3165 && (TREE_NOTHROW (current_function_decl
)
3166 || !(flag_unwind_tables
3168 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3169 && TREE_THIS_VOLATILE (current_function_decl
))
3170 type
|= ARM_FT_VOLATILE
;
3172 if (cfun
->static_chain_decl
!= NULL
)
3173 type
|= ARM_FT_NESTED
;
3175 attr
= DECL_ATTRIBUTES (current_function_decl
);
3177 a
= lookup_attribute ("naked", attr
);
3179 type
|= ARM_FT_NAKED
;
/* Prefer "isr"; fall back to the "interrupt" spelling.  */
3181 a
= lookup_attribute ("isr", attr
);
3183 a
= lookup_attribute ("interrupt", attr
);
3186 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3188 type
|= arm_isr_value (TREE_VALUE (a
));
3193 /* Returns the type of the current function. */
/* Lazily computes and caches the ARM_FT_* mask in
   cfun->machine->func_type; ARM_FT_UNKNOWN marks "not yet computed".
   NOTE(review): the return-type line of this definition is missing from
   this mangled chunk.  */
3196 arm_current_func_type (void)
3198 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3199 cfun
->machine
->func_type
= arm_compute_func_type ();
3201 return cfun
->machine
->func_type
;
/* Target hook: whether incoming arguments may be spilled to stack slots.
   NOTE(review): return-type line missing from this mangled chunk.  */
3205 arm_allocate_stack_slots_for_args (void)
3207 /* Naked functions should not allocate stack slots for arguments. */
3208 return !IS_NAKED (arm_current_func_type ());
/* Target hook: whether to warn about a missing return statement in DECL.
   Returns false (suppress the warning) for "naked" functions.
   NOTE(review): return-type line missing from this mangled chunk.  */
3212 arm_warn_func_return (tree decl
)
3214 /* Naked functions are implemented entirely in assembly, including the
3215 return sequence, so suppress warnings about this. */
3216 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3220 /* Output assembler code for a block containing the constant parts
3221 of a trampoline, leaving space for the variable parts.
3223 On the ARM, (if r8 is the static chain regnum, and remembering that
3224 referencing pc adds an offset of 8) the trampoline looks like:
3227 .word static chain value
3228 .word function's address
3229 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
/* Emits three variants: 32-bit ARM, Thumb-2, and 16-bit Thumb (which
   must bounce through r0 since pc-relative ldr into arbitrary regs is
   unavailable).  NOTE(review): the return-type line, the TARGET_32BIT
   `if', and brace lines are missing from this mangled chunk; comments
   only -- code left byte-identical.  */
3232 arm_asm_trampoline_template (FILE *f
)
3236 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3237 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3239 else if (TARGET_THUMB2
)
3241 /* The Thumb-2 trampoline is similar to the arm implementation.
3242 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3243 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3244 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3245 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
/* 16-bit Thumb variant.  */
3249 ASM_OUTPUT_ALIGN (f
, 2);
3250 fprintf (f
, "\t.code\t16\n");
3251 fprintf (f
, ".Ltrampoline_start:\n");
3252 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3253 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3254 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3255 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3256 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3257 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
/* Reserve space for the variable parts (chain value, target address).  */
3259 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3260 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3263 /* Emit RTL insns to initialize the variable parts of a trampoline. */
/* Copies the constant template into M_TRAMP, stores CHAIN_VALUE and the
   address of FNDECL at the mode-dependent offsets, then calls
   __clear_cache over the trampoline so the I-cache sees the new code.
   NOTE(review): return-type line missing from this mangled chunk.  */
3266 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3268 rtx fnaddr
, mem
, a_tramp
;
3270 emit_block_move (m_tramp
, assemble_trampoline_template (),
3271 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
/* Static chain slot: offset 8 for 32-bit (ARM/Thumb-2), 12 for Thumb-1.  */
3273 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3274 emit_move_insn (mem
, chain_value
);
/* Target-function address slot immediately after the chain slot.  */
3276 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3277 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3278 emit_move_insn (mem
, fnaddr
);
3280 a_tramp
= XEXP (m_tramp
, 0);
3281 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3282 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3283 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3286 /* Thumb trampolines should be entered in thumb mode, so set
3287 the bottom bit of the address. */
/* ORs bit 0 into ADDR (Thumb interworking convention).
   NOTE(review): the return-type line, the TARGET_THUMB guard, and the
   final return are missing from this mangled chunk.  */
3290 arm_trampoline_adjust_address (rtx addr
)
3293 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3294 NULL
, 0, OPTAB_LIB_WIDEN
);
3298 /* Return 1 if it is possible to return using a single instruction.
3299 If SIBLING is non-null, this is a test for a return before a sibling
3300 call. SIBLING is the call insn, so we can examine its register usage. */
/* A long chain of disqualifying tests; NOTE(review): in this mangled
   chunk the early `return 0'/`return 1' statements and the `regno'
   declaration are on missing lines, so the visible conditions lack their
   consequents.  Comments only -- code left byte-identical.  */
3303 use_return_insn (int iscond
, rtx sibling
)
3306 unsigned int func_type
;
3307 unsigned long saved_int_regs
;
3308 unsigned HOST_WIDE_INT stack_adjust
;
3309 arm_stack_offsets
*offsets
;
3311 /* Never use a return instruction before reload has run. */
3312 if (!reload_completed
)
3315 func_type
= arm_current_func_type ();
3317 /* Naked, volatile and stack alignment functions need special
3319 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3322 /* So do interrupt functions that use the frame pointer and Thumb
3323 interrupt functions. */
3324 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3327 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3328 && !optimize_function_for_size_p (cfun
))
3331 offsets
= arm_get_frame_offsets ();
3332 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3334 /* As do variadic functions. */
3335 if (crtl
->args
.pretend_args_size
3336 || cfun
->machine
->uses_anonymous_args
3337 /* Or if the function calls __builtin_eh_return () */
3338 || crtl
->calls_eh_return
3339 /* Or if the function calls alloca */
3340 || cfun
->calls_alloca
3341 /* Or if there is a stack adjustment. However, if the stack pointer
3342 is saved on the stack, we can use a pre-incrementing stack load. */
3343 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3344 && stack_adjust
== 4)))
3347 saved_int_regs
= offsets
->saved_regs_mask
;
3349 /* Unfortunately, the insn
3351 ldmib sp, {..., sp, ...}
3353 triggers a bug on most SA-110 based devices, such that the stack
3354 pointer won't be correctly restored if the instruction takes a
3355 page fault. We work around this problem by popping r3 along with
3356 the other registers, since that is never slower than executing
3357 another instruction.
3359 We test for !arm_arch5 here, because code for any architecture
3360 less than this could potentially be run on one of the buggy
3362 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3364 /* Validate that r3 is a call-clobbered register (always true in
3365 the default abi) ... */
3366 if (!call_used_regs
[3])
3369 /* ... that it isn't being used for a return value ... */
3370 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3373 /* ... or for a tail-call argument ... */
3376 gcc_assert (CALL_P (sibling
));
3378 if (find_regno_fusage (sibling
, USE
, 3))
3382 /* ... and that there are no call-saved registers in r0-r2
3383 (always true in the default ABI). */
3384 if (saved_int_regs
& 0x7)
3388 /* Can't be done if interworking with Thumb, and any registers have been
3390 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3393 /* On StrongARM, conditional returns are expensive if they aren't
3394 taken and multiple registers have been stacked. */
3395 if (iscond
&& arm_tune_strongarm
)
3397 /* Conditional return when just the LR is stored is a simple
3398 conditional-load instruction, that's not expensive. */
3399 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
3403 && arm_pic_register
!= INVALID_REGNUM
3404 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
3408 /* If there are saved registers but the LR isn't saved, then we need
3409 two instructions for the return. */
3410 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
3413 /* Can't be done if any of the VFP regs are pushed,
3414 since this also requires an insn. */
3415 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
3416 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
3417 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
3420 if (TARGET_REALLY_IWMMXT
)
3421 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
3422 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
3428 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3429 shrink-wrapping if possible. This is the case if we need to emit a
3430 prologue, which we can test by looking at the offsets. */
/* NOTE(review): return-type line missing from this mangled chunk.  */
3432 use_simple_return_p (void)
3434 arm_stack_offsets
*offsets
;
3436 offsets
= arm_get_frame_offsets ();
3437 return offsets
->outgoing_args
!= 0;
3440 /* Return TRUE if int I is a valid immediate ARM constant. */
/* An ARM-mode immediate is an 8-bit value rotated right by an even
   amount; Thumb-2 additionally allows replicated byte patterns.
   NOTE(review): several lines of this mangled chunk are missing
   (declarations of `lowbit'/`v', the early returns, the rounding of
   `lowbit', and the repeated-pattern computations); comments only --
   code left byte-identical.  */
3443 const_ok_for_arm (HOST_WIDE_INT i
)
3447 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3448 be all zero, or all one. */
3449 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3450 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3451 != ((~(unsigned HOST_WIDE_INT
) 0)
3452 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
3455 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3457 /* Fast return for 0 and small values. We must do this for zero, since
3458 the code below can't handle that one case. */
3459 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3462 /* Get the number of trailing zeros. */
3463 lowbit
= ffs((int) i
) - 1;
3465 /* Only even shifts are allowed in ARM mode so round down to the
3466 nearest even number. */
3470 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3475 /* Allow rotated constants in ARM mode. */
3477 && ((i
& ~0xc000003f) == 0
3478 || (i
& ~0xf000000f) == 0
3479 || (i
& ~0xfc000003) == 0))
3486 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3489 if (i
== v
|| i
== (v
| (v
<< 8)))
3492 /* Allow repeated pattern 0xXY00XY00. */
3502 /* Return true if I is a valid constant for the operation CODE. */
/* For operations with an inverse/negated form (mvn, mvn-based AND/IOR/XOR,
   rsb/sub for PLUS/MINUS), a constant is also usable when its complement
   or negation is a valid immediate.  NOTE(review): the `switch (code)'
   line, several `case' labels, and `return 1' lines are missing from
   this mangled chunk; comments only -- code left byte-identical.  */
3504 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
3506 if (const_ok_for_arm (i
))
3512 /* See if we can use movw. */
3513 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
3516 /* Otherwise, try mvn. */
3517 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3520 /* See if we can use addw or subw. */
3522 && ((i
& 0xfffff000) == 0
3523 || ((-i
) & 0xfffff000) == 0))
3525 /* else fall through. */
3545 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
3547 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
3553 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3557 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3564 /* Return true if I is a valid di mode constant for the operation CODE. */
/* Splits I into 32-bit halves and tests each half.
   NOTE(review): the switch over CODE and its case labels are missing
   from this mangled chunk; the two visible returns presumably belong to
   different cases.  */
3566 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
3568 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
3569 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
3570 rtx hi
= GEN_INT (hi_val
);
3571 rtx lo
= GEN_INT (lo_val
);
3581 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
3582 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
3584 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
3591 /* Emit a sequence of insns to handle a large constant.
3592 CODE is the code of the operation required, it can be any of SET, PLUS,
3593 IOR, AND, XOR, MINUS;
3594 MODE is the mode in which the operation is being performed;
3595 VAL is the integer to operate on;
3596 SOURCE is the other operand (a register, or a null-pointer for SET);
3597 SUBTARGETS means it is safe to create scratch registers if that will
3598 either produce a simpler sequence, or we will want to cse the values.
3599 Return value is the number of insns emitted. */
3601 /* ??? Tweak this for thumb2. */
/* NOTE(review): mangled chunk -- the return-type line, the `cond'
   declaration, and parts of the cost comparison are missing; comments
   only, code left byte-identical.  */
3603 arm_split_constant (enum rtx_code code
, enum machine_mode mode
, rtx insn
,
3604 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
/* If INSN is conditionally executed, propagate its condition to every
   insn we emit.  */
3608 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
3609 cond
= COND_EXEC_TEST (PATTERN (insn
));
3613 if (subtargets
|| code
== SET
3614 || (REG_P (target
) && REG_P (source
)
3615 && REGNO (target
) != REGNO (source
)))
3617 /* After arm_reorg has been called, we can't fix up expensive
3618 constants by pushing them into memory so we must synthesize
3619 them in-line, regardless of the cost. This is only likely to
3620 be more costly on chips that have load delay slots and we are
3621 compiling without running the scheduler (so no splitting
3622 occurred before the final instruction emission).
3624 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3626 if (!cfun
->machine
->after_arm_reorg
3628 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
3630 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
3635 /* Currently SET is the only monadic value for CODE, all
3636 the rest are diadic. */
3637 if (TARGET_USE_MOVT
)
3638 arm_emit_movpair (target
, GEN_INT (val
))
;
3640 emit_set_insn (target
, GEN_INT (val
));
/* Diadic case: materialize VAL into a temporary, then apply CODE.  */
3646 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
3648 if (TARGET_USE_MOVT
)
3649 arm_emit_movpair (temp
, GEN_INT (val
));
3651 emit_set_insn (temp
, GEN_INT (val
));
3653 /* For MINUS, the value is subtracted from, since we never
3654 have subtraction of a constant. */
3656 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
3658 emit_set_insn (target
,
3659 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
/* Fall back to in-line synthesis of the constant.  */
3665 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
3669 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3670 ARM/THUMB2 immediates, and add up to VAL.
3671 The function return value gives the number of insns required. */
/* NOTE(review): mangled chunk -- the return-type line, declarations of
   `i'/`best_start'/`insns1'/`insns2', the TARGET_ARM guard around the
   scan loop, and the final return are on missing lines.  Comments only;
   code left byte-identical.  */
3673 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3674 struct four_ints
*return_sequence
)
3676 int best_consecutive_zeros
= 0;
3680 struct four_ints tmp_sequence
;
3682 /* If we aren't targeting ARM, the best place to start is always at
3683 the bottom, otherwise look more closely. */
/* Scan VAL two bits at a time for the widest run of zero bits aligned
   on a 2-bit boundary (ARM rotates are by even amounts).  */
3686 for (i
= 0; i
< 32; i
+= 2)
3688 int consecutive_zeros
= 0;
3690 if (!(val
& (3 << i
)))
3692 while ((i
< 32) && !(val
& (3 << i
)))
3694 consecutive_zeros
+= 2;
3697 if (consecutive_zeros
> best_consecutive_zeros
)
3699 best_consecutive_zeros
= consecutive_zeros
;
3700 best_start
= i
- consecutive_zeros
;
3707 /* So long as it won't require any more insns to do so, it's
3708 desirable to emit a small constant (in bits 0...9) in the last
3709 insn. This way there is more chance that it can be combined with
3710 a later addressing insn to form a pre-indexed load or store
3711 operation. Consider:
3713 *((volatile int *)0xe0000100) = 1;
3714 *((volatile int *)0xe0000110) = 2;
3716 We want this to wind up as:
3720 str rB, [rA, #0x100]
3722 str rB, [rA, #0x110]
3724 rather than having to synthesize both large constants from scratch.
3726 Therefore, we calculate how many insns would be required to emit
3727 the constant starting from `best_start', and also starting from
3728 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3729 yield a shorter sequence, we may as well use zero. */
3730 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
3732 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < val
))
3734 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
3735 if (insns2
<= insns1
)
3737 *return_sequence
= tmp_sequence
;
3745 /* As for optimal_immediate_sequence, but starting at bit-position I. */
/* NOTE(review): mangled chunk -- the return-type line, the `insns'
   declaration, the outer do/while loop, several `if (TARGET_THUMB2)'
   guards, `end' computation, and the final return are on missing lines.
   Comments only; code left byte-identical.  */
3747 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3748 struct four_ints
*return_sequence
, int i
)
3750 int remainder
= val
& 0xffffffff;
3753 /* Try and find a way of doing the job in either two or three
3756 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3757 location. We start at position I. This may be the MSB, or
3758 optimal_immediate_sequence may have positioned it at the largest block
3759 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3760 wrapping around to the top of the word when we drop off the bottom.
3761 In the worst case this code should produce no more than four insns.
3763 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3764 constants, shifted to any arbitrary location. We should always start
3769 unsigned int b1
, b2
, b3
, b4
;
3770 unsigned HOST_WIDE_INT result
;
3773 gcc_assert (insns
< 4);
3778 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3779 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
3782 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
3783 /* We can use addw/subw for the last 12 bits. */
3787 /* Use an 8-bit shifted/rotated immediate. */
3791 result
= remainder
& ((0x0ff << end
)
3792 | ((i
< end
) ? (0xff >> (32 - end
))
3799 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3800 arbitrary shifts. */
3801 i
-= TARGET_ARM
? 2 : 1;
3805 /* Next, see if we can do a better job with a thumb2 replicated
3808 We do it this way around to catch the cases like 0x01F001E0 where
3809 two 8-bit immediates would work, but a replicated constant would
3812 TODO: 16-bit constants that don't clear all the bits, but still win.
3813 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3816 b1
= (remainder
& 0xff000000) >> 24;
3817 b2
= (remainder
& 0x00ff0000) >> 16;
3818 b3
= (remainder
& 0x0000ff00) >> 8;
3819 b4
= remainder
& 0xff;
3823 /* The 8-bit immediate already found clears b1 (and maybe b2),
3824 but must leave b3 and b4 alone. */
3826 /* First try to find a 32-bit replicated constant that clears
3827 almost everything. We can assume that we can't do it in one,
3828 or else we wouldn't be here. */
3829 unsigned int tmp
= b1
& b2
& b3
& b4
;
3830 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
3832 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
3833 + (tmp
== b3
) + (tmp
== b4
);
3835 && (matching_bytes
>= 3
3836 || (matching_bytes
== 2
3837 && const_ok_for_op (remainder
& ~tmp2
, code
))))
3839 /* At least 3 of the bytes match, and the fourth has at
3840 least as many bits set, or two of the bytes match
3841 and it will only require one more insn to finish. */
3849 /* Second, try to find a 16-bit replicated constant that can
3850 leave three of the bytes clear. If b2 or b4 is already
3851 zero, then we can. If the 8-bit from above would not
3852 clear b2 anyway, then we still win. */
3853 else if (b1
== b3
&& (!b2
|| !b4
3854 || (remainder
& 0x00ff0000 & ~result
)))
3856 result
= remainder
& 0xff00ff00;
3862 /* The 8-bit immediate already found clears b2 (and maybe b3)
3863 and we don't get here unless b1 is already clear, but it will
3864 leave b4 unchanged. */
3866 /* If we can clear b2 and b4 at once, then we win, since the
3867 8-bits couldn't possibly reach that far. */
3870 result
= remainder
& 0x00ff00ff;
/* Record this immediate and strip its bits from the remainder.  */
3876 return_sequence
->i
[insns
++] = result
;
3877 remainder
&= ~result
;
3879 if (code
== SET
|| code
== MINUS
)
3887 /* Emit an instruction with the indicated PATTERN. If COND is
3888 non-NULL, conditionalize the execution of the instruction on COND
/* NOTE(review): the return-type line and the `if (cond ...)' guard
   around the COND_EXEC wrapping are on missing lines in this mangled
   chunk.  */
3892 emit_constant_insn (rtx cond
, rtx pattern
)
3895 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
3896 emit_insn (pattern
);
3899 /* As above, but extra parameter GENERATE which, if clear, suppresses
3903 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
3904 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
3909 int final_invert
= 0;
3911 int set_sign_bit_copies
= 0;
3912 int clear_sign_bit_copies
= 0;
3913 int clear_zero_bit_copies
= 0;
3914 int set_zero_bit_copies
= 0;
3915 int insns
= 0, neg_insns
, inv_insns
;
3916 unsigned HOST_WIDE_INT temp1
, temp2
;
3917 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
3918 struct four_ints
*immediates
;
3919 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
3921 /* Find out which operations are safe for a given CODE. Also do a quick
3922 check for degenerate cases; these can occur when DImode operations
3935 if (remainder
== 0xffffffff)
3938 emit_constant_insn (cond
,
3939 gen_rtx_SET (VOIDmode
, target
,
3940 GEN_INT (ARM_SIGN_EXTEND (val
))));
3946 if (reload_completed
&& rtx_equal_p (target
, source
))
3950 emit_constant_insn (cond
,
3951 gen_rtx_SET (VOIDmode
, target
, source
));
3960 emit_constant_insn (cond
,
3961 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
3964 if (remainder
== 0xffffffff)
3966 if (reload_completed
&& rtx_equal_p (target
, source
))
3969 emit_constant_insn (cond
,
3970 gen_rtx_SET (VOIDmode
, target
, source
));
3979 if (reload_completed
&& rtx_equal_p (target
, source
))
3982 emit_constant_insn (cond
,
3983 gen_rtx_SET (VOIDmode
, target
, source
));
3987 if (remainder
== 0xffffffff)
3990 emit_constant_insn (cond
,
3991 gen_rtx_SET (VOIDmode
, target
,
3992 gen_rtx_NOT (mode
, source
)));
3999 /* We treat MINUS as (val - source), since (source - val) is always
4000 passed as (source + (-val)). */
4004 emit_constant_insn (cond
,
4005 gen_rtx_SET (VOIDmode
, target
,
4006 gen_rtx_NEG (mode
, source
)));
4009 if (const_ok_for_arm (val
))
4012 emit_constant_insn (cond
,
4013 gen_rtx_SET (VOIDmode
, target
,
4014 gen_rtx_MINUS (mode
, GEN_INT (val
),
4025 /* If we can do it in one insn get out quickly. */
4026 if (const_ok_for_op (val
, code
))
4029 emit_constant_insn (cond
,
4030 gen_rtx_SET (VOIDmode
, target
,
4032 ? gen_rtx_fmt_ee (code
, mode
, source
,
4038 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4040 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4041 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4045 if (mode
== SImode
&& i
== 16)
4046 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4048 emit_constant_insn (cond
,
4049 gen_zero_extendhisi2
4050 (target
, gen_lowpart (HImode
, source
)));
4052 /* Extz only supports SImode, but we can coerce the operands
4054 emit_constant_insn (cond
,
4055 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4056 gen_lowpart (SImode
, source
),
4057 GEN_INT (i
), const0_rtx
));
4063 /* Calculate a few attributes that may be useful for specific
4065 /* Count number of leading zeros. */
4066 for (i
= 31; i
>= 0; i
--)
4068 if ((remainder
& (1 << i
)) == 0)
4069 clear_sign_bit_copies
++;
4074 /* Count number of leading 1's. */
4075 for (i
= 31; i
>= 0; i
--)
4077 if ((remainder
& (1 << i
)) != 0)
4078 set_sign_bit_copies
++;
4083 /* Count number of trailing zero's. */
4084 for (i
= 0; i
<= 31; i
++)
4086 if ((remainder
& (1 << i
)) == 0)
4087 clear_zero_bit_copies
++;
4092 /* Count number of trailing 1's. */
4093 for (i
= 0; i
<= 31; i
++)
4095 if ((remainder
& (1 << i
)) != 0)
4096 set_zero_bit_copies
++;
4104 /* See if we can do this by sign_extending a constant that is known
4105 to be negative. This is a good, way of doing it, since the shift
4106 may well merge into a subsequent insn. */
4107 if (set_sign_bit_copies
> 1)
4109 if (const_ok_for_arm
4110 (temp1
= ARM_SIGN_EXTEND (remainder
4111 << (set_sign_bit_copies
- 1))))
4115 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4116 emit_constant_insn (cond
,
4117 gen_rtx_SET (VOIDmode
, new_src
,
4119 emit_constant_insn (cond
,
4120 gen_ashrsi3 (target
, new_src
,
4121 GEN_INT (set_sign_bit_copies
- 1)));
4125 /* For an inverted constant, we will need to set the low bits,
4126 these will be shifted out of harm's way. */
4127 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4128 if (const_ok_for_arm (~temp1
))
4132 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4133 emit_constant_insn (cond
,
4134 gen_rtx_SET (VOIDmode
, new_src
,
4136 emit_constant_insn (cond
,
4137 gen_ashrsi3 (target
, new_src
,
4138 GEN_INT (set_sign_bit_copies
- 1)));
4144 /* See if we can calculate the value as the difference between two
4145 valid immediates. */
4146 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4148 int topshift
= clear_sign_bit_copies
& ~1;
4150 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4151 & (0xff000000 >> topshift
));
4153 /* If temp1 is zero, then that means the 9 most significant
4154 bits of remainder were 1 and we've caused it to overflow.
4155 When topshift is 0 we don't need to do anything since we
4156 can borrow from 'bit 32'. */
4157 if (temp1
== 0 && topshift
!= 0)
4158 temp1
= 0x80000000 >> (topshift
- 1);
4160 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4162 if (const_ok_for_arm (temp2
))
4166 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4167 emit_constant_insn (cond
,
4168 gen_rtx_SET (VOIDmode
, new_src
,
4170 emit_constant_insn (cond
,
4171 gen_addsi3 (target
, new_src
,
4179 /* See if we can generate this by setting the bottom (or the top)
4180 16 bits, and then shifting these into the other half of the
4181 word. We only look for the simplest cases, to do more would cost
4182 too much. Be careful, however, not to generate this when the
4183 alternative would take fewer insns. */
4184 if (val
& 0xffff0000)
4186 temp1
= remainder
& 0xffff0000;
4187 temp2
= remainder
& 0x0000ffff;
4189 /* Overlaps outside this range are best done using other methods. */
4190 for (i
= 9; i
< 24; i
++)
4192 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4193 && !const_ok_for_arm (temp2
))
4195 rtx new_src
= (subtargets
4196 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4198 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4199 source
, subtargets
, generate
);
4207 gen_rtx_ASHIFT (mode
, source
,
4214 /* Don't duplicate cases already considered. */
4215 for (i
= 17; i
< 24; i
++)
4217 if (((temp1
| (temp1
>> i
)) == remainder
)
4218 && !const_ok_for_arm (temp1
))
4220 rtx new_src
= (subtargets
4221 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4223 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4224 source
, subtargets
, generate
);
4229 gen_rtx_SET (VOIDmode
, target
,
4232 gen_rtx_LSHIFTRT (mode
, source
,
4243 /* If we have IOR or XOR, and the constant can be loaded in a
4244 single instruction, and we can find a temporary to put it in,
4245 then this can be done in two instructions instead of 3-4. */
4247 /* TARGET can't be NULL if SUBTARGETS is 0 */
4248 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4250 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4254 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4256 emit_constant_insn (cond
,
4257 gen_rtx_SET (VOIDmode
, sub
,
4259 emit_constant_insn (cond
,
4260 gen_rtx_SET (VOIDmode
, target
,
4261 gen_rtx_fmt_ee (code
, mode
,
4272 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4273 and the remainder 0s for e.g. 0xfff00000)
4274 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4276 This can be done in 2 instructions by using shifts with mov or mvn.
4281 mvn r0, r0, lsr #12 */
4282 if (set_sign_bit_copies
> 8
4283 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
4287 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4288 rtx shift
= GEN_INT (set_sign_bit_copies
);
4292 gen_rtx_SET (VOIDmode
, sub
,
4294 gen_rtx_ASHIFT (mode
,
4299 gen_rtx_SET (VOIDmode
, target
,
4301 gen_rtx_LSHIFTRT (mode
, sub
,
4308 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4310 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4312 For eg. r0 = r0 | 0xfff
4317 if (set_zero_bit_copies
> 8
4318 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4322 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4323 rtx shift
= GEN_INT (set_zero_bit_copies
);
4327 gen_rtx_SET (VOIDmode
, sub
,
4329 gen_rtx_LSHIFTRT (mode
,
4334 gen_rtx_SET (VOIDmode
, target
,
4336 gen_rtx_ASHIFT (mode
, sub
,
4342 /* This will never be reached for Thumb2 because orn is a valid
4343 instruction. This is for Thumb1 and the ARM 32 bit cases.
4345 x = y | constant (such that ~constant is a valid constant)
4347 x = ~(~y & ~constant).
4349 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4353 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4354 emit_constant_insn (cond
,
4355 gen_rtx_SET (VOIDmode
, sub
,
4356 gen_rtx_NOT (mode
, source
)));
4359 sub
= gen_reg_rtx (mode
);
4360 emit_constant_insn (cond
,
4361 gen_rtx_SET (VOIDmode
, sub
,
4362 gen_rtx_AND (mode
, source
,
4364 emit_constant_insn (cond
,
4365 gen_rtx_SET (VOIDmode
, target
,
4366 gen_rtx_NOT (mode
, sub
)));
4373 /* See if two shifts will do 2 or more insn's worth of work. */
4374 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4376 HOST_WIDE_INT shift_mask
= ((0xffffffff
4377 << (32 - clear_sign_bit_copies
))
4380 if ((remainder
| shift_mask
) != 0xffffffff)
4384 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4385 insns
= arm_gen_constant (AND
, mode
, cond
,
4386 remainder
| shift_mask
,
4387 new_src
, source
, subtargets
, 1);
4392 rtx targ
= subtargets
? NULL_RTX
: target
;
4393 insns
= arm_gen_constant (AND
, mode
, cond
,
4394 remainder
| shift_mask
,
4395 targ
, source
, subtargets
, 0);
4401 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4402 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4404 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4405 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4411 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4413 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4415 if ((remainder
| shift_mask
) != 0xffffffff)
4419 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4421 insns
= arm_gen_constant (AND
, mode
, cond
,
4422 remainder
| shift_mask
,
4423 new_src
, source
, subtargets
, 1);
4428 rtx targ
= subtargets
? NULL_RTX
: target
;
4430 insns
= arm_gen_constant (AND
, mode
, cond
,
4431 remainder
| shift_mask
,
4432 targ
, source
, subtargets
, 0);
4438 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4439 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4441 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4442 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4454 /* Calculate what the instruction sequences would be if we generated it
4455 normally, negated, or inverted. */
4457 /* AND cannot be split into multiple insns, so invert and use BIC. */
4460 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4463 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4468 if (can_invert
|| final_invert
)
4469 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4474 immediates
= &pos_immediates
;
4476 /* Is the negated immediate sequence more efficient? */
4477 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4480 immediates
= &neg_immediates
;
4485 /* Is the inverted immediate sequence more efficient?
4486 We must allow for an extra NOT instruction for XOR operations, although
4487 there is some chance that the final 'mvn' will get optimized later. */
4488 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4491 immediates
= &inv_immediates
;
4499 /* Now output the chosen sequence as instructions. */
4502 for (i
= 0; i
< insns
; i
++)
4504 rtx new_src
, temp1_rtx
;
4506 temp1
= immediates
->i
[i
];
4508 if (code
== SET
|| code
== MINUS
)
4509 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4510 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4511 new_src
= gen_reg_rtx (mode
);
4517 else if (can_negate
)
4520 temp1
= trunc_int_for_mode (temp1
, mode
);
4521 temp1_rtx
= GEN_INT (temp1
);
4525 else if (code
== MINUS
)
4526 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4528 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4530 emit_constant_insn (cond
,
4531 gen_rtx_SET (VOIDmode
, new_src
,
4537 can_negate
= can_invert
;
4541 else if (code
== MINUS
)
4549 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
4550 gen_rtx_NOT (mode
, source
)));
4557 /* Canonicalize a comparison so that we are more likely to recognize it.
4558 This can be done for a few constant compares, where we can make the
4559 immediate value easier to load. */
4562 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
4563 bool op0_preserve_value
)
4565 enum machine_mode mode
;
4566 unsigned HOST_WIDE_INT i
, maxval
;
4568 mode
= GET_MODE (*op0
);
4569 if (mode
== VOIDmode
)
4570 mode
= GET_MODE (*op1
);
4572 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
4574 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4575 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4576 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4577 for GTU/LEU in Thumb mode. */
4582 if (*code
== GT
|| *code
== LE
4583 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
4585 /* Missing comparison. First try to use an available
4587 if (CONST_INT_P (*op1
))
4595 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4597 *op1
= GEN_INT (i
+ 1);
4598 *code
= *code
== GT
? GE
: LT
;
4604 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4605 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4607 *op1
= GEN_INT (i
+ 1);
4608 *code
= *code
== GTU
? GEU
: LTU
;
4617 /* If that did not work, reverse the condition. */
4618 if (!op0_preserve_value
)
4623 *code
= (int)swap_condition ((enum rtx_code
)*code
);
4629 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4630 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4631 to facilitate possible combining with a cmp into 'ands'. */
4633 && GET_CODE (*op0
) == ZERO_EXTEND
4634 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
4635 && GET_MODE (XEXP (*op0
, 0)) == QImode
4636 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
4637 && subreg_lowpart_p (XEXP (*op0
, 0))
4638 && *op1
== const0_rtx
)
4639 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
4642 /* Comparisons smaller than DImode. Only adjust comparisons against
4643 an out-of-range constant. */
4644 if (!CONST_INT_P (*op1
)
4645 || const_ok_for_arm (INTVAL (*op1
))
4646 || const_ok_for_arm (- INTVAL (*op1
)))
4660 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4662 *op1
= GEN_INT (i
+ 1);
4663 *code
= *code
== GT
? GE
: LT
;
4671 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4673 *op1
= GEN_INT (i
- 1);
4674 *code
= *code
== GE
? GT
: LE
;
4681 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4682 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4684 *op1
= GEN_INT (i
+ 1);
4685 *code
= *code
== GTU
? GEU
: LTU
;
4693 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4695 *op1
= GEN_INT (i
- 1);
4696 *code
= *code
== GEU
? GTU
: LEU
;
4707 /* Define how to find the value returned by a function. */
4710 arm_function_value(const_tree type
, const_tree func
,
4711 bool outgoing ATTRIBUTE_UNUSED
)
4713 enum machine_mode mode
;
4714 int unsignedp ATTRIBUTE_UNUSED
;
4715 rtx r ATTRIBUTE_UNUSED
;
4717 mode
= TYPE_MODE (type
);
4719 if (TARGET_AAPCS_BASED
)
4720 return aapcs_allocate_return_reg (mode
, type
, func
);
4722 /* Promote integer types. */
4723 if (INTEGRAL_TYPE_P (type
))
4724 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
4726 /* Promotes small structs returned in a register to full-word size
4727 for big-endian AAPCS. */
4728 if (arm_return_in_msb (type
))
4730 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4731 if (size
% UNITS_PER_WORD
!= 0)
4733 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4734 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4738 return arm_libcall_value_1 (mode
);
4741 /* libcall hashtable helpers. */
4743 struct libcall_hasher
: typed_noop_remove
<rtx_def
>
4745 typedef rtx_def value_type
;
4746 typedef rtx_def compare_type
;
4747 static inline hashval_t
hash (const value_type
*);
4748 static inline bool equal (const value_type
*, const compare_type
*);
4749 static inline void remove (value_type
*);
4753 libcall_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
4755 return rtx_equal_p (p1
, p2
);
4759 libcall_hasher::hash (const value_type
*p1
)
4761 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
4764 typedef hash_table
<libcall_hasher
> libcall_table_type
;
4767 add_libcall (libcall_table_type
*htab
, rtx libcall
)
4769 *htab
->find_slot (libcall
, INSERT
) = libcall
;
4773 arm_libcall_uses_aapcs_base (const_rtx libcall
)
4775 static bool init_done
= false;
4776 static libcall_table_type
*libcall_htab
= NULL
;
4782 libcall_htab
= new libcall_table_type (31);
4783 add_libcall (libcall_htab
,
4784 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
4785 add_libcall (libcall_htab
,
4786 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
4787 add_libcall (libcall_htab
,
4788 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
4789 add_libcall (libcall_htab
,
4790 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
4792 add_libcall (libcall_htab
,
4793 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
4794 add_libcall (libcall_htab
,
4795 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
4796 add_libcall (libcall_htab
,
4797 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
4798 add_libcall (libcall_htab
,
4799 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
4801 add_libcall (libcall_htab
,
4802 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
4803 add_libcall (libcall_htab
,
4804 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
4805 add_libcall (libcall_htab
,
4806 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
4807 add_libcall (libcall_htab
,
4808 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
4809 add_libcall (libcall_htab
,
4810 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
4811 add_libcall (libcall_htab
,
4812 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
4813 add_libcall (libcall_htab
,
4814 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
4815 add_libcall (libcall_htab
,
4816 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
4818 /* Values from double-precision helper functions are returned in core
4819 registers if the selected core only supports single-precision
4820 arithmetic, even if we are using the hard-float ABI. The same is
4821 true for single-precision helpers, but we will never be using the
4822 hard-float ABI on a CPU which doesn't support single-precision
4823 operations in hardware. */
4824 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
4825 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
4826 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
4827 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
4828 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
4829 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
4830 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
4831 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
4832 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
4833 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
4834 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
4835 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
4837 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
4841 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
4845 arm_libcall_value_1 (enum machine_mode mode
)
4847 if (TARGET_AAPCS_BASED
)
4848 return aapcs_libcall_value (mode
);
4849 else if (TARGET_IWMMXT_ABI
4850 && arm_vector_mode_supported_p (mode
))
4851 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
4853 return gen_rtx_REG (mode
, ARG_REGISTER (1));
4856 /* Define how to find the value returned by a library function
4857 assuming the value has mode MODE. */
4860 arm_libcall_value (enum machine_mode mode
, const_rtx libcall
)
4862 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
4863 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
4865 /* The following libcalls return their result in integer registers,
4866 even though they return a floating point value. */
4867 if (arm_libcall_uses_aapcs_base (libcall
))
4868 return gen_rtx_REG (mode
, ARG_REGISTER(1));
4872 return arm_libcall_value_1 (mode
);
4875 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4878 arm_function_value_regno_p (const unsigned int regno
)
4880 if (regno
== ARG_REGISTER (1)
4882 && TARGET_AAPCS_BASED
4884 && TARGET_HARD_FLOAT
4885 && regno
== FIRST_VFP_REGNUM
)
4886 || (TARGET_IWMMXT_ABI
4887 && regno
== FIRST_IWMMXT_REGNUM
))
4893 /* Determine the amount of memory needed to store the possible return
4894 registers of an untyped call. */
4896 arm_apply_result_size (void)
4902 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
4904 if (TARGET_IWMMXT_ABI
)
4911 /* Decide whether TYPE should be returned in memory (true)
4912 or in a register (false). FNTYPE is the type of the function making
4915 arm_return_in_memory (const_tree type
, const_tree fntype
)
4919 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
4921 if (TARGET_AAPCS_BASED
)
4923 /* Simple, non-aggregate types (ie not including vectors and
4924 complex) are always returned in a register (or registers).
4925 We don't care about which register here, so we can short-cut
4926 some of the detail. */
4927 if (!AGGREGATE_TYPE_P (type
)
4928 && TREE_CODE (type
) != VECTOR_TYPE
4929 && TREE_CODE (type
) != COMPLEX_TYPE
)
4932 /* Any return value that is no larger than one word can be
4934 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
4937 /* Check any available co-processors to see if they accept the
4938 type as a register candidate (VFP, for example, can return
4939 some aggregates in consecutive registers). These aren't
4940 available if the call is variadic. */
4941 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
4944 /* Vector values should be returned using ARM registers, not
4945 memory (unless they're over 16 bytes, which will break since
4946 we only have four call-clobbered registers to play with). */
4947 if (TREE_CODE (type
) == VECTOR_TYPE
)
4948 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4950 /* The rest go in memory. */
4954 if (TREE_CODE (type
) == VECTOR_TYPE
)
4955 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4957 if (!AGGREGATE_TYPE_P (type
) &&
4958 (TREE_CODE (type
) != VECTOR_TYPE
))
4959 /* All simple types are returned in registers. */
4962 if (arm_abi
!= ARM_ABI_APCS
)
4964 /* ATPCS and later return aggregate types in memory only if they are
4965 larger than a word (or are variable size). */
4966 return (size
< 0 || size
> UNITS_PER_WORD
);
4969 /* For the arm-wince targets we choose to be compatible with Microsoft's
4970 ARM and Thumb compilers, which always return aggregates in memory. */
4972 /* All structures/unions bigger than one word are returned in memory.
4973 Also catch the case where int_size_in_bytes returns -1. In this case
4974 the aggregate is either huge or of variable size, and in either case
4975 we will want to return it via memory and not in a register. */
4976 if (size
< 0 || size
> UNITS_PER_WORD
)
4979 if (TREE_CODE (type
) == RECORD_TYPE
)
4983 /* For a struct the APCS says that we only return in a register
4984 if the type is 'integer like' and every addressable element
4985 has an offset of zero. For practical purposes this means
4986 that the structure can have at most one non bit-field element
4987 and that this element must be the first one in the structure. */
4989 /* Find the first field, ignoring non FIELD_DECL things which will
4990 have been created by C++. */
4991 for (field
= TYPE_FIELDS (type
);
4992 field
&& TREE_CODE (field
) != FIELD_DECL
;
4993 field
= DECL_CHAIN (field
))
4997 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4999 /* Check that the first field is valid for returning in a register. */
5001 /* ... Floats are not allowed */
5002 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5005 /* ... Aggregates that are not themselves valid for returning in
5006 a register are not allowed. */
5007 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5010 /* Now check the remaining fields, if any. Only bitfields are allowed,
5011 since they are not addressable. */
5012 for (field
= DECL_CHAIN (field
);
5014 field
= DECL_CHAIN (field
))
5016 if (TREE_CODE (field
) != FIELD_DECL
)
5019 if (!DECL_BIT_FIELD_TYPE (field
))
5026 if (TREE_CODE (type
) == UNION_TYPE
)
5030 /* Unions can be returned in registers if every element is
5031 integral, or can be returned in an integer register. */
5032 for (field
= TYPE_FIELDS (type
);
5034 field
= DECL_CHAIN (field
))
5036 if (TREE_CODE (field
) != FIELD_DECL
)
5039 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5042 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5048 #endif /* not ARM_WINCE */
5050 /* Return all other types in memory. */
5054 const struct pcs_attribute_arg
5058 } pcs_attribute_args
[] =
5060 {"aapcs", ARM_PCS_AAPCS
},
5061 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5063 /* We could recognize these, but changes would be needed elsewhere
5064 * to implement them. */
5065 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5066 {"atpcs", ARM_PCS_ATPCS
},
5067 {"apcs", ARM_PCS_APCS
},
5069 {NULL
, ARM_PCS_UNKNOWN
}
5073 arm_pcs_from_attribute (tree attr
)
5075 const struct pcs_attribute_arg
*ptr
;
5078 /* Get the value of the argument. */
5079 if (TREE_VALUE (attr
) == NULL_TREE
5080 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5081 return ARM_PCS_UNKNOWN
;
5083 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5085 /* Check it against the list of known arguments. */
5086 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5087 if (streq (arg
, ptr
->arg
))
5090 /* An unrecognized interrupt type. */
5091 return ARM_PCS_UNKNOWN
;
5094 /* Get the PCS variant to use for this call. TYPE is the function's type
5095 specification, DECL is the specific declartion. DECL may be null if
5096 the call could be indirect or if this is a library call. */
5098 arm_get_pcs_model (const_tree type
, const_tree decl
)
5100 bool user_convention
= false;
5101 enum arm_pcs user_pcs
= arm_pcs_default
;
5106 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5109 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5110 user_convention
= true;
5113 if (TARGET_AAPCS_BASED
)
5115 /* Detect varargs functions. These always use the base rules
5116 (no argument is ever a candidate for a co-processor
5118 bool base_rules
= stdarg_p (type
);
5120 if (user_convention
)
5122 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5123 sorry ("non-AAPCS derived PCS variant");
5124 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5125 error ("variadic functions must use the base AAPCS variant");
5129 return ARM_PCS_AAPCS
;
5130 else if (user_convention
)
5132 else if (decl
&& flag_unit_at_a_time
)
5134 /* Local functions never leak outside this compilation unit,
5135 so we are free to use whatever conventions are
5137 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5138 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5140 return ARM_PCS_AAPCS_LOCAL
;
5143 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5144 sorry ("PCS variant");
5146 /* For everything else we use the target's default. */
5147 return arm_pcs_default
;
5152 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5153 const_tree fntype ATTRIBUTE_UNUSED
,
5154 rtx libcall ATTRIBUTE_UNUSED
,
5155 const_tree fndecl ATTRIBUTE_UNUSED
)
5157 /* Record the unallocated VFP registers. */
5158 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5159 pcum
->aapcs_vfp_reg_alloc
= 0;
5162 /* Walk down the type tree of TYPE counting consecutive base elements.
5163 If *MODEP is VOIDmode, then set it to the first valid floating point
5164 type. If a non-floating point type is found, or if a floating point
5165 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5166 otherwise return the count in the sub-tree. */
5168 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
5170 enum machine_mode mode
;
5173 switch (TREE_CODE (type
))
5176 mode
= TYPE_MODE (type
);
5177 if (mode
!= DFmode
&& mode
!= SFmode
)
5180 if (*modep
== VOIDmode
)
5189 mode
= TYPE_MODE (TREE_TYPE (type
));
5190 if (mode
!= DFmode
&& mode
!= SFmode
)
5193 if (*modep
== VOIDmode
)
5202 /* Use V2SImode and V4SImode as representatives of all 64-bit
5203 and 128-bit vector types, whether or not those modes are
5204 supported with the present options. */
5205 size
= int_size_in_bytes (type
);
5218 if (*modep
== VOIDmode
)
5221 /* Vector modes are considered to be opaque: two vectors are
5222 equivalent for the purposes of being homogeneous aggregates
5223 if they are the same size. */
5232 tree index
= TYPE_DOMAIN (type
);
5234 /* Can't handle incomplete types nor sizes that are not
5236 if (!COMPLETE_TYPE_P (type
)
5237 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5240 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5243 || !TYPE_MAX_VALUE (index
)
5244 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5245 || !TYPE_MIN_VALUE (index
)
5246 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5250 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5251 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5253 /* There must be no padding. */
5254 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5266 /* Can't handle incomplete types nor sizes that are not
5268 if (!COMPLETE_TYPE_P (type
)
5269 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5272 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5274 if (TREE_CODE (field
) != FIELD_DECL
)
5277 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5283 /* There must be no padding. */
5284 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5291 case QUAL_UNION_TYPE
:
5293 /* These aren't very interesting except in a degenerate case. */
5298 /* Can't handle incomplete types nor sizes that are not
5300 if (!COMPLETE_TYPE_P (type
)
5301 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5304 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5306 if (TREE_CODE (field
) != FIELD_DECL
)
5309 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5312 count
= count
> sub_count
? count
: sub_count
;
5315 /* There must be no padding. */
5316 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5329 /* Return true if PCS_VARIANT should use VFP registers. */
5331 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5333 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5335 static bool seen_thumb1_vfp
= false;
5337 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5339 sorry ("Thumb-1 hard-float VFP ABI");
5340 /* sorry() is not immediately fatal, so only display this once. */
5341 seen_thumb1_vfp
= true;
5347 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5350 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
5351 (TARGET_VFP_DOUBLE
|| !is_double
));
5354 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5355 suitable for passing or returning in VFP registers for the PCS
5356 variant selected. If it is, then *BASE_MODE is updated to contain
5357 a machine mode describing each element of the argument's type and
5358 *COUNT to hold the number of such elements. */
5360 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5361 enum machine_mode mode
, const_tree type
,
5362 enum machine_mode
*base_mode
, int *count
)
5364 enum machine_mode new_mode
= VOIDmode
;
5366 /* If we have the type information, prefer that to working things
5367 out from the mode. */
5370 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5372 if (ag_count
> 0 && ag_count
<= 4)
5377 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5378 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5379 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5384 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5387 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5393 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5396 *base_mode
= new_mode
;
5401 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5402 enum machine_mode mode
, const_tree type
)
5404 int count ATTRIBUTE_UNUSED
;
5405 enum machine_mode ag_mode ATTRIBUTE_UNUSED
;
5407 if (!use_vfp_abi (pcs_variant
, false))
5409 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5414 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5417 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5420 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5421 &pcum
->aapcs_vfp_rmode
,
5422 &pcum
->aapcs_vfp_rcount
);
5426 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5427 const_tree type ATTRIBUTE_UNUSED
)
5429 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
5430 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5433 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5434 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5436 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5438 || (mode
== TImode
&& ! TARGET_NEON
)
5439 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5442 int rcount
= pcum
->aapcs_vfp_rcount
;
5444 enum machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5448 /* Avoid using unsupported vector modes. */
5449 if (rmode
== V2SImode
)
5451 else if (rmode
== V4SImode
)
5458 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5459 for (i
= 0; i
< rcount
; i
++)
5461 rtx tmp
= gen_rtx_REG (rmode
,
5462 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5463 tmp
= gen_rtx_EXPR_LIST
5465 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5466 XVECEXP (par
, 0, i
) = tmp
;
5469 pcum
->aapcs_reg
= par
;
5472 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
5479 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
5480 enum machine_mode mode
,
5481 const_tree type ATTRIBUTE_UNUSED
)
5483 if (!use_vfp_abi (pcs_variant
, false))
5486 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
5489 enum machine_mode ag_mode
;
5494 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5499 if (ag_mode
== V2SImode
)
5501 else if (ag_mode
== V4SImode
)
5507 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
5508 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
5509 for (i
= 0; i
< count
; i
++)
5511 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
5512 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
5513 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
5514 XVECEXP (par
, 0, i
) = tmp
;
5520 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
5524 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5525 enum machine_mode mode ATTRIBUTE_UNUSED
,
5526 const_tree type ATTRIBUTE_UNUSED
)
5528 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
5529 pcum
->aapcs_vfp_reg_alloc
= 0;
5533 #define AAPCS_CP(X) \
5535 aapcs_ ## X ## _cum_init, \
5536 aapcs_ ## X ## _is_call_candidate, \
5537 aapcs_ ## X ## _allocate, \
5538 aapcs_ ## X ## _is_return_candidate, \
5539 aapcs_ ## X ## _allocate_return_reg, \
5540 aapcs_ ## X ## _advance \
5543 /* Table of co-processors that can be used to pass arguments in
5544 registers. Idealy no arugment should be a candidate for more than
5545 one co-processor table entry, but the table is processed in order
5546 and stops after the first match. If that entry then fails to put
5547 the argument into a co-processor register, the argument will go on
5551 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5552 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
5554 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5555 BLKmode) is a candidate for this co-processor's registers; this
5556 function should ignore any position-dependent state in
5557 CUMULATIVE_ARGS and only use call-type dependent information. */
5558 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5560 /* Return true if the argument does get a co-processor register; it
5561 should set aapcs_reg to an RTX of the register allocated as is
5562 required for a return from FUNCTION_ARG. */
5563 bool (*allocate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5565 /* Return true if a result of mode MODE (or type TYPE if MODE is
5566 BLKmode) is can be returned in this co-processor's registers. */
5567 bool (*is_return_candidate
) (enum arm_pcs
, enum machine_mode
, const_tree
);
5569 /* Allocate and return an RTX element to hold the return type of a
5570 call, this routine must not fail and will only be called if
5571 is_return_candidate returned true with the same parameters. */
5572 rtx (*allocate_return_reg
) (enum arm_pcs
, enum machine_mode
, const_tree
);
5574 /* Finish processing this argument and prepare to start processing
5576 void (*advance
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5577 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
5585 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5590 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5591 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
5598 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
5600 /* We aren't passed a decl, so we can't check that a call is local.
5601 However, it isn't clear that that would be a win anyway, since it
5602 might limit some tail-calling opportunities. */
5603 enum arm_pcs pcs_variant
;
5607 const_tree fndecl
= NULL_TREE
;
5609 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5612 fntype
= TREE_TYPE (fntype
);
5615 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5618 pcs_variant
= arm_pcs_default
;
5620 if (pcs_variant
!= ARM_PCS_AAPCS
)
5624 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5625 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
5634 aapcs_allocate_return_reg (enum machine_mode mode
, const_tree type
,
5637 /* We aren't passed a decl, so we can't check that a call is local.
5638 However, it isn't clear that that would be a win anyway, since it
5639 might limit some tail-calling opportunities. */
5640 enum arm_pcs pcs_variant
;
5641 int unsignedp ATTRIBUTE_UNUSED
;
5645 const_tree fndecl
= NULL_TREE
;
5647 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5650 fntype
= TREE_TYPE (fntype
);
5653 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5656 pcs_variant
= arm_pcs_default
;
5658 /* Promote integer types. */
5659 if (type
&& INTEGRAL_TYPE_P (type
))
5660 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
5662 if (pcs_variant
!= ARM_PCS_AAPCS
)
5666 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5667 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
5669 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
5673 /* Promotes small structs returned in a register to full-word size
5674 for big-endian AAPCS. */
5675 if (type
&& arm_return_in_msb (type
))
5677 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5678 if (size
% UNITS_PER_WORD
!= 0)
5680 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5681 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5685 return gen_rtx_REG (mode
, R0_REGNUM
);
5689 aapcs_libcall_value (enum machine_mode mode
)
5691 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
5692 && GET_MODE_SIZE (mode
) <= 4)
5695 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
5698 /* Lay out a function argument using the AAPCS rules. The rule
5699 numbers referred to here are those in the AAPCS. */
5701 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5702 const_tree type
, bool named
)
5707 /* We only need to do this once per argument. */
5708 if (pcum
->aapcs_arg_processed
)
5711 pcum
->aapcs_arg_processed
= true;
5713 /* Special case: if named is false then we are handling an incoming
5714 anonymous argument which is on the stack. */
5718 /* Is this a potential co-processor register candidate? */
5719 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5721 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
5722 pcum
->aapcs_cprc_slot
= slot
;
5724 /* We don't have to apply any of the rules from part B of the
5725 preparation phase, these are handled elsewhere in the
5730 /* A Co-processor register candidate goes either in its own
5731 class of registers or on the stack. */
5732 if (!pcum
->aapcs_cprc_failed
[slot
])
5734 /* C1.cp - Try to allocate the argument to co-processor
5736 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
5739 /* C2.cp - Put the argument on the stack and note that we
5740 can't assign any more candidates in this slot. We also
5741 need to note that we have allocated stack space, so that
5742 we won't later try to split a non-cprc candidate between
5743 core registers and the stack. */
5744 pcum
->aapcs_cprc_failed
[slot
] = true;
5745 pcum
->can_split
= false;
5748 /* We didn't get a register, so this argument goes on the
5750 gcc_assert (pcum
->can_split
== false);
5755 /* C3 - For double-word aligned arguments, round the NCRN up to the
5756 next even number. */
5757 ncrn
= pcum
->aapcs_ncrn
;
5758 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
5761 nregs
= ARM_NUM_REGS2(mode
, type
);
5763 /* Sigh, this test should really assert that nregs > 0, but a GCC
5764 extension allows empty structs and then gives them empty size; it
5765 then allows such a structure to be passed by value. For some of
5766 the code below we have to pretend that such an argument has
5767 non-zero size so that we 'locate' it correctly either in
5768 registers or on the stack. */
5769 gcc_assert (nregs
>= 0);
5771 nregs2
= nregs
? nregs
: 1;
5773 /* C4 - Argument fits entirely in core registers. */
5774 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
5776 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5777 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
5781 /* C5 - Some core registers left and there are no arguments already
5782 on the stack: split this argument between the remaining core
5783 registers and the stack. */
5784 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
5786 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5787 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5788 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
5792 /* C6 - NCRN is set to 4. */
5793 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5795 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
5799 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5800 for a call to a function whose data type is FNTYPE.
5801 For a library call, FNTYPE is NULL. */
5803 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
5805 tree fndecl ATTRIBUTE_UNUSED
)
5807 /* Long call handling. */
5809 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5811 pcum
->pcs_variant
= arm_pcs_default
;
5813 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5815 if (arm_libcall_uses_aapcs_base (libname
))
5816 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
5818 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
5819 pcum
->aapcs_reg
= NULL_RTX
;
5820 pcum
->aapcs_partial
= 0;
5821 pcum
->aapcs_arg_processed
= false;
5822 pcum
->aapcs_cprc_slot
= -1;
5823 pcum
->can_split
= true;
5825 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5829 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5831 pcum
->aapcs_cprc_failed
[i
] = false;
5832 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
5840 /* On the ARM, the offset starts at 0. */
5842 pcum
->iwmmxt_nregs
= 0;
5843 pcum
->can_split
= true;
5845 /* Varargs vectors are treated the same as long long.
5846 named_count avoids having to change the way arm handles 'named' */
5847 pcum
->named_count
= 0;
5850 if (TARGET_REALLY_IWMMXT
&& fntype
)
5854 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
5856 fn_arg
= TREE_CHAIN (fn_arg
))
5857 pcum
->named_count
+= 1;
5859 if (! pcum
->named_count
)
5860 pcum
->named_count
= INT_MAX
;
5864 /* Return true if we use LRA instead of reload pass. */
5868 return arm_lra_flag
;
5871 /* Return true if mode/type need doubleword alignment. */
5873 arm_needs_doubleword_align (enum machine_mode mode
, const_tree type
)
5875 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
5876 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
5880 /* Determine where to put an argument to a function.
5881 Value is zero to push the argument on the stack,
5882 or a hard register in which to store the argument.
5884 MODE is the argument's machine mode.
5885 TYPE is the data type of the argument (as a tree).
5886 This is null for libcalls where that information may
5888 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5889 the preceding args and about the function being called.
5890 NAMED is nonzero if this argument is a named parameter
5891 (otherwise it is an extra parameter matching an ellipsis).
5893 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5894 other arguments are passed on the stack. If (NAMED == 0) (which happens
5895 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5896 defined), say it is passed in the stack (function_prologue will
5897 indeed make it pass in the stack if necessary). */
5900 arm_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
5901 const_tree type
, bool named
)
5903 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5906 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5907 a call insn (op3 of a call_value insn). */
5908 if (mode
== VOIDmode
)
5911 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5913 aapcs_layout_arg (pcum
, mode
, type
, named
);
5914 return pcum
->aapcs_reg
;
5917 /* Varargs vectors are treated the same as long long.
5918 named_count avoids having to change the way arm handles 'named' */
5919 if (TARGET_IWMMXT_ABI
5920 && arm_vector_mode_supported_p (mode
)
5921 && pcum
->named_count
> pcum
->nargs
+ 1)
5923 if (pcum
->iwmmxt_nregs
<= 9)
5924 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
5927 pcum
->can_split
= false;
5932 /* Put doubleword aligned quantities in even register pairs. */
5934 && ARM_DOUBLEWORD_ALIGN
5935 && arm_needs_doubleword_align (mode
, type
))
5938 /* Only allow splitting an arg between regs and memory if all preceding
5939 args were allocated to regs. For args passed by reference we only count
5940 the reference pointer. */
5941 if (pcum
->can_split
)
5944 nregs
= ARM_NUM_REGS2 (mode
, type
);
5946 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
5949 return gen_rtx_REG (mode
, pcum
->nregs
);
5953 arm_function_arg_boundary (enum machine_mode mode
, const_tree type
)
5955 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
5956 ? DOUBLEWORD_ALIGNMENT
5961 arm_arg_partial_bytes (cumulative_args_t pcum_v
, enum machine_mode mode
,
5962 tree type
, bool named
)
5964 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5965 int nregs
= pcum
->nregs
;
5967 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5969 aapcs_layout_arg (pcum
, mode
, type
, named
);
5970 return pcum
->aapcs_partial
;
5973 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
5976 if (NUM_ARG_REGS
> nregs
5977 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
5979 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
5984 /* Update the data in PCUM to advance over an argument
5985 of mode MODE and data type TYPE.
5986 (TYPE is null for libcalls where that information may not be available.) */
5989 arm_function_arg_advance (cumulative_args_t pcum_v
, enum machine_mode mode
,
5990 const_tree type
, bool named
)
5992 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5994 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5996 aapcs_layout_arg (pcum
, mode
, type
, named
);
5998 if (pcum
->aapcs_cprc_slot
>= 0)
6000 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6002 pcum
->aapcs_cprc_slot
= -1;
6005 /* Generic stuff. */
6006 pcum
->aapcs_arg_processed
= false;
6007 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6008 pcum
->aapcs_reg
= NULL_RTX
;
6009 pcum
->aapcs_partial
= 0;
6014 if (arm_vector_mode_supported_p (mode
)
6015 && pcum
->named_count
> pcum
->nargs
6016 && TARGET_IWMMXT_ABI
)
6017 pcum
->iwmmxt_nregs
+= 1;
6019 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6023 /* Variable sized types are passed by reference. This is a GCC
6024 extension to the ARM ABI. */
6027 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6028 enum machine_mode mode ATTRIBUTE_UNUSED
,
6029 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6031 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
6034 /* Encode the current state of the #pragma [no_]long_calls. */
6037 OFF
, /* No #pragma [no_]long_calls is in effect. */
6038 LONG
, /* #pragma long_calls is in effect. */
6039 SHORT
/* #pragma no_long_calls is in effect. */
6042 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
6045 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6047 arm_pragma_long_calls
= LONG
;
6051 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6053 arm_pragma_long_calls
= SHORT
;
6057 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6059 arm_pragma_long_calls
= OFF
;
6062 /* Handle an attribute requiring a FUNCTION_DECL;
6063 arguments as in struct attribute_spec.handler. */
6065 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6066 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6068 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6070 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6072 *no_add_attrs
= true;
6078 /* Handle an "interrupt" or "isr" attribute;
6079 arguments as in struct attribute_spec.handler. */
6081 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6086 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6088 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6090 *no_add_attrs
= true;
6092 /* FIXME: the argument if any is checked for type attributes;
6093 should it be checked for decl ones? */
6097 if (TREE_CODE (*node
) == FUNCTION_TYPE
6098 || TREE_CODE (*node
) == METHOD_TYPE
)
6100 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6102 warning (OPT_Wattributes
, "%qE attribute ignored",
6104 *no_add_attrs
= true;
6107 else if (TREE_CODE (*node
) == POINTER_TYPE
6108 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6109 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6110 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6112 *node
= build_variant_type_copy (*node
);
6113 TREE_TYPE (*node
) = build_type_attribute_variant
6115 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6116 *no_add_attrs
= true;
6120 /* Possibly pass this attribute on from the type to a decl. */
6121 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6122 | (int) ATTR_FLAG_FUNCTION_NEXT
6123 | (int) ATTR_FLAG_ARRAY_NEXT
))
6125 *no_add_attrs
= true;
6126 return tree_cons (name
, args
, NULL_TREE
);
6130 warning (OPT_Wattributes
, "%qE attribute ignored",
6139 /* Handle a "pcs" attribute; arguments as in struct
6140 attribute_spec.handler. */
6142 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6143 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6145 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6147 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6148 *no_add_attrs
= true;
6153 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6154 /* Handle the "notshared" attribute. This attribute is another way of
6155 requesting hidden visibility. ARM's compiler supports
6156 "__declspec(notshared)"; we support the same thing via an
6160 arm_handle_notshared_attribute (tree
*node
,
6161 tree name ATTRIBUTE_UNUSED
,
6162 tree args ATTRIBUTE_UNUSED
,
6163 int flags ATTRIBUTE_UNUSED
,
6166 tree decl
= TYPE_NAME (*node
);
6170 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
6171 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
6172 *no_add_attrs
= false;
6178 /* Return 0 if the attributes for two types are incompatible, 1 if they
6179 are compatible, and 2 if they are nearly compatible (which causes a
6180 warning to be generated). */
6182 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
6186 /* Check for mismatch of non-default calling convention. */
6187 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
6190 /* Check for mismatched call attributes. */
6191 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6192 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6193 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6194 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6196 /* Only bother to check if an attribute is defined. */
6197 if (l1
| l2
| s1
| s2
)
6199 /* If one type has an attribute, the other must have the same attribute. */
6200 if ((l1
!= l2
) || (s1
!= s2
))
6203 /* Disallow mixed attributes. */
6204 if ((l1
& s2
) || (l2
& s1
))
6208 /* Check for mismatched ISR attribute. */
6209 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
6211 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
6212 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
6214 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
6221 /* Assigns default attributes to newly defined type. This is used to
6222 set short_call/long_call attributes for function types of
6223 functions defined inside corresponding #pragma scopes. */
6225 arm_set_default_type_attributes (tree type
)
6227 /* Add __attribute__ ((long_call)) to all functions, when
6228 inside #pragma long_calls or __attribute__ ((short_call)),
6229 when inside #pragma no_long_calls. */
6230 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
6232 tree type_attr_list
, attr_name
;
6233 type_attr_list
= TYPE_ATTRIBUTES (type
);
6235 if (arm_pragma_long_calls
== LONG
)
6236 attr_name
= get_identifier ("long_call");
6237 else if (arm_pragma_long_calls
== SHORT
)
6238 attr_name
= get_identifier ("short_call");
6242 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
6243 TYPE_ATTRIBUTES (type
) = type_attr_list
;
6247 /* Return true if DECL is known to be linked into section SECTION. */
6250 arm_function_in_section_p (tree decl
, section
*section
)
6252 /* We can only be certain about functions defined in the same
6253 compilation unit. */
6254 if (!TREE_STATIC (decl
))
6257 /* Make sure that SYMBOL always binds to the definition in this
6258 compilation unit. */
6259 if (!targetm
.binds_local_p (decl
))
6262 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6263 if (!DECL_SECTION_NAME (decl
))
6265 /* Make sure that we will not create a unique section for DECL. */
6266 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
6270 return function_section (decl
) == section
;
6273 /* Return nonzero if a 32-bit "long_call" should be generated for
6274 a call from the current function to DECL. We generate a long_call
6277 a. has an __attribute__((long call))
6278 or b. is within the scope of a #pragma long_calls
6279 or c. the -mlong-calls command line switch has been specified
6281 However we do not generate a long call if the function:
6283 d. has an __attribute__ ((short_call))
6284 or e. is inside the scope of a #pragma no_long_calls
6285 or f. is defined in the same section as the current function. */
6288 arm_is_long_call_p (tree decl
)
6293 return TARGET_LONG_CALLS
;
6295 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
6296 if (lookup_attribute ("short_call", attrs
))
6299 /* For "f", be conservative, and only cater for cases in which the
6300 whole of the current function is placed in the same section. */
6301 if (!flag_reorder_blocks_and_partition
6302 && TREE_CODE (decl
) == FUNCTION_DECL
6303 && arm_function_in_section_p (decl
, current_function_section ()))
6306 if (lookup_attribute ("long_call", attrs
))
6309 return TARGET_LONG_CALLS
;
6312 /* Return nonzero if it is ok to make a tail-call to DECL. */
6314 arm_function_ok_for_sibcall (tree decl
, tree exp
)
6316 unsigned long func_type
;
6318 if (cfun
->machine
->sibcall_blocked
)
6321 /* Never tailcall something if we are generating code for Thumb-1. */
6325 /* The PIC register is live on entry to VxWorks PLT entries, so we
6326 must make the call before restoring the PIC register. */
6327 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
6330 /* If we are interworking and the function is not declared static
6331 then we can't tail-call it unless we know that it exists in this
6332 compilation unit (since it might be a Thumb routine). */
6333 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
6334 && !TREE_ASM_WRITTEN (decl
))
6337 func_type
= arm_current_func_type ();
6338 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6339 if (IS_INTERRUPT (func_type
))
6342 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
6344 /* Check that the return value locations are the same. For
6345 example that we aren't returning a value from the sibling in
6346 a VFP register but then need to transfer it to a core
6350 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
6351 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
6353 if (!rtx_equal_p (a
, b
))
6357 /* Never tailcall if function may be called with a misaligned SP. */
6358 if (IS_STACKALIGN (func_type
))
6361 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6362 references should become a NOP. Don't convert such calls into
6364 if (TARGET_AAPCS_BASED
6365 && arm_abi
== ARM_ABI_AAPCS
6367 && DECL_WEAK (decl
))
6370 /* Everything else is ok. */
6375 /* Addressing mode support functions. */
6377 /* Return nonzero if X is a legitimate immediate operand when compiling
6378 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6380 legitimate_pic_operand_p (rtx x
)
6382 if (GET_CODE (x
) == SYMBOL_REF
6383 || (GET_CODE (x
) == CONST
6384 && GET_CODE (XEXP (x
, 0)) == PLUS
6385 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
6391 /* Record that the current function needs a PIC register. Initialize
6392 cfun->machine->pic_reg if we have not already done so. */
6395 require_pic_register (void)
6397 /* A lot of the logic here is made obscure by the fact that this
6398 routine gets called as part of the rtx cost estimation process.
6399 We don't want those calls to affect any assumptions about the real
6400 function; and further, we can't call entry_of_function() until we
6401 start the real expansion process. */
6402 if (!crtl
->uses_pic_offset_table
)
6404 gcc_assert (can_create_pseudo_p ());
6405 if (arm_pic_register
!= INVALID_REGNUM
6406 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
6408 if (!cfun
->machine
->pic_reg
)
6409 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
6411 /* Play games to avoid marking the function as needing pic
6412 if we are being called as part of the cost-estimation
6414 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6415 crtl
->uses_pic_offset_table
= 1;
6421 if (!cfun
->machine
->pic_reg
)
6422 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
6424 /* Play games to avoid marking the function as needing pic
6425 if we are being called as part of the cost-estimation
6427 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6429 crtl
->uses_pic_offset_table
= 1;
6432 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
6433 && arm_pic_register
> LAST_LO_REGNUM
)
6434 emit_move_insn (cfun
->machine
->pic_reg
,
6435 gen_rtx_REG (Pmode
, arm_pic_register
));
6437 arm_load_pic_register (0UL);
6442 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
6444 INSN_LOCATION (insn
) = prologue_location
;
6446 /* We can be called during expansion of PHI nodes, where
6447 we can't yet emit instructions directly in the final
6448 insn stream. Queue the insns on the entry edge, they will
6449 be committed after everything else is expanded. */
6450 insert_insn_on_edge (seq
,
6451 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
6458 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
6460 if (GET_CODE (orig
) == SYMBOL_REF
6461 || GET_CODE (orig
) == LABEL_REF
)
6467 gcc_assert (can_create_pseudo_p ());
6468 reg
= gen_reg_rtx (Pmode
);
6471 /* VxWorks does not impose a fixed gap between segments; the run-time
6472 gap can be different from the object-file gap. We therefore can't
6473 use GOTOFF unless we are absolutely sure that the symbol is in the
6474 same segment as the GOT. Unfortunately, the flexibility of linker
6475 scripts means that we can't be sure of that in general, so assume
6476 that GOTOFF is never valid on VxWorks. */
6477 if ((GET_CODE (orig
) == LABEL_REF
6478 || (GET_CODE (orig
) == SYMBOL_REF
&&
6479 SYMBOL_REF_LOCAL_P (orig
)))
6481 && arm_pic_data_is_text_relative
)
6482 insn
= arm_pic_static_addr (orig
, reg
);
6488 /* If this function doesn't have a pic register, create one now. */
6489 require_pic_register ();
6491 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
6493 /* Make the MEM as close to a constant as possible. */
6494 mem
= SET_SRC (pat
);
6495 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
6496 MEM_READONLY_P (mem
) = 1;
6497 MEM_NOTRAP_P (mem
) = 1;
6499 insn
= emit_insn (pat
);
6502 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6504 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
6508 else if (GET_CODE (orig
) == CONST
)
6512 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6513 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
6516 /* Handle the case where we have: const (UNSPEC_TLS). */
6517 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
6518 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
6521 /* Handle the case where we have:
6522 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6524 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6525 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
6526 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
6528 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
6534 gcc_assert (can_create_pseudo_p ());
6535 reg
= gen_reg_rtx (Pmode
);
6538 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
6540 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
6541 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
6542 base
== reg
? 0 : reg
);
6544 if (CONST_INT_P (offset
))
6546 /* The base register doesn't really matter, we only want to
6547 test the index for the appropriate mode. */
6548 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
6550 gcc_assert (can_create_pseudo_p ());
6551 offset
= force_reg (Pmode
, offset
);
6554 if (CONST_INT_P (offset
))
6555 return plus_constant (Pmode
, base
, INTVAL (offset
));
6558 if (GET_MODE_SIZE (mode
) > 4
6559 && (GET_MODE_CLASS (mode
) == MODE_INT
6560 || TARGET_SOFT_FLOAT
))
6562 emit_insn (gen_addsi3 (reg
, base
, offset
));
6566 return gen_rtx_PLUS (Pmode
, base
, offset
);
6573 /* Find a spare register to use during the prolog of a function. */
6576 thumb_find_work_register (unsigned long pushed_regs_mask
)
6580 /* Check the argument registers first as these are call-used. The
6581 register allocation order means that sometimes r3 might be used
6582 but earlier argument registers might not, so check them all. */
6583 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
6584 if (!df_regs_ever_live_p (reg
))
6587 /* Before going on to check the call-saved registers we can try a couple
6588 more ways of deducing that r3 is available. The first is when we are
6589 pushing anonymous arguments onto the stack and we have less than 4
6590 registers worth of fixed arguments(*). In this case r3 will be part of
6591 the variable argument list and so we can be sure that it will be
6592 pushed right at the start of the function. Hence it will be available
6593 for the rest of the prologue.
6594 (*): ie crtl->args.pretend_args_size is greater than 0. */
6595 if (cfun
->machine
->uses_anonymous_args
6596 && crtl
->args
.pretend_args_size
> 0)
6597 return LAST_ARG_REGNUM
;
6599 /* The other case is when we have fixed arguments but less than 4 registers
6600 worth. In this case r3 might be used in the body of the function, but
6601 it is not being used to convey an argument into the function. In theory
6602 we could just check crtl->args.size to see how many bytes are
6603 being passed in argument registers, but it seems that it is unreliable.
6604 Sometimes it will have the value 0 when in fact arguments are being
6605 passed. (See testcase execute/20021111-1.c for an example). So we also
6606 check the args_info.nregs field as well. The problem with this field is
6607 that it makes no allowances for arguments that are passed to the
6608 function but which are not used. Hence we could miss an opportunity
6609 when a function has an unused argument in r3. But it is better to be
6610 safe than to be sorry. */
6611 if (! cfun
->machine
->uses_anonymous_args
6612 && crtl
->args
.size
>= 0
6613 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
6614 && (TARGET_AAPCS_BASED
6615 ? crtl
->args
.info
.aapcs_ncrn
< 4
6616 : crtl
->args
.info
.nregs
< 4))
6617 return LAST_ARG_REGNUM
;
6619 /* Otherwise look for a call-saved register that is going to be pushed. */
6620 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
6621 if (pushed_regs_mask
& (1 << reg
))
6626 /* Thumb-2 can use high regs. */
6627 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
6628 if (pushed_regs_mask
& (1 << reg
))
6631 /* Something went wrong - thumb_compute_save_reg_mask()
6632 should have arranged for a suitable register to be pushed. */
6636 static GTY(()) int pic_labelno
;
6638 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6642 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
6644 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
6646 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
6649 gcc_assert (flag_pic
);
6651 pic_reg
= cfun
->machine
->pic_reg
;
6652 if (TARGET_VXWORKS_RTP
)
6654 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
6655 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6656 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
6658 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
6660 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
6661 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
6665 /* We use an UNSPEC rather than a LABEL_REF because this label
6666 never appears in the code stream. */
6668 labelno
= GEN_INT (pic_labelno
++);
6669 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6670 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6672 /* On the ARM the PC register contains 'dot + 8' at the time of the
6673 addition, on the Thumb it is 'dot + 4'. */
6674 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6675 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
6677 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6681 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6683 else /* TARGET_THUMB1 */
6685 if (arm_pic_register
!= INVALID_REGNUM
6686 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
6688 /* We will have pushed the pic register, so we should always be
6689 able to find a work register. */
6690 pic_tmp
= gen_rtx_REG (SImode
,
6691 thumb_find_work_register (saved_regs
));
6692 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
6693 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
6694 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
6696 else if (arm_pic_register
!= INVALID_REGNUM
6697 && arm_pic_register
> LAST_LO_REGNUM
6698 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
6700 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6701 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
6702 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
6705 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6709 /* Need to emit this whether or not we obey regdecls,
6710 since setjmp/longjmp can cause life info to screw up. */
6714 /* Generate code to load the address of a static var when flag_pic is set. */
6716 arm_pic_static_addr (rtx orig
, rtx reg
)
6718 rtx l1
, labelno
, offset_rtx
, insn
;
6720 gcc_assert (flag_pic
);
6722 /* We use an UNSPEC rather than a LABEL_REF because this label
6723 never appears in the code stream. */
6724 labelno
= GEN_INT (pic_labelno
++);
6725 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6726 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6728 /* On the ARM the PC register contains 'dot + 8' at the time of the
6729 addition, on the Thumb it is 'dot + 4'. */
6730 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6731 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
6732 UNSPEC_SYMBOL_OFFSET
);
6733 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
6735 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
6739 /* Return nonzero if X is valid as an ARM state addressing register. */
6741 arm_address_register_rtx_p (rtx x
, int strict_p
)
6751 return ARM_REGNO_OK_FOR_BASE_P (regno
);
6753 return (regno
<= LAST_ARM_REGNUM
6754 || regno
>= FIRST_PSEUDO_REGISTER
6755 || regno
== FRAME_POINTER_REGNUM
6756 || regno
== ARG_POINTER_REGNUM
);
6759 /* Return TRUE if this rtx is the difference of a symbol and a label,
6760 and will reduce to a PC-relative relocation in the object file.
6761 Expressions like this can be left alone when generating PIC, rather
6762 than forced through the GOT. */
6764 pcrel_constant_p (rtx x
)
6766 if (GET_CODE (x
) == MINUS
)
6767 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
6772 /* Return true if X will surely end up in an index register after next
6775 will_be_in_index_register (const_rtx x
)
6777 /* arm.md: calculate_pic_address will split this into a register. */
6778 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
6781 /* Return nonzero if X is a valid ARM state address operand. */
6783 arm_legitimate_address_outer_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
6787 enum rtx_code code
= GET_CODE (x
);
6789 if (arm_address_register_rtx_p (x
, strict_p
))
6792 use_ldrd
= (TARGET_LDRD
6794 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6796 if (code
== POST_INC
|| code
== PRE_DEC
6797 || ((code
== PRE_INC
|| code
== POST_DEC
)
6798 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6799 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6801 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6802 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6803 && GET_CODE (XEXP (x
, 1)) == PLUS
6804 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6806 rtx addend
= XEXP (XEXP (x
, 1), 1);
6808 /* Don't allow ldrd post increment by register because it's hard
6809 to fixup invalid register choices. */
6811 && GET_CODE (x
) == POST_MODIFY
6815 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
6816 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
6819 /* After reload constants split into minipools will have addresses
6820 from a LABEL_REF. */
6821 else if (reload_completed
6822 && (code
== LABEL_REF
6824 && GET_CODE (XEXP (x
, 0)) == PLUS
6825 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6826 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6829 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6832 else if (code
== PLUS
)
6834 rtx xop0
= XEXP (x
, 0);
6835 rtx xop1
= XEXP (x
, 1);
6837 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6838 && ((CONST_INT_P (xop1
)
6839 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
6840 || (!strict_p
&& will_be_in_index_register (xop1
))))
6841 || (arm_address_register_rtx_p (xop1
, strict_p
)
6842 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
6846 /* Reload currently can't handle MINUS, so disable this for now */
6847 else if (GET_CODE (x
) == MINUS
)
6849 rtx xop0
= XEXP (x
, 0);
6850 rtx xop1
= XEXP (x
, 1);
6852 return (arm_address_register_rtx_p (xop0
, strict_p
)
6853 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
6857 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6858 && code
== SYMBOL_REF
6859 && CONSTANT_POOL_ADDRESS_P (x
)
6861 && symbol_mentioned_p (get_pool_constant (x
))
6862 && ! pcrel_constant_p (get_pool_constant (x
))))
6868 /* Return nonzero if X is a valid Thumb-2 address operand. */
6870 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
6873 enum rtx_code code
= GET_CODE (x
);
6875 if (arm_address_register_rtx_p (x
, strict_p
))
6878 use_ldrd
= (TARGET_LDRD
6880 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6882 if (code
== POST_INC
|| code
== PRE_DEC
6883 || ((code
== PRE_INC
|| code
== POST_DEC
)
6884 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6885 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6887 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6888 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6889 && GET_CODE (XEXP (x
, 1)) == PLUS
6890 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6892 /* Thumb-2 only has autoincrement by constant. */
6893 rtx addend
= XEXP (XEXP (x
, 1), 1);
6894 HOST_WIDE_INT offset
;
6896 if (!CONST_INT_P (addend
))
6899 offset
= INTVAL(addend
);
6900 if (GET_MODE_SIZE (mode
) <= 4)
6901 return (offset
> -256 && offset
< 256);
6903 return (use_ldrd
&& offset
> -1024 && offset
< 1024
6904 && (offset
& 3) == 0);
6907 /* After reload constants split into minipools will have addresses
6908 from a LABEL_REF. */
6909 else if (reload_completed
6910 && (code
== LABEL_REF
6912 && GET_CODE (XEXP (x
, 0)) == PLUS
6913 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6914 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6917 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6920 else if (code
== PLUS
)
6922 rtx xop0
= XEXP (x
, 0);
6923 rtx xop1
= XEXP (x
, 1);
6925 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6926 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
6927 || (!strict_p
&& will_be_in_index_register (xop1
))))
6928 || (arm_address_register_rtx_p (xop1
, strict_p
)
6929 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
6932 /* Normally we can assign constant values to target registers without
6933 the help of constant pool. But there are cases we have to use constant
6935 1) assign a label to register.
6936 2) sign-extend a 8bit value to 32bit and then assign to register.
6938 Constant pool access in format:
6939 (set (reg r0) (mem (symbol_ref (".LC0"))))
6940 will cause the use of literal pool (later in function arm_reorg).
6941 So here we mark such format as an invalid format, then the compiler
6942 will adjust it into:
6943 (set (reg r0) (symbol_ref (".LC0")))
6944 (set (reg r0) (mem (reg r0))).
6945 No extra register is required, and (mem (reg r0)) won't cause the use
6946 of literal pools. */
6947 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
6948 && CONSTANT_POOL_ADDRESS_P (x
))
6951 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6952 && code
== SYMBOL_REF
6953 && CONSTANT_POOL_ADDRESS_P (x
)
6955 && symbol_mentioned_p (get_pool_constant (x
))
6956 && ! pcrel_constant_p (get_pool_constant (x
))))
6962 /* Return nonzero if INDEX is valid for an address index operand in
6965 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
6968 HOST_WIDE_INT range
;
6969 enum rtx_code code
= GET_CODE (index
);
6971 /* Standard coprocessor addressing modes. */
6972 if (TARGET_HARD_FLOAT
6974 && (mode
== SFmode
|| mode
== DFmode
))
6975 return (code
== CONST_INT
&& INTVAL (index
) < 1024
6976 && INTVAL (index
) > -1024
6977 && (INTVAL (index
) & 3) == 0);
6979 /* For quad modes, we restrict the constant offset to be slightly less
6980 than what the instruction format permits. We do this because for
6981 quad mode moves, we will actually decompose them into two separate
6982 double-mode reads or writes. INDEX must therefore be a valid
6983 (double-mode) offset and so should INDEX+8. */
6984 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
6985 return (code
== CONST_INT
6986 && INTVAL (index
) < 1016
6987 && INTVAL (index
) > -1024
6988 && (INTVAL (index
) & 3) == 0);
6990 /* We have no such constraint on double mode offsets, so we permit the
6991 full range of the instruction format. */
6992 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
6993 return (code
== CONST_INT
6994 && INTVAL (index
) < 1024
6995 && INTVAL (index
) > -1024
6996 && (INTVAL (index
) & 3) == 0);
6998 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
6999 return (code
== CONST_INT
7000 && INTVAL (index
) < 1024
7001 && INTVAL (index
) > -1024
7002 && (INTVAL (index
) & 3) == 0);
7004 if (arm_address_register_rtx_p (index
, strict_p
)
7005 && (GET_MODE_SIZE (mode
) <= 4))
7008 if (mode
== DImode
|| mode
== DFmode
)
7010 if (code
== CONST_INT
)
7012 HOST_WIDE_INT val
= INTVAL (index
);
7015 return val
> -256 && val
< 256;
7017 return val
> -4096 && val
< 4092;
7020 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7023 if (GET_MODE_SIZE (mode
) <= 4
7027 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7031 rtx xiop0
= XEXP (index
, 0);
7032 rtx xiop1
= XEXP (index
, 1);
7034 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7035 && power_of_two_operand (xiop1
, SImode
))
7036 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7037 && power_of_two_operand (xiop0
, SImode
)));
7039 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7040 || code
== ASHIFT
|| code
== ROTATERT
)
7042 rtx op
= XEXP (index
, 1);
7044 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7047 && INTVAL (op
) <= 31);
7051 /* For ARM v4 we may be doing a sign-extend operation during the
7057 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7063 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
7065 return (code
== CONST_INT
7066 && INTVAL (index
) < range
7067 && INTVAL (index
) > -range
);
7070 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7071 index operand. i.e. 1, 2, 4 or 8. */
7073 thumb2_index_mul_operand (rtx op
)
7077 if (!CONST_INT_P (op
))
7081 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
7084 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7086 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
7088 enum rtx_code code
= GET_CODE (index
);
7090 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7091 /* Standard coprocessor addressing modes. */
7092 if (TARGET_HARD_FLOAT
7094 && (mode
== SFmode
|| mode
== DFmode
))
7095 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7096 /* Thumb-2 allows only > -256 index range for it's core register
7097 load/stores. Since we allow SF/DF in core registers, we have
7098 to use the intersection between -256~4096 (core) and -1024~1024
7100 && INTVAL (index
) > -256
7101 && (INTVAL (index
) & 3) == 0);
7103 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7105 /* For DImode assume values will usually live in core regs
7106 and only allow LDRD addressing modes. */
7107 if (!TARGET_LDRD
|| mode
!= DImode
)
7108 return (code
== CONST_INT
7109 && INTVAL (index
) < 1024
7110 && INTVAL (index
) > -1024
7111 && (INTVAL (index
) & 3) == 0);
7114 /* For quad modes, we restrict the constant offset to be slightly less
7115 than what the instruction format permits. We do this because for
7116 quad mode moves, we will actually decompose them into two separate
7117 double-mode reads or writes. INDEX must therefore be a valid
7118 (double-mode) offset and so should INDEX+8. */
7119 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7120 return (code
== CONST_INT
7121 && INTVAL (index
) < 1016
7122 && INTVAL (index
) > -1024
7123 && (INTVAL (index
) & 3) == 0);
7125 /* We have no such constraint on double mode offsets, so we permit the
7126 full range of the instruction format. */
7127 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7128 return (code
== CONST_INT
7129 && INTVAL (index
) < 1024
7130 && INTVAL (index
) > -1024
7131 && (INTVAL (index
) & 3) == 0);
7133 if (arm_address_register_rtx_p (index
, strict_p
)
7134 && (GET_MODE_SIZE (mode
) <= 4))
7137 if (mode
== DImode
|| mode
== DFmode
)
7139 if (code
== CONST_INT
)
7141 HOST_WIDE_INT val
= INTVAL (index
);
7142 /* ??? Can we assume ldrd for thumb2? */
7143 /* Thumb-2 ldrd only has reg+const addressing modes. */
7144 /* ldrd supports offsets of +-1020.
7145 However the ldr fallback does not. */
7146 return val
> -256 && val
< 256 && (val
& 3) == 0;
7154 rtx xiop0
= XEXP (index
, 0);
7155 rtx xiop1
= XEXP (index
, 1);
7157 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7158 && thumb2_index_mul_operand (xiop1
))
7159 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7160 && thumb2_index_mul_operand (xiop0
)));
7162 else if (code
== ASHIFT
)
7164 rtx op
= XEXP (index
, 1);
7166 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7169 && INTVAL (op
) <= 3);
7172 return (code
== CONST_INT
7173 && INTVAL (index
) < 4096
7174 && INTVAL (index
) > -256);
7177 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7179 thumb1_base_register_rtx_p (rtx x
, enum machine_mode mode
, int strict_p
)
7189 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
7191 return (regno
<= LAST_LO_REGNUM
7192 || regno
> LAST_VIRTUAL_REGISTER
7193 || regno
== FRAME_POINTER_REGNUM
7194 || (GET_MODE_SIZE (mode
) >= 4
7195 && (regno
== STACK_POINTER_REGNUM
7196 || regno
>= FIRST_PSEUDO_REGISTER
7197 || x
== hard_frame_pointer_rtx
7198 || x
== arg_pointer_rtx
)));
7201 /* Return nonzero if x is a legitimate index register. This is the case
7202 for any base register that can access a QImode object. */
7204 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
7206 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
7209 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7211 The AP may be eliminated to either the SP or the FP, so we use the
7212 least common denominator, e.g. SImode, and offsets from 0 to 64.
7214 ??? Verify whether the above is the right approach.
7216 ??? Also, the FP may be eliminated to the SP, so perhaps that
7217 needs special handling also.
7219 ??? Look at how the mips16 port solves this problem. It probably uses
7220 better ways to solve some of these problems.
7222 Although it is not incorrect, we don't accept QImode and HImode
7223 addresses based on the frame pointer or arg pointer until the
7224 reload pass starts. This is so that eliminating such addresses
7225 into stack based ones won't produce impossible code. */
7227 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
7229 /* ??? Not clear if this is right. Experiment. */
7230 if (GET_MODE_SIZE (mode
) < 4
7231 && !(reload_in_progress
|| reload_completed
)
7232 && (reg_mentioned_p (frame_pointer_rtx
, x
)
7233 || reg_mentioned_p (arg_pointer_rtx
, x
)
7234 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
7235 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
7236 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
7237 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
7240 /* Accept any base register. SP only in SImode or larger. */
7241 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
7244 /* This is PC relative data before arm_reorg runs. */
7245 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
7246 && GET_CODE (x
) == SYMBOL_REF
7247 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
7250 /* This is PC relative data after arm_reorg runs. */
7251 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
7253 && (GET_CODE (x
) == LABEL_REF
7254 || (GET_CODE (x
) == CONST
7255 && GET_CODE (XEXP (x
, 0)) == PLUS
7256 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7257 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7260 /* Post-inc indexing only supported for SImode and larger. */
7261 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
7262 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
7265 else if (GET_CODE (x
) == PLUS
)
7267 /* REG+REG address can be any two index registers. */
7268 /* We disallow FRAME+REG addressing since we know that FRAME
7269 will be replaced with STACK, and SP relative addressing only
7270 permits SP+OFFSET. */
7271 if (GET_MODE_SIZE (mode
) <= 4
7272 && XEXP (x
, 0) != frame_pointer_rtx
7273 && XEXP (x
, 1) != frame_pointer_rtx
7274 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7275 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
7276 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
7279 /* REG+const has 5-7 bit offset for non-SP registers. */
7280 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7281 || XEXP (x
, 0) == arg_pointer_rtx
)
7282 && CONST_INT_P (XEXP (x
, 1))
7283 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7286 /* REG+const has 10-bit offset for SP, but only SImode and
7287 larger is supported. */
7288 /* ??? Should probably check for DI/DFmode overflow here
7289 just like GO_IF_LEGITIMATE_OFFSET does. */
7290 else if (REG_P (XEXP (x
, 0))
7291 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
7292 && GET_MODE_SIZE (mode
) >= 4
7293 && CONST_INT_P (XEXP (x
, 1))
7294 && INTVAL (XEXP (x
, 1)) >= 0
7295 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
7296 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7299 else if (REG_P (XEXP (x
, 0))
7300 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
7301 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
7302 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
7303 && REGNO (XEXP (x
, 0))
7304 <= LAST_VIRTUAL_POINTER_REGISTER
))
7305 && GET_MODE_SIZE (mode
) >= 4
7306 && CONST_INT_P (XEXP (x
, 1))
7307 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7311 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7312 && GET_MODE_SIZE (mode
) == 4
7313 && GET_CODE (x
) == SYMBOL_REF
7314 && CONSTANT_POOL_ADDRESS_P (x
)
7316 && symbol_mentioned_p (get_pool_constant (x
))
7317 && ! pcrel_constant_p (get_pool_constant (x
))))
7323 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7324 instruction of mode MODE. */
7326 thumb_legitimate_offset_p (enum machine_mode mode
, HOST_WIDE_INT val
)
7328 switch (GET_MODE_SIZE (mode
))
7331 return val
>= 0 && val
< 32;
7334 return val
>= 0 && val
< 64 && (val
& 1) == 0;
7338 && (val
+ GET_MODE_SIZE (mode
)) <= 128
7344 arm_legitimate_address_p (enum machine_mode mode
, rtx x
, bool strict_p
)
7347 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
7348 else if (TARGET_THUMB2
)
7349 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
7350 else /* if (TARGET_THUMB1) */
7351 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
7354 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7356 Given an rtx X being reloaded into a reg required to be
7357 in class CLASS, return the class of reg to actually use.
7358 In general this is just CLASS, but for the Thumb core registers and
7359 immediate constants we prefer a LO_REGS class or a subset. */
7362 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
7368 if (rclass
== GENERAL_REGS
)
7375 /* Build the SYMBOL_REF for __tls_get_addr. */
7377 static GTY(()) rtx tls_get_addr_libfunc
;
7380 get_tls_get_addr (void)
7382 if (!tls_get_addr_libfunc
)
7383 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
7384 return tls_get_addr_libfunc
;
7388 arm_load_tp (rtx target
)
7391 target
= gen_reg_rtx (SImode
);
7395 /* Can return in any reg. */
7396 emit_insn (gen_load_tp_hard (target
));
7400 /* Always returned in r0. Immediately copy the result into a pseudo,
7401 otherwise other uses of r0 (e.g. setting up function arguments) may
7402 clobber the value. */
7406 emit_insn (gen_load_tp_soft ());
7408 tmp
= gen_rtx_REG (SImode
, 0);
7409 emit_move_insn (target
, tmp
);
7415 load_tls_operand (rtx x
, rtx reg
)
7419 if (reg
== NULL_RTX
)
7420 reg
= gen_reg_rtx (SImode
);
7422 tmp
= gen_rtx_CONST (SImode
, x
);
7424 emit_move_insn (reg
, tmp
);
7430 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
7432 rtx insns
, label
, labelno
, sum
;
7434 gcc_assert (reloc
!= TLS_DESCSEQ
);
7437 labelno
= GEN_INT (pic_labelno
++);
7438 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7439 label
= gen_rtx_CONST (VOIDmode
, label
);
7441 sum
= gen_rtx_UNSPEC (Pmode
,
7442 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
7443 GEN_INT (TARGET_ARM
? 8 : 4)),
7445 reg
= load_tls_operand (sum
, reg
);
7448 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
7450 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7452 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
7453 LCT_PURE
, /* LCT_CONST? */
7454 Pmode
, 1, reg
, Pmode
);
7456 insns
= get_insns ();
7463 arm_tls_descseq_addr (rtx x
, rtx reg
)
7465 rtx labelno
= GEN_INT (pic_labelno
++);
7466 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7467 rtx sum
= gen_rtx_UNSPEC (Pmode
,
7468 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
7469 gen_rtx_CONST (VOIDmode
, label
),
7470 GEN_INT (!TARGET_ARM
)),
7472 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, 0));
7474 emit_insn (gen_tlscall (x
, labelno
));
7476 reg
= gen_reg_rtx (SImode
);
7478 gcc_assert (REGNO (reg
) != 0);
7480 emit_move_insn (reg
, reg0
);
7486 legitimize_tls_address (rtx x
, rtx reg
)
7488 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
7489 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
7493 case TLS_MODEL_GLOBAL_DYNAMIC
:
7494 if (TARGET_GNU2_TLS
)
7496 reg
= arm_tls_descseq_addr (x
, reg
);
7498 tp
= arm_load_tp (NULL_RTX
);
7500 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7504 /* Original scheme */
7505 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
7506 dest
= gen_reg_rtx (Pmode
);
7507 emit_libcall_block (insns
, dest
, ret
, x
);
7511 case TLS_MODEL_LOCAL_DYNAMIC
:
7512 if (TARGET_GNU2_TLS
)
7514 reg
= arm_tls_descseq_addr (x
, reg
);
7516 tp
= arm_load_tp (NULL_RTX
);
7518 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7522 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
7524 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7525 share the LDM result with other LD model accesses. */
7526 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
7528 dest
= gen_reg_rtx (Pmode
);
7529 emit_libcall_block (insns
, dest
, ret
, eqv
);
7531 /* Load the addend. */
7532 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
7533 GEN_INT (TLS_LDO32
)),
7535 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
7536 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
7540 case TLS_MODEL_INITIAL_EXEC
:
7541 labelno
= GEN_INT (pic_labelno
++);
7542 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7543 label
= gen_rtx_CONST (VOIDmode
, label
);
7544 sum
= gen_rtx_UNSPEC (Pmode
,
7545 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
7546 GEN_INT (TARGET_ARM
? 8 : 4)),
7548 reg
= load_tls_operand (sum
, reg
);
7551 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
7552 else if (TARGET_THUMB2
)
7553 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
7556 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7557 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
7560 tp
= arm_load_tp (NULL_RTX
);
7562 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7564 case TLS_MODEL_LOCAL_EXEC
:
7565 tp
= arm_load_tp (NULL_RTX
);
7567 reg
= gen_rtx_UNSPEC (Pmode
,
7568 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
7570 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
7572 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7579 /* Try machine-dependent ways of modifying an illegitimate address
7580 to be legitimate. If we find one, return the new, valid address. */
7582 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
7584 if (arm_tls_referenced_p (x
))
7588 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
7590 addend
= XEXP (XEXP (x
, 0), 1);
7591 x
= XEXP (XEXP (x
, 0), 0);
7594 if (GET_CODE (x
) != SYMBOL_REF
)
7597 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
7599 x
= legitimize_tls_address (x
, NULL_RTX
);
7603 x
= gen_rtx_PLUS (SImode
, x
, addend
);
7612 /* TODO: legitimize_address for Thumb2. */
7615 return thumb_legitimize_address (x
, orig_x
, mode
);
7618 if (GET_CODE (x
) == PLUS
)
7620 rtx xop0
= XEXP (x
, 0);
7621 rtx xop1
= XEXP (x
, 1);
7623 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
7624 xop0
= force_reg (SImode
, xop0
);
7626 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
7627 && !symbol_mentioned_p (xop1
))
7628 xop1
= force_reg (SImode
, xop1
);
7630 if (ARM_BASE_REGISTER_RTX_P (xop0
)
7631 && CONST_INT_P (xop1
))
7633 HOST_WIDE_INT n
, low_n
;
7637 /* VFP addressing modes actually allow greater offsets, but for
7638 now we just stick with the lowest common denominator. */
7640 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
7652 low_n
= ((mode
) == TImode
? 0
7653 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
7657 base_reg
= gen_reg_rtx (SImode
);
7658 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
7659 emit_move_insn (base_reg
, val
);
7660 x
= plus_constant (Pmode
, base_reg
, low_n
);
7662 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7663 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7666 /* XXX We don't allow MINUS any more -- see comment in
7667 arm_legitimate_address_outer_p (). */
7668 else if (GET_CODE (x
) == MINUS
)
7670 rtx xop0
= XEXP (x
, 0);
7671 rtx xop1
= XEXP (x
, 1);
7673 if (CONSTANT_P (xop0
))
7674 xop0
= force_reg (SImode
, xop0
);
7676 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
7677 xop1
= force_reg (SImode
, xop1
);
7679 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7680 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
7683 /* Make sure to take full advantage of the pre-indexed addressing mode
7684 with absolute addresses which often allows for the base register to
7685 be factorized for multiple adjacent memory references, and it might
7686 even allows for the mini pool to be avoided entirely. */
7687 else if (CONST_INT_P (x
) && optimize
> 0)
7690 HOST_WIDE_INT mask
, base
, index
;
7693 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7694 use a 8-bit index. So let's use a 12-bit index for SImode only and
7695 hope that arm_gen_constant will enable ldrb to use more bits. */
7696 bits
= (mode
== SImode
) ? 12 : 8;
7697 mask
= (1 << bits
) - 1;
7698 base
= INTVAL (x
) & ~mask
;
7699 index
= INTVAL (x
) & mask
;
7700 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
7702 /* It'll most probably be more efficient to generate the base
7703 with more bits set and use a negative index instead. */
7707 base_reg
= force_reg (SImode
, GEN_INT (base
));
7708 x
= plus_constant (Pmode
, base_reg
, index
);
7713 /* We need to find and carefully transform any SYMBOL and LABEL
7714 references; so go back to the original address expression. */
7715 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7717 if (new_x
!= orig_x
)
7725 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7726 to be legitimate. If we find one, return the new, valid address. */
7728 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
7730 if (GET_CODE (x
) == PLUS
7731 && CONST_INT_P (XEXP (x
, 1))
7732 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
7733 || INTVAL (XEXP (x
, 1)) < 0))
7735 rtx xop0
= XEXP (x
, 0);
7736 rtx xop1
= XEXP (x
, 1);
7737 HOST_WIDE_INT offset
= INTVAL (xop1
);
7739 /* Try and fold the offset into a biasing of the base register and
7740 then offsetting that. Don't do this when optimizing for space
7741 since it can cause too many CSEs. */
7742 if (optimize_size
&& offset
>= 0
7743 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
7745 HOST_WIDE_INT delta
;
7748 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
7749 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
7750 delta
= 31 * GET_MODE_SIZE (mode
);
7752 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
7754 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
7756 x
= plus_constant (Pmode
, xop0
, delta
);
7758 else if (offset
< 0 && offset
> -256)
7759 /* Small negative offsets are best done with a subtract before the
7760 dereference, forcing these into a register normally takes two
7762 x
= force_operand (x
, NULL_RTX
);
7765 /* For the remaining cases, force the constant into a register. */
7766 xop1
= force_reg (SImode
, xop1
);
7767 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7770 else if (GET_CODE (x
) == PLUS
7771 && s_register_operand (XEXP (x
, 1), SImode
)
7772 && !s_register_operand (XEXP (x
, 0), SImode
))
7774 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
7776 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
7781 /* We need to find and carefully transform any SYMBOL and LABEL
7782 references; so go back to the original address expression. */
7783 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7785 if (new_x
!= orig_x
)
7793 arm_legitimize_reload_address (rtx
*p
,
7794 enum machine_mode mode
,
7795 int opnum
, int type
,
7796 int ind_levels ATTRIBUTE_UNUSED
)
7798 /* We must recognize output that we have already generated ourselves. */
7799 if (GET_CODE (*p
) == PLUS
7800 && GET_CODE (XEXP (*p
, 0)) == PLUS
7801 && REG_P (XEXP (XEXP (*p
, 0), 0))
7802 && CONST_INT_P (XEXP (XEXP (*p
, 0), 1))
7803 && CONST_INT_P (XEXP (*p
, 1)))
7805 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7806 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7807 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7811 if (GET_CODE (*p
) == PLUS
7812 && REG_P (XEXP (*p
, 0))
7813 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p
, 0)))
7814 /* If the base register is equivalent to a constant, let the generic
7815 code handle it. Otherwise we will run into problems if a future
7816 reload pass decides to rematerialize the constant. */
7817 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p
, 0)))
7818 && CONST_INT_P (XEXP (*p
, 1)))
7820 HOST_WIDE_INT val
= INTVAL (XEXP (*p
, 1));
7821 HOST_WIDE_INT low
, high
;
7823 /* Detect coprocessor load/stores. */
7824 bool coproc_p
= ((TARGET_HARD_FLOAT
7826 && (mode
== SFmode
|| mode
== DFmode
))
7827 || (TARGET_REALLY_IWMMXT
7828 && VALID_IWMMXT_REG_MODE (mode
))
7830 && (VALID_NEON_DREG_MODE (mode
)
7831 || VALID_NEON_QREG_MODE (mode
))));
7833 /* For some conditions, bail out when lower two bits are unaligned. */
7834 if ((val
& 0x3) != 0
7835 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7837 /* For DI, and DF under soft-float: */
7838 || ((mode
== DImode
|| mode
== DFmode
)
7839 /* Without ldrd, we use stm/ldm, which does not
7840 fair well with unaligned bits. */
7842 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7843 || TARGET_THUMB2
))))
7846 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7847 of which the (reg+high) gets turned into a reload add insn,
7848 we try to decompose the index into high/low values that can often
7849 also lead to better reload CSE.
7851 ldr r0, [r2, #4100] // Offset too large
7852 ldr r1, [r2, #4104] // Offset too large
7854 is best reloaded as:
7860 which post-reload CSE can simplify in most cases to eliminate the
7861 second add instruction:
7866 The idea here is that we want to split out the bits of the constant
7867 as a mask, rather than as subtracting the maximum offset that the
7868 respective type of load/store used can handle.
7870 When encountering negative offsets, we can still utilize it even if
7871 the overall offset is positive; sometimes this may lead to an immediate
7872 that can be constructed with fewer instructions.
7874 ldr r0, [r2, #0x3FFFFC]
7876 This is best reloaded as:
7877 add t1, r2, #0x400000
7880 The trick for spotting this for a load insn with N bits of offset
7881 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
7882 negative offset that is going to make bit N and all the bits below
7883 it become zero in the remainder part.
7885 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7886 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7887 used in most cases of ARM load/store instructions. */
7889 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7890 (((VAL) & ((1 << (N)) - 1)) \
7891 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7896 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 10);
7898 /* NEON quad-word load/stores are made of two double-word accesses,
7899 so the valid index range is reduced by 8. Treat as 9-bit range if
7901 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
) && low
>= 1016)
7902 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 9);
7904 else if (GET_MODE_SIZE (mode
) == 8)
7907 low
= (TARGET_THUMB2
7908 ? SIGN_MAG_LOW_ADDR_BITS (val
, 10)
7909 : SIGN_MAG_LOW_ADDR_BITS (val
, 8));
7911 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7912 to access doublewords. The supported load/store offsets are
7913 -8, -4, and 4, which we try to produce here. */
7914 low
= ((val
& 0xf) ^ 0x8) - 0x8;
7916 else if (GET_MODE_SIZE (mode
) < 8)
7918 /* NEON element load/stores do not have an offset. */
7919 if (TARGET_NEON_FP16
&& mode
== HFmode
)
7924 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7925 Try the wider 12-bit range first, and re-try if the result
7927 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7929 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7933 if (mode
== HImode
|| mode
== HFmode
)
7936 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7939 /* The storehi/movhi_bytes fallbacks can use only
7940 [-4094,+4094] of the full ldrb/strb index range. */
7941 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7942 if (low
== 4095 || low
== -4095)
7947 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7953 high
= ((((val
- low
) & (unsigned HOST_WIDE_INT
) 0xffffffff)
7954 ^ (unsigned HOST_WIDE_INT
) 0x80000000)
7955 - (unsigned HOST_WIDE_INT
) 0x80000000);
7956 /* Check for overflow or zero */
7957 if (low
== 0 || high
== 0 || (high
+ low
!= val
))
7960 /* Reload the high part into a base reg; leave the low part
7962 Note that replacing this gen_rtx_PLUS with plus_constant is
7963 wrong in this case because we rely on the
7964 (plus (plus reg c1) c2) structure being preserved so that
7965 XEXP (*p, 0) in push_reload below uses the correct term. */
7966 *p
= gen_rtx_PLUS (GET_MODE (*p
),
7967 gen_rtx_PLUS (GET_MODE (*p
), XEXP (*p
, 0),
7970 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7971 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7972 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7980 thumb_legitimize_reload_address (rtx
*x_p
,
7981 enum machine_mode mode
,
7982 int opnum
, int type
,
7983 int ind_levels ATTRIBUTE_UNUSED
)
7987 if (GET_CODE (x
) == PLUS
7988 && GET_MODE_SIZE (mode
) < 4
7989 && REG_P (XEXP (x
, 0))
7990 && XEXP (x
, 0) == stack_pointer_rtx
7991 && CONST_INT_P (XEXP (x
, 1))
7992 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7997 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
7998 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
8002 /* If both registers are hi-regs, then it's better to reload the
8003 entire expression rather than each register individually. That
8004 only requires one reload register rather than two. */
8005 if (GET_CODE (x
) == PLUS
8006 && REG_P (XEXP (x
, 0))
8007 && REG_P (XEXP (x
, 1))
8008 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
8009 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
8014 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
8015 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
8022 /* Test for various thread-local symbols. */
8024 /* Helper for arm_tls_referenced_p. */
8027 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
8029 if (GET_CODE (*x
) == SYMBOL_REF
)
8030 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
8032 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8033 TLS offsets, not real symbol references. */
8034 if (GET_CODE (*x
) == UNSPEC
8035 && XINT (*x
, 1) == UNSPEC_TLS
)
8041 /* Return TRUE if X contains any TLS symbol references. */
8044 arm_tls_referenced_p (rtx x
)
8046 if (! TARGET_HAVE_TLS
)
8049 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
8052 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8054 On the ARM, allow any integer (invalid ones are removed later by insn
8055 patterns), nice doubles and symbol_refs which refer to the function's
8058 When generating pic allow anything. */
8061 arm_legitimate_constant_p_1 (enum machine_mode mode
, rtx x
)
8063 /* At present, we have no support for Neon structure constants, so forbid
8064 them here. It might be possible to handle simple cases like 0 and -1
8066 if (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
8069 return flag_pic
|| !label_mentioned_p (x
);
8073 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8075 return (CONST_INT_P (x
)
8076 || CONST_DOUBLE_P (x
)
8077 || CONSTANT_ADDRESS_P (x
)
8082 arm_legitimate_constant_p (enum machine_mode mode
, rtx x
)
8084 return (!arm_cannot_force_const_mem (mode
, x
)
8086 ? arm_legitimate_constant_p_1 (mode
, x
)
8087 : thumb_legitimate_constant_p (mode
, x
)));
8090 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8093 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8097 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8099 split_const (x
, &base
, &offset
);
8100 if (GET_CODE (base
) == SYMBOL_REF
8101 && !offset_within_block_p (base
, INTVAL (offset
)))
8104 return arm_tls_referenced_p (x
);
/* REG_OR_SUBREG_REG (X): true when X is a REG or a SUBREG wrapping a REG.
   REG_OR_SUBREG_RTX (X): strip a SUBREG wrapper, yielding the inner rtx.
   NOTE(review): the opening line of REG_OR_SUBREG_REG's expansion
   (presumably "(REG_P (X)") is missing from this extract — verify
   against upstream arm.c before building.  */
8107 #define REG_OR_SUBREG_REG(X) \
8109 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8111 #define REG_OR_SUBREG_RTX(X) \
8112 (REG_P (X) ? (X) : SUBREG_REG (X))
8115 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8117 enum machine_mode mode
= GET_MODE (x
);
8126 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8133 return COSTS_N_INSNS (1);
8136 if (CONST_INT_P (XEXP (x
, 1)))
8139 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8146 return COSTS_N_INSNS (2) + cycles
;
8148 return COSTS_N_INSNS (1) + 16;
8151 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8153 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8154 return (COSTS_N_INSNS (words
)
8155 + 4 * ((MEM_P (SET_SRC (x
)))
8156 + MEM_P (SET_DEST (x
))));
8161 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8163 if (thumb_shiftable_const (INTVAL (x
)))
8164 return COSTS_N_INSNS (2);
8165 return COSTS_N_INSNS (3);
8167 else if ((outer
== PLUS
|| outer
== COMPARE
)
8168 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8170 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8171 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8172 return COSTS_N_INSNS (1);
8173 else if (outer
== AND
)
8176 /* This duplicates the tests in the andsi3 expander. */
8177 for (i
= 9; i
<= 31; i
++)
8178 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8179 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8180 return COSTS_N_INSNS (2);
8182 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8183 || outer
== LSHIFTRT
)
8185 return COSTS_N_INSNS (2);
8191 return COSTS_N_INSNS (3);
8209 /* XXX another guess. */
8210 /* Memory costs quite a lot for the first word, but subsequent words
8211 load at the equivalent of a single insn each. */
8212 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8213 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8218 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8224 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8225 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8231 return total
+ COSTS_N_INSNS (1);
8233 /* Assume a two-shift sequence. Increase the cost slightly so
8234 we prefer actual shifts over an extend operation. */
8235 return total
+ 1 + COSTS_N_INSNS (2);
8243 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
8245 enum machine_mode mode
= GET_MODE (x
);
8246 enum rtx_code subcode
;
8248 enum rtx_code code
= GET_CODE (x
);
8254 /* Memory costs quite a lot for the first word, but subsequent words
8255 load at the equivalent of a single insn each. */
8256 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8263 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
8264 *total
= COSTS_N_INSNS (2);
8265 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
8266 *total
= COSTS_N_INSNS (4);
8268 *total
= COSTS_N_INSNS (20);
8272 if (REG_P (XEXP (x
, 1)))
8273 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8274 else if (!CONST_INT_P (XEXP (x
, 1)))
8275 *total
= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8281 *total
+= COSTS_N_INSNS (4);
8286 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8287 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8290 *total
+= COSTS_N_INSNS (3);
8294 *total
+= COSTS_N_INSNS (1);
8295 /* Increase the cost of complex shifts because they aren't any faster,
8296 and reduce dual issue opportunities. */
8297 if (arm_tune_cortex_a9
8298 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8306 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8307 if (CONST_INT_P (XEXP (x
, 0))
8308 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8310 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8314 if (CONST_INT_P (XEXP (x
, 1))
8315 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8317 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8324 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8326 if (TARGET_HARD_FLOAT
8328 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8330 *total
= COSTS_N_INSNS (1);
8331 if (CONST_DOUBLE_P (XEXP (x
, 0))
8332 && arm_const_double_rtx (XEXP (x
, 0)))
8334 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8338 if (CONST_DOUBLE_P (XEXP (x
, 1))
8339 && arm_const_double_rtx (XEXP (x
, 1)))
8341 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8347 *total
= COSTS_N_INSNS (20);
8351 *total
= COSTS_N_INSNS (1);
8352 if (CONST_INT_P (XEXP (x
, 0))
8353 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8355 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8359 subcode
= GET_CODE (XEXP (x
, 1));
8360 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8361 || subcode
== LSHIFTRT
8362 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8364 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8365 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8369 /* A shift as a part of RSB costs no more than RSB itself. */
8370 if (GET_CODE (XEXP (x
, 0)) == MULT
8371 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8373 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed
);
8374 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8379 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8381 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8382 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8386 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8387 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8389 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8390 if (REG_P (XEXP (XEXP (x
, 1), 0))
8391 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8392 *total
+= COSTS_N_INSNS (1);
8400 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8401 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8402 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8404 *total
= COSTS_N_INSNS (1);
8405 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
8407 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8411 /* MLA: All arguments must be registers. We filter out
8412 multiplication by a power of two, so that we fall down into
8414 if (GET_CODE (XEXP (x
, 0)) == MULT
8415 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8417 /* The cost comes from the cost of the multiply. */
8421 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8423 if (TARGET_HARD_FLOAT
8425 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8427 *total
= COSTS_N_INSNS (1);
8428 if (CONST_DOUBLE_P (XEXP (x
, 1))
8429 && arm_const_double_rtx (XEXP (x
, 1)))
8431 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8438 *total
= COSTS_N_INSNS (20);
8442 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8443 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8445 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8446 if (REG_P (XEXP (XEXP (x
, 0), 0))
8447 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8448 *total
+= COSTS_N_INSNS (1);
8454 case AND
: case XOR
: case IOR
:
8456 /* Normally the frame registers will be spilt into reg+const during
8457 reload, so it is a bad idea to combine them with other instructions,
8458 since then they might not be moved outside of loops. As a compromise
8459 we allow integration with ops that have a constant as their second
8461 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8462 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8463 && !CONST_INT_P (XEXP (x
, 1)))
8464 *total
= COSTS_N_INSNS (1);
8468 *total
+= COSTS_N_INSNS (2);
8469 if (CONST_INT_P (XEXP (x
, 1))
8470 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8472 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8479 *total
+= COSTS_N_INSNS (1);
8480 if (CONST_INT_P (XEXP (x
, 1))
8481 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8483 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8486 subcode
= GET_CODE (XEXP (x
, 0));
8487 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8488 || subcode
== LSHIFTRT
8489 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8491 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8492 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8497 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8499 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8500 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8504 if (subcode
== UMIN
|| subcode
== UMAX
8505 || subcode
== SMIN
|| subcode
== SMAX
)
8507 *total
= COSTS_N_INSNS (3);
8514 /* This should have been handled by the CPU specific routines. */
8518 if (arm_arch3m
&& mode
== SImode
8519 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8520 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8521 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8522 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8523 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8524 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8526 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, 0, speed
);
8529 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8533 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8535 if (TARGET_HARD_FLOAT
8537 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8539 *total
= COSTS_N_INSNS (1);
8542 *total
= COSTS_N_INSNS (2);
8548 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
8549 if (mode
== SImode
&& code
== NOT
)
8551 subcode
= GET_CODE (XEXP (x
, 0));
8552 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8553 || subcode
== LSHIFTRT
8554 || subcode
== ROTATE
|| subcode
== ROTATERT
8556 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
8558 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8559 /* Register shifts cost an extra cycle. */
8560 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
8561 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
8570 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8572 *total
= COSTS_N_INSNS (4);
8576 operand
= XEXP (x
, 0);
8578 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
8579 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
8580 && REG_P (XEXP (operand
, 0))
8581 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
8582 *total
+= COSTS_N_INSNS (1);
8583 *total
+= (rtx_cost (XEXP (x
, 1), code
, 1, speed
)
8584 + rtx_cost (XEXP (x
, 2), code
, 2, speed
));
8588 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8590 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8596 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8597 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8599 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8605 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8606 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8608 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8628 /* SCC insns. In the case where the comparison has already been
8629 performed, then they cost 2 instructions. Otherwise they need
8630 an additional comparison before them. */
8631 *total
= COSTS_N_INSNS (2);
8632 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8639 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8645 *total
+= COSTS_N_INSNS (1);
8646 if (CONST_INT_P (XEXP (x
, 1))
8647 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8649 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8653 subcode
= GET_CODE (XEXP (x
, 0));
8654 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8655 || subcode
== LSHIFTRT
8656 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8658 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8659 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8664 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8666 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8667 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8677 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8678 if (!CONST_INT_P (XEXP (x
, 1))
8679 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8680 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8684 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8686 if (TARGET_HARD_FLOAT
8688 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8690 *total
= COSTS_N_INSNS (1);
8693 *total
= COSTS_N_INSNS (20);
8696 *total
= COSTS_N_INSNS (1);
8698 *total
+= COSTS_N_INSNS (3);
8704 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8706 rtx op
= XEXP (x
, 0);
8707 enum machine_mode opmode
= GET_MODE (op
);
8710 *total
+= COSTS_N_INSNS (1);
8712 if (opmode
!= SImode
)
8716 /* If !arm_arch4, we use one of the extendhisi2_mem
8717 or movhi_bytes patterns for HImode. For a QImode
8718 sign extension, we first zero-extend from memory
8719 and then perform a shift sequence. */
8720 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8721 *total
+= COSTS_N_INSNS (2);
8724 *total
+= COSTS_N_INSNS (1);
8726 /* We don't have the necessary insn, so we need to perform some
8728 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8729 /* An and with constant 255. */
8730 *total
+= COSTS_N_INSNS (1);
8732 /* A shift sequence. Increase costs slightly to avoid
8733 combining two shifts into an extend operation. */
8734 *total
+= COSTS_N_INSNS (2) + 1;
8740 switch (GET_MODE (XEXP (x
, 0)))
8747 *total
= COSTS_N_INSNS (1);
8757 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8761 if (const_ok_for_arm (INTVAL (x
))
8762 || const_ok_for_arm (~INTVAL (x
)))
8763 *total
= COSTS_N_INSNS (1);
8765 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
8766 INTVAL (x
), NULL_RTX
,
8773 *total
= COSTS_N_INSNS (3);
8777 *total
= COSTS_N_INSNS (1);
8781 *total
= COSTS_N_INSNS (1);
8782 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8786 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
8787 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8788 *total
= COSTS_N_INSNS (1);
8790 *total
= COSTS_N_INSNS (4);
8794 /* The vec_extract patterns accept memory operands that require an
8795 address reload. Account for the cost of that reload to give the
8796 auto-inc-dec pass an incentive to try to replace them. */
8797 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
8798 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
8800 *total
= rtx_cost (SET_DEST (x
), code
, 0, speed
);
8801 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
8802 *total
+= COSTS_N_INSNS (1);
8805 /* Likewise for the vec_set patterns. */
8806 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
8807 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
8808 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
8810 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
8811 *total
= rtx_cost (mem
, code
, 0, speed
);
8812 if (!neon_vector_mem_operand (mem
, 2, true))
8813 *total
+= COSTS_N_INSNS (1);
8819 /* We cost this as high as our memory costs to allow this to
8820 be hoisted from loops. */
8821 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
8823 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8829 && TARGET_HARD_FLOAT
8831 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8832 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8833 *total
= COSTS_N_INSNS (1);
8835 *total
= COSTS_N_INSNS (4);
8839 *total
= COSTS_N_INSNS (4);
8844 /* Estimates the size cost of thumb1 instructions.
8845 For now most of the code is copied from thumb1_rtx_costs. We need more
8846 fine grain tuning when we have more related test cases. */
8848 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8850 enum machine_mode mode
= GET_MODE (x
);
8859 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8863 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8864 defined by RTL expansion, especially for the expansion of
8866 if ((GET_CODE (XEXP (x
, 0)) == MULT
8867 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8868 || (GET_CODE (XEXP (x
, 1)) == MULT
8869 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8870 return COSTS_N_INSNS (2);
8871 /* On purpose fall through for normal RTX. */
8875 return COSTS_N_INSNS (1);
8878 if (CONST_INT_P (XEXP (x
, 1)))
8880 /* Thumb1 mul instruction can't operate on const. We must Load it
8881 into a register first. */
8882 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8883 return COSTS_N_INSNS (1) + const_size
;
8885 return COSTS_N_INSNS (1);
8888 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8890 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8891 return (COSTS_N_INSNS (words
)
8892 + 4 * ((MEM_P (SET_SRC (x
)))
8893 + MEM_P (SET_DEST (x
))));
8898 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8899 return COSTS_N_INSNS (1);
8900 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8901 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8902 return COSTS_N_INSNS (2);
8903 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8904 if (thumb_shiftable_const (INTVAL (x
)))
8905 return COSTS_N_INSNS (2);
8906 return COSTS_N_INSNS (3);
8908 else if ((outer
== PLUS
|| outer
== COMPARE
)
8909 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8911 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8912 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8913 return COSTS_N_INSNS (1);
8914 else if (outer
== AND
)
8917 /* This duplicates the tests in the andsi3 expander. */
8918 for (i
= 9; i
<= 31; i
++)
8919 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8920 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8921 return COSTS_N_INSNS (2);
8923 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8924 || outer
== LSHIFTRT
)
8926 return COSTS_N_INSNS (2);
8932 return COSTS_N_INSNS (3);
8950 /* XXX another guess. */
8951 /* Memory costs quite a lot for the first word, but subsequent words
8952 load at the equivalent of a single insn each. */
8953 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8954 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8959 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8964 /* XXX still guessing. */
8965 switch (GET_MODE (XEXP (x
, 0)))
8968 return (1 + (mode
== DImode
? 4 : 0)
8969 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8972 return (4 + (mode
== DImode
? 4 : 0)
8973 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8976 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8987 /* RTX costs when optimizing for size. */
8989 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8992 enum machine_mode mode
= GET_MODE (x
);
8995 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
8999 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9003 /* A memory access costs 1 insn if the mode is small, or the address is
9004 a single register, otherwise it costs one insn per word. */
9005 if (REG_P (XEXP (x
, 0)))
9006 *total
= COSTS_N_INSNS (1);
9008 && GET_CODE (XEXP (x
, 0)) == PLUS
9009 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9010 /* This will be split into two instructions.
9011 See arm.md:calculate_pic_address. */
9012 *total
= COSTS_N_INSNS (2);
9014 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9021 /* Needs a libcall, so it costs about this. */
9022 *total
= COSTS_N_INSNS (2);
9026 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9028 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9036 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9038 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9041 else if (mode
== SImode
)
9043 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, false);
9044 /* Slightly disparage register shifts, but not by much. */
9045 if (!CONST_INT_P (XEXP (x
, 1)))
9046 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, 1, false);
9050 /* Needs a libcall. */
9051 *total
= COSTS_N_INSNS (2);
9055 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9056 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9058 *total
= COSTS_N_INSNS (1);
9064 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
9065 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
9067 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
9068 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
9069 || subcode1
== ROTATE
|| subcode1
== ROTATERT
9070 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
9071 || subcode1
== ASHIFTRT
)
9073 /* It's just the cost of the two operands. */
9078 *total
= COSTS_N_INSNS (1);
9082 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9086 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9087 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9089 *total
= COSTS_N_INSNS (1);
9093 /* A shift as a part of ADD costs nothing. */
9094 if (GET_CODE (XEXP (x
, 0)) == MULT
9095 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
9097 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
9098 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, false);
9099 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, false);
9104 case AND
: case XOR
: case IOR
:
9107 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9109 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
9110 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
9111 || (code
== AND
&& subcode
== NOT
))
9113 /* It's just the cost of the two operands. */
9119 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9123 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9127 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9128 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9130 *total
= COSTS_N_INSNS (1);
9136 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9145 if (cc_register (XEXP (x
, 0), VOIDmode
))
9148 *total
= COSTS_N_INSNS (1);
9152 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9153 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9154 *total
= COSTS_N_INSNS (1);
9156 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
9161 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
9164 if (const_ok_for_arm (INTVAL (x
)))
9165 /* A multiplication by a constant requires another instruction
9166 to load the constant to a register. */
9167 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
9169 else if (const_ok_for_arm (~INTVAL (x
)))
9170 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
9171 else if (const_ok_for_arm (-INTVAL (x
)))
9173 if (outer_code
== COMPARE
|| outer_code
== PLUS
9174 || outer_code
== MINUS
)
9177 *total
= COSTS_N_INSNS (1);
9180 *total
= COSTS_N_INSNS (2);
9186 *total
= COSTS_N_INSNS (2);
9190 *total
= COSTS_N_INSNS (4);
9195 && TARGET_HARD_FLOAT
9196 && outer_code
== SET
9197 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
9198 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
9199 *total
= COSTS_N_INSNS (1);
9201 *total
= COSTS_N_INSNS (4);
9206 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9207 cost of these slightly. */
9208 *total
= COSTS_N_INSNS (1) + 1;
9215 if (mode
!= VOIDmode
)
9216 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9218 *total
= COSTS_N_INSNS (4); /* How knows? */
9223 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9224 operand, then return the operand that is being shifted. If the shift
9225 is not by a constant, then set SHIFT_REG to point to the operand.
9226 Return NULL if OP is not a shifter operand. */
9228 shifter_op_p (rtx op
, rtx
*shift_reg
)
9230 enum rtx_code code
= GET_CODE (op
);
9232 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9233 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9234 return XEXP (op
, 0);
9235 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9236 return XEXP (op
, 0);
9237 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9238 || code
== ASHIFTRT
)
9240 if (!CONST_INT_P (XEXP (op
, 1)))
9241 *shift_reg
= XEXP (op
, 1);
9242 return XEXP (op
, 0);
9249 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9251 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9252 gcc_assert (GET_CODE (x
) == UNSPEC
);
9254 switch (XINT (x
, 1))
9256 case UNSPEC_UNALIGNED_LOAD
:
9257 /* We can only do unaligned loads into the integer unit, and we can't
9259 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9261 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9262 + extra_cost
->ldst
.load_unaligned
);
9265 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9266 ADDR_SPACE_GENERIC
, speed_p
);
9270 case UNSPEC_UNALIGNED_STORE
:
9271 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9273 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9274 + extra_cost
->ldst
.store_unaligned
);
9276 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), UNSPEC
, 0, speed_p
);
9278 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9279 ADDR_SPACE_GENERIC
, speed_p
);
9289 *cost
= COSTS_N_INSNS (1);
9291 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9295 *cost
= COSTS_N_INSNS (2);
9301 /* Cost of a libcall. We assume one insn per argument, an amount for the
9302 call (one insn for -Os) and then one for processing the result. */
9303 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9305 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9308 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9309 if (shift_op != NULL \
9310 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9315 *cost += extra_cost->alu.arith_shift_reg; \
9316 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9319 *cost += extra_cost->alu.arith_shift; \
9321 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9322 + rtx_cost (XEXP (x, 1 - IDX), \
9329 /* RTX costs. Make an estimate of the cost of executing the operation
9330 X, which is contained with an operation with code OUTER_CODE.
9331 SPEED_P indicates whether the cost desired is the performance cost,
9332 or the size cost. The estimate is stored in COST and the return
9333 value is TRUE if the cost calculation is final, or FALSE if the
9334 caller should recurse through the operands of X to add additional
9337 We currently make no attempt to model the size savings of Thumb-2
9338 16-bit instructions. At the normal points in compilation where
9339 this code is called we have no measure of whether the condition
9340 flags are live or not, and thus no realistic way to determine what
9341 the size will eventually be. */
9343 arm_new_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9344 const struct cpu_cost_table
*extra_cost
,
9345 int *cost
, bool speed_p
)
9347 enum machine_mode mode
= GET_MODE (x
);
9352 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9354 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9362 /* SET RTXs don't have a mode so we get it from the destination. */
9363 mode
= GET_MODE (SET_DEST (x
));
9365 if (REG_P (SET_SRC (x
))
9366 && REG_P (SET_DEST (x
)))
9368 /* Assume that most copies can be done with a single insn,
9369 unless we don't have HW FP, in which case everything
9370 larger than word mode will require two insns. */
9371 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9372 && GET_MODE_SIZE (mode
) > 4)
9375 /* Conditional register moves can be encoded
9376 in 16 bits in Thumb mode. */
9377 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9383 if (CONST_INT_P (SET_SRC (x
)))
9385 /* Handle CONST_INT here, since the value doesn't have a mode
9386 and we would otherwise be unable to work out the true cost. */
9387 *cost
= rtx_cost (SET_DEST (x
), SET
, 0, speed_p
);
9389 /* Slightly lower the cost of setting a core reg to a constant.
9390 This helps break up chains and allows for better scheduling. */
9391 if (REG_P (SET_DEST (x
))
9392 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9395 /* Immediate moves with an immediate in the range [0, 255] can be
9396 encoded in 16 bits in Thumb mode. */
9397 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9398 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9400 goto const_int_cost
;
9406 /* A memory access costs 1 insn if the mode is small, or the address is
9407 a single register, otherwise it costs one insn per word. */
9408 if (REG_P (XEXP (x
, 0)))
9409 *cost
= COSTS_N_INSNS (1);
9411 && GET_CODE (XEXP (x
, 0)) == PLUS
9412 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9413 /* This will be split into two instructions.
9414 See arm.md:calculate_pic_address. */
9415 *cost
= COSTS_N_INSNS (2);
9417 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9419 /* For speed optimizations, add the costs of the address and
9420 accessing memory. */
9423 *cost
+= (extra_cost
->ldst
.load
9424 + arm_address_cost (XEXP (x
, 0), mode
,
9425 ADDR_SPACE_GENERIC
, speed_p
));
9427 *cost
+= extra_cost
->ldst
.load
;
9433 /* Calculations of LDM costs are complex. We assume an initial cost
9434 (ldm_1st) which will load the number of registers mentioned in
9435 ldm_regs_per_insn_1st registers; then each additional
9436 ldm_regs_per_insn_subsequent registers cost one more insn. The
9437 formula for N regs is thus:
9439 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9440 + ldm_regs_per_insn_subsequent - 1)
9441 / ldm_regs_per_insn_subsequent).
9443 Additional costs may also be added for addressing. A similar
9444 formula is used for STM. */
9446 bool is_ldm
= load_multiple_operation (x
, SImode
);
9447 bool is_stm
= store_multiple_operation (x
, SImode
);
9449 *cost
= COSTS_N_INSNS (1);
9451 if (is_ldm
|| is_stm
)
9455 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9456 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9457 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9458 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9459 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9460 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9461 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9463 *cost
+= regs_per_insn_1st
9464 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9465 + regs_per_insn_sub
- 1)
9466 / regs_per_insn_sub
);
9475 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9476 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9477 *cost
= COSTS_N_INSNS (speed_p
9478 ? extra_cost
->fp
[mode
!= SFmode
].div
: 1);
9479 else if (mode
== SImode
&& TARGET_IDIV
)
9480 *cost
= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 1);
9482 *cost
= LIBCALL_COST (2);
9483 return false; /* All arguments must be in registers. */
9487 *cost
= LIBCALL_COST (2);
9488 return false; /* All arguments must be in registers. */
9491 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9493 *cost
= (COSTS_N_INSNS (2)
9494 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9496 *cost
+= extra_cost
->alu
.shift_reg
;
9504 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9506 *cost
= (COSTS_N_INSNS (3)
9507 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9509 *cost
+= 2 * extra_cost
->alu
.shift
;
9512 else if (mode
== SImode
)
9514 *cost
= (COSTS_N_INSNS (1)
9515 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9516 /* Slightly disparage register shifts at -Os, but not by much. */
9517 if (!CONST_INT_P (XEXP (x
, 1)))
9518 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9519 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9522 else if (GET_MODE_CLASS (mode
) == MODE_INT
9523 && GET_MODE_SIZE (mode
) < 4)
9527 *cost
= (COSTS_N_INSNS (1)
9528 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9529 /* Slightly disparage register shifts at -Os, but not by
9531 if (!CONST_INT_P (XEXP (x
, 1)))
9532 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9533 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9535 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9537 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9539 /* Can use SBFX/UBFX. */
9540 *cost
= COSTS_N_INSNS (1);
9542 *cost
+= extra_cost
->alu
.bfx
;
9543 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9547 *cost
= COSTS_N_INSNS (2);
9548 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9551 if (CONST_INT_P (XEXP (x
, 1)))
9552 *cost
+= 2 * extra_cost
->alu
.shift
;
9554 *cost
+= (extra_cost
->alu
.shift
9555 + extra_cost
->alu
.shift_reg
);
9558 /* Slightly disparage register shifts. */
9559 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9564 *cost
= COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x
, 1)));
9565 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9568 if (CONST_INT_P (XEXP (x
, 1)))
9569 *cost
+= (2 * extra_cost
->alu
.shift
9570 + extra_cost
->alu
.log_shift
);
9572 *cost
+= (extra_cost
->alu
.shift
9573 + extra_cost
->alu
.shift_reg
9574 + extra_cost
->alu
.log_shift_reg
);
9580 *cost
= LIBCALL_COST (2);
9588 *cost
= COSTS_N_INSNS (1);
9590 *cost
+= extra_cost
->alu
.rev
;
9597 /* No rev instruction available. Look at arm_legacy_rev
9598 and thumb_legacy_rev for the form of RTL used then. */
9601 *cost
= COSTS_N_INSNS (10);
9605 *cost
+= 6 * extra_cost
->alu
.shift
;
9606 *cost
+= 3 * extra_cost
->alu
.logical
;
9611 *cost
= COSTS_N_INSNS (5);
9615 *cost
+= 2 * extra_cost
->alu
.shift
;
9616 *cost
+= extra_cost
->alu
.arith_shift
;
9617 *cost
+= 2 * extra_cost
->alu
.logical
;
9625 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9626 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9628 *cost
= COSTS_N_INSNS (1);
9629 if (GET_CODE (XEXP (x
, 0)) == MULT
9630 || GET_CODE (XEXP (x
, 1)) == MULT
)
9632 rtx mul_op0
, mul_op1
, sub_op
;
9635 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9637 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9639 mul_op0
= XEXP (XEXP (x
, 0), 0);
9640 mul_op1
= XEXP (XEXP (x
, 0), 1);
9641 sub_op
= XEXP (x
, 1);
9645 mul_op0
= XEXP (XEXP (x
, 1), 0);
9646 mul_op1
= XEXP (XEXP (x
, 1), 1);
9647 sub_op
= XEXP (x
, 0);
9650 /* The first operand of the multiply may be optionally
9652 if (GET_CODE (mul_op0
) == NEG
)
9653 mul_op0
= XEXP (mul_op0
, 0);
9655 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9656 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9657 + rtx_cost (sub_op
, code
, 0, speed_p
));
9663 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9669 rtx shift_by_reg
= NULL
;
9673 *cost
= COSTS_N_INSNS (1);
9675 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9676 if (shift_op
== NULL
)
9678 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9679 non_shift_op
= XEXP (x
, 0);
9682 non_shift_op
= XEXP (x
, 1);
9684 if (shift_op
!= NULL
)
9686 if (shift_by_reg
!= NULL
)
9689 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9690 *cost
+= rtx_cost (shift_by_reg
, code
, 0, speed_p
);
9693 *cost
+= extra_cost
->alu
.arith_shift
;
9695 *cost
+= (rtx_cost (shift_op
, code
, 0, speed_p
)
9696 + rtx_cost (non_shift_op
, code
, 0, speed_p
));
9701 && GET_CODE (XEXP (x
, 1)) == MULT
)
9705 *cost
+= extra_cost
->mult
[0].add
;
9706 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9707 + rtx_cost (XEXP (XEXP (x
, 1), 0), MULT
, 0, speed_p
)
9708 + rtx_cost (XEXP (XEXP (x
, 1), 1), MULT
, 1, speed_p
));
9712 if (CONST_INT_P (XEXP (x
, 0)))
9714 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9715 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9717 *cost
= COSTS_N_INSNS (insns
);
9719 *cost
+= insns
* extra_cost
->alu
.arith
;
9720 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9727 if (GET_MODE_CLASS (mode
) == MODE_INT
9728 && GET_MODE_SIZE (mode
) < 4)
9730 rtx shift_op
, shift_reg
;
9733 /* We check both sides of the MINUS for shifter operands since,
9734 unlike PLUS, it's not commutative. */
9736 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9737 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9739 /* Slightly disparage, as we might need to widen the result. */
9740 *cost
= 1 + COSTS_N_INSNS (1);
9742 *cost
+= extra_cost
->alu
.arith
;
9744 if (CONST_INT_P (XEXP (x
, 0)))
9746 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9755 *cost
= COSTS_N_INSNS (2);
9757 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9759 rtx op1
= XEXP (x
, 1);
9762 *cost
+= 2 * extra_cost
->alu
.arith
;
9764 if (GET_CODE (op1
) == ZERO_EXTEND
)
9765 *cost
+= rtx_cost (XEXP (op1
, 0), ZERO_EXTEND
, 0, speed_p
);
9767 *cost
+= rtx_cost (op1
, MINUS
, 1, speed_p
);
9768 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
,
9772 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9775 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9776 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), SIGN_EXTEND
,
9778 + rtx_cost (XEXP (x
, 1), MINUS
, 1, speed_p
));
9781 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9782 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9785 *cost
+= (extra_cost
->alu
.arith
9786 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9787 ? extra_cost
->alu
.arith
9788 : extra_cost
->alu
.arith_shift
));
9789 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9790 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9791 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9796 *cost
+= 2 * extra_cost
->alu
.arith
;
9802 *cost
= LIBCALL_COST (2);
9806 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9807 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9809 *cost
= COSTS_N_INSNS (1);
9810 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9812 rtx mul_op0
, mul_op1
, add_op
;
9815 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9817 mul_op0
= XEXP (XEXP (x
, 0), 0);
9818 mul_op1
= XEXP (XEXP (x
, 0), 1);
9819 add_op
= XEXP (x
, 1);
9821 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9822 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9823 + rtx_cost (add_op
, code
, 0, speed_p
));
9829 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9832 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9834 *cost
= LIBCALL_COST (2);
9838 /* Narrow modes can be synthesized in SImode, but the range
9839 of useful sub-operations is limited. Check for shift operations
9840 on one of the operands. Only left shifts can be used in the
9842 if (GET_MODE_CLASS (mode
) == MODE_INT
9843 && GET_MODE_SIZE (mode
) < 4)
9845 rtx shift_op
, shift_reg
;
9848 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9850 if (CONST_INT_P (XEXP (x
, 1)))
9852 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9853 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9855 *cost
= COSTS_N_INSNS (insns
);
9857 *cost
+= insns
* extra_cost
->alu
.arith
;
9858 /* Slightly penalize a narrow operation as the result may
9860 *cost
+= 1 + rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9864 /* Slightly penalize a narrow operation as the result may
9866 *cost
= 1 + COSTS_N_INSNS (1);
9868 *cost
+= extra_cost
->alu
.arith
;
9875 rtx shift_op
, shift_reg
;
9877 *cost
= COSTS_N_INSNS (1);
9879 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9880 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9882 /* UXTA[BH] or SXTA[BH]. */
9884 *cost
+= extra_cost
->alu
.extend_arith
;
9885 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9887 + rtx_cost (XEXP (x
, 1), PLUS
, 0, speed_p
));
9892 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9893 if (shift_op
!= NULL
)
9898 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9899 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9902 *cost
+= extra_cost
->alu
.arith_shift
;
9904 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9905 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9908 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9910 rtx mul_op
= XEXP (x
, 0);
9912 *cost
= COSTS_N_INSNS (1);
9914 if (TARGET_DSP_MULTIPLY
9915 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9916 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9917 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9918 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9919 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9920 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9921 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9922 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9923 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9924 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9925 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9926 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9931 *cost
+= extra_cost
->mult
[0].extend_add
;
9932 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0),
9933 SIGN_EXTEND
, 0, speed_p
)
9934 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0),
9935 SIGN_EXTEND
, 0, speed_p
)
9936 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9941 *cost
+= extra_cost
->mult
[0].add
;
9942 *cost
+= (rtx_cost (XEXP (mul_op
, 0), MULT
, 0, speed_p
)
9943 + rtx_cost (XEXP (mul_op
, 1), MULT
, 1, speed_p
)
9944 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9947 if (CONST_INT_P (XEXP (x
, 1)))
9949 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9950 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9952 *cost
= COSTS_N_INSNS (insns
);
9954 *cost
+= insns
* extra_cost
->alu
.arith
;
9955 *cost
+= rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9964 && GET_CODE (XEXP (x
, 0)) == MULT
9965 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9966 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9967 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9968 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9970 *cost
= COSTS_N_INSNS (1);
9972 *cost
+= extra_cost
->mult
[1].extend_add
;
9973 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
9974 ZERO_EXTEND
, 0, speed_p
)
9975 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0),
9976 ZERO_EXTEND
, 0, speed_p
)
9977 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9981 *cost
= COSTS_N_INSNS (2);
9983 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9984 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9987 *cost
+= (extra_cost
->alu
.arith
9988 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9989 ? extra_cost
->alu
.arith
9990 : extra_cost
->alu
.arith_shift
));
9992 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9994 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9999 *cost
+= 2 * extra_cost
->alu
.arith
;
10004 *cost
= LIBCALL_COST (2);
10007 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
10009 *cost
= COSTS_N_INSNS (1);
10011 *cost
+= extra_cost
->alu
.rev
;
10015 /* Fall through. */
10016 case AND
: case XOR
:
10017 if (mode
== SImode
)
10019 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
10020 rtx op0
= XEXP (x
, 0);
10021 rtx shift_op
, shift_reg
;
10023 *cost
= COSTS_N_INSNS (1);
10027 || (code
== IOR
&& TARGET_THUMB2
)))
10028 op0
= XEXP (op0
, 0);
10031 shift_op
= shifter_op_p (op0
, &shift_reg
);
10032 if (shift_op
!= NULL
)
10037 *cost
+= extra_cost
->alu
.log_shift_reg
;
10038 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10041 *cost
+= extra_cost
->alu
.log_shift
;
10043 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10044 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
10048 if (CONST_INT_P (XEXP (x
, 1)))
10050 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
10051 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10054 *cost
= COSTS_N_INSNS (insns
);
10056 *cost
+= insns
* extra_cost
->alu
.logical
;
10057 *cost
+= rtx_cost (op0
, code
, 0, speed_p
);
10062 *cost
+= extra_cost
->alu
.logical
;
10063 *cost
+= (rtx_cost (op0
, code
, 0, speed_p
)
10064 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
10068 if (mode
== DImode
)
10070 rtx op0
= XEXP (x
, 0);
10071 enum rtx_code subcode
= GET_CODE (op0
);
10073 *cost
= COSTS_N_INSNS (2);
10077 || (code
== IOR
&& TARGET_THUMB2
)))
10078 op0
= XEXP (op0
, 0);
10080 if (GET_CODE (op0
) == ZERO_EXTEND
)
10083 *cost
+= 2 * extra_cost
->alu
.logical
;
10085 *cost
+= (rtx_cost (XEXP (op0
, 0), ZERO_EXTEND
, 0, speed_p
)
10086 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
10089 else if (GET_CODE (op0
) == SIGN_EXTEND
)
10092 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
10094 *cost
+= (rtx_cost (XEXP (op0
, 0), SIGN_EXTEND
, 0, speed_p
)
10095 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
10100 *cost
+= 2 * extra_cost
->alu
.logical
;
10106 *cost
= LIBCALL_COST (2);
10110 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10111 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10113 rtx op0
= XEXP (x
, 0);
10115 *cost
= COSTS_N_INSNS (1);
10117 if (GET_CODE (op0
) == NEG
)
10118 op0
= XEXP (op0
, 0);
10121 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10123 *cost
+= (rtx_cost (op0
, MULT
, 0, speed_p
)
10124 + rtx_cost (XEXP (x
, 1), MULT
, 1, speed_p
));
10127 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10129 *cost
= LIBCALL_COST (2);
10133 if (mode
== SImode
)
10135 *cost
= COSTS_N_INSNS (1);
10136 if (TARGET_DSP_MULTIPLY
10137 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10138 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10139 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10140 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10141 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10142 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10143 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10144 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10145 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10146 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10147 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10148 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10151 /* SMUL[TB][TB]. */
10153 *cost
+= extra_cost
->mult
[0].extend
;
10154 *cost
+= (rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed_p
)
10155 + rtx_cost (XEXP (x
, 1), SIGN_EXTEND
, 0, speed_p
));
10159 *cost
+= extra_cost
->mult
[0].simple
;
10163 if (mode
== DImode
)
10166 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10167 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10168 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10169 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10171 *cost
= COSTS_N_INSNS (1);
10173 *cost
+= extra_cost
->mult
[1].extend
;
10174 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0),
10175 ZERO_EXTEND
, 0, speed_p
)
10176 + rtx_cost (XEXP (XEXP (x
, 1), 0),
10177 ZERO_EXTEND
, 0, speed_p
));
10181 *cost
= LIBCALL_COST (2);
10186 *cost
= LIBCALL_COST (2);
10190 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10191 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10193 *cost
= COSTS_N_INSNS (1);
10195 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10199 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10201 *cost
= LIBCALL_COST (1);
10205 if (mode
== SImode
)
10207 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10209 *cost
= COSTS_N_INSNS (2);
10210 /* Assume the non-flag-changing variant. */
10212 *cost
+= (extra_cost
->alu
.log_shift
10213 + extra_cost
->alu
.arith_shift
);
10214 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ABS
, 0, speed_p
);
10218 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10219 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10221 *cost
= COSTS_N_INSNS (2);
10222 /* No extra cost for MOV imm and MVN imm. */
10223 /* If the comparison op is using the flags, there's no further
10224 cost, otherwise we need to add the cost of the comparison. */
10225 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10226 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10227 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10229 *cost
+= (COSTS_N_INSNS (1)
10230 + rtx_cost (XEXP (XEXP (x
, 0), 0), COMPARE
, 0,
10232 + rtx_cost (XEXP (XEXP (x
, 0), 1), COMPARE
, 1,
10235 *cost
+= extra_cost
->alu
.arith
;
10239 *cost
= COSTS_N_INSNS (1);
10241 *cost
+= extra_cost
->alu
.arith
;
10245 if (GET_MODE_CLASS (mode
) == MODE_INT
10246 && GET_MODE_SIZE (mode
) < 4)
10248 /* Slightly disparage, as we might need an extend operation. */
10249 *cost
= 1 + COSTS_N_INSNS (1);
10251 *cost
+= extra_cost
->alu
.arith
;
10255 if (mode
== DImode
)
10257 *cost
= COSTS_N_INSNS (2);
10259 *cost
+= 2 * extra_cost
->alu
.arith
;
10264 *cost
= LIBCALL_COST (1);
10268 if (mode
== SImode
)
10271 rtx shift_reg
= NULL
;
10273 *cost
= COSTS_N_INSNS (1);
10274 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10278 if (shift_reg
!= NULL
)
10281 *cost
+= extra_cost
->alu
.log_shift_reg
;
10282 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10285 *cost
+= extra_cost
->alu
.log_shift
;
10286 *cost
+= rtx_cost (shift_op
, ASHIFT
, 0, speed_p
);
10291 *cost
+= extra_cost
->alu
.logical
;
10294 if (mode
== DImode
)
10296 *cost
= COSTS_N_INSNS (2);
10302 *cost
+= LIBCALL_COST (1);
10307 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10309 *cost
= COSTS_N_INSNS (4);
10312 int op1cost
= rtx_cost (XEXP (x
, 1), SET
, 1, speed_p
);
10313 int op2cost
= rtx_cost (XEXP (x
, 2), SET
, 1, speed_p
);
10315 *cost
= rtx_cost (XEXP (x
, 0), IF_THEN_ELSE
, 0, speed_p
);
10316 /* Assume that if one arm of the if_then_else is a register,
10317 that it will be tied with the result and eliminate the
10318 conditional insn. */
10319 if (REG_P (XEXP (x
, 1)))
10321 else if (REG_P (XEXP (x
, 2)))
10327 if (extra_cost
->alu
.non_exec_costs_exec
)
10328 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10330 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10333 *cost
+= op1cost
+ op2cost
;
10339 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10343 enum machine_mode op0mode
;
10344 /* We'll mostly assume that the cost of a compare is the cost of the
10345 LHS. However, there are some notable exceptions. */
10347 /* Floating point compares are never done as side-effects. */
10348 op0mode
= GET_MODE (XEXP (x
, 0));
10349 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10350 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10352 *cost
= COSTS_N_INSNS (1);
10354 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10356 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10358 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10364 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10366 *cost
= LIBCALL_COST (2);
10370 /* DImode compares normally take two insns. */
10371 if (op0mode
== DImode
)
10373 *cost
= COSTS_N_INSNS (2);
10375 *cost
+= 2 * extra_cost
->alu
.arith
;
10379 if (op0mode
== SImode
)
10384 if (XEXP (x
, 1) == const0_rtx
10385 && !(REG_P (XEXP (x
, 0))
10386 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10387 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10389 *cost
= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10391 /* Multiply operations that set the flags are often
10392 significantly more expensive. */
10394 && GET_CODE (XEXP (x
, 0)) == MULT
10395 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10396 *cost
+= extra_cost
->mult
[0].flag_setting
;
10399 && GET_CODE (XEXP (x
, 0)) == PLUS
10400 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10401 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10403 *cost
+= extra_cost
->mult
[0].flag_setting
;
10408 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10409 if (shift_op
!= NULL
)
10411 *cost
= COSTS_N_INSNS (1);
10412 if (shift_reg
!= NULL
)
10414 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10416 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10419 *cost
+= extra_cost
->alu
.arith_shift
;
10420 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10421 + rtx_cost (XEXP (x
, 1), COMPARE
, 1, speed_p
));
10425 *cost
= COSTS_N_INSNS (1);
10427 *cost
+= extra_cost
->alu
.arith
;
10428 if (CONST_INT_P (XEXP (x
, 1))
10429 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10431 *cost
+= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10439 *cost
= LIBCALL_COST (2);
10462 if (outer_code
== SET
)
10464 /* Is it a store-flag operation? */
10465 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10466 && XEXP (x
, 1) == const0_rtx
)
10468 /* Thumb also needs an IT insn. */
10469 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10472 if (XEXP (x
, 1) == const0_rtx
)
10477 /* LSR Rd, Rn, #31. */
10478 *cost
= COSTS_N_INSNS (1);
10480 *cost
+= extra_cost
->alu
.shift
;
10490 *cost
= COSTS_N_INSNS (2);
10494 /* RSBS T1, Rn, Rn, LSR #31
10496 *cost
= COSTS_N_INSNS (2);
10498 *cost
+= extra_cost
->alu
.arith_shift
;
10502 /* RSB Rd, Rn, Rn, ASR #1
10503 LSR Rd, Rd, #31. */
10504 *cost
= COSTS_N_INSNS (2);
10506 *cost
+= (extra_cost
->alu
.arith_shift
10507 + extra_cost
->alu
.shift
);
10513 *cost
= COSTS_N_INSNS (2);
10515 *cost
+= extra_cost
->alu
.shift
;
10519 /* Remaining cases are either meaningless or would take
10520 three insns anyway. */
10521 *cost
= COSTS_N_INSNS (3);
10524 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10529 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 4 : 3);
10530 if (CONST_INT_P (XEXP (x
, 1))
10531 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10533 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10540 /* Not directly inside a set. If it involves the condition code
10541 register it must be the condition for a branch, cond_exec or
10542 I_T_E operation. Since the comparison is performed elsewhere
10543 this is just the control part which has no additional
10545 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10546 && XEXP (x
, 1) == const0_rtx
)
10554 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10555 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10557 *cost
= COSTS_N_INSNS (1);
10559 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10563 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10565 *cost
= LIBCALL_COST (1);
10569 if (mode
== SImode
)
10571 *cost
= COSTS_N_INSNS (1);
10573 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10577 *cost
= LIBCALL_COST (1);
10581 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10582 && MEM_P (XEXP (x
, 0)))
10584 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10586 if (mode
== DImode
)
10587 *cost
+= COSTS_N_INSNS (1);
10592 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10593 *cost
+= extra_cost
->ldst
.load
;
10595 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10597 if (mode
== DImode
)
10598 *cost
+= extra_cost
->alu
.shift
;
10603 /* Widening from less than 32-bits requires an extend operation. */
10604 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10606 /* We have SXTB/SXTH. */
10607 *cost
= COSTS_N_INSNS (1);
10608 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10610 *cost
+= extra_cost
->alu
.extend
;
10612 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10614 /* Needs two shifts. */
10615 *cost
= COSTS_N_INSNS (2);
10616 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10618 *cost
+= 2 * extra_cost
->alu
.shift
;
10621 /* Widening beyond 32-bits requires one more insn. */
10622 if (mode
== DImode
)
10624 *cost
+= COSTS_N_INSNS (1);
10626 *cost
+= extra_cost
->alu
.shift
;
10633 || GET_MODE (XEXP (x
, 0)) == SImode
10634 || GET_MODE (XEXP (x
, 0)) == QImode
)
10635 && MEM_P (XEXP (x
, 0)))
10637 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10639 if (mode
== DImode
)
10640 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10645 /* Widening from less than 32-bits requires an extend operation. */
10646 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10648 /* UXTB can be a shorter instruction in Thumb2, but it might
10649 be slower than the AND Rd, Rn, #255 alternative. When
10650 optimizing for speed it should never be slower to use
10651 AND, and we don't really model 16-bit vs 32-bit insns
10653 *cost
= COSTS_N_INSNS (1);
10655 *cost
+= extra_cost
->alu
.logical
;
10657 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10659 /* We have UXTB/UXTH. */
10660 *cost
= COSTS_N_INSNS (1);
10661 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10663 *cost
+= extra_cost
->alu
.extend
;
10665 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10667 /* Needs two shifts. It's marginally preferable to use
10668 shifts rather than two BIC instructions as the second
10669 shift may merge with a subsequent insn as a shifter
10671 *cost
= COSTS_N_INSNS (2);
10672 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10674 *cost
+= 2 * extra_cost
->alu
.shift
;
10676 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10677 *cost
= COSTS_N_INSNS (1);
10679 /* Widening beyond 32-bits requires one more insn. */
10680 if (mode
== DImode
)
10682 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10689 /* CONST_INT has no mode, so we cannot tell for sure how many
10690 insns are really going to be needed. The best we can do is
10691 look at the value passed. If it fits in SImode, then assume
10692 that's the mode it will be used for. Otherwise assume it
10693 will be used in DImode. */
10694 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10699 /* Avoid blowing up in arm_gen_constant (). */
10700 if (!(outer_code
== PLUS
10701 || outer_code
== AND
10702 || outer_code
== IOR
10703 || outer_code
== XOR
10704 || outer_code
== MINUS
))
10708 if (mode
== SImode
)
10710 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10711 INTVAL (x
), NULL
, NULL
,
10717 *cost
+= COSTS_N_INSNS (arm_gen_constant
10718 (outer_code
, SImode
, NULL
,
10719 trunc_int_for_mode (INTVAL (x
), SImode
),
10721 + arm_gen_constant (outer_code
, SImode
, NULL
,
10722 INTVAL (x
) >> 32, NULL
,
10734 if (arm_arch_thumb2
&& !flag_pic
)
10735 *cost
= COSTS_N_INSNS (2);
10737 *cost
= COSTS_N_INSNS (1) + extra_cost
->ldst
.load
;
10740 *cost
= COSTS_N_INSNS (2);
10744 *cost
+= COSTS_N_INSNS (1);
10746 *cost
+= extra_cost
->alu
.arith
;
10752 *cost
= COSTS_N_INSNS (4);
10757 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10758 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10760 if (vfp3_const_double_rtx (x
))
10762 *cost
= COSTS_N_INSNS (1);
10764 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10770 *cost
= COSTS_N_INSNS (1);
10771 if (mode
== DFmode
)
10772 *cost
+= extra_cost
->ldst
.loadd
;
10774 *cost
+= extra_cost
->ldst
.loadf
;
10777 *cost
= COSTS_N_INSNS (2 + (mode
== DFmode
));
10781 *cost
= COSTS_N_INSNS (4);
10787 && TARGET_HARD_FLOAT
10788 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10789 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10790 *cost
= COSTS_N_INSNS (1);
10792 *cost
= COSTS_N_INSNS (4);
10797 *cost
= COSTS_N_INSNS (1);
10798 /* When optimizing for size, we prefer constant pool entries to
10799 MOVW/MOVT pairs, so bump the cost of these slightly. */
10805 *cost
= COSTS_N_INSNS (1);
10807 *cost
+= extra_cost
->alu
.clz
;
10811 if (XEXP (x
, 1) == const0_rtx
)
10813 *cost
= COSTS_N_INSNS (1);
10815 *cost
+= extra_cost
->alu
.log_shift
;
10816 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10819 /* Fall through. */
10823 *cost
= COSTS_N_INSNS (2);
10827 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10828 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10829 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10830 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10831 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10832 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10833 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10834 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10837 *cost
= COSTS_N_INSNS (1);
10839 *cost
+= extra_cost
->mult
[1].extend
;
10840 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), ZERO_EXTEND
, 0,
10842 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), ZERO_EXTEND
,
10846 *cost
= LIBCALL_COST (1);
10850 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10853 /* Reading the PC is like reading any other register. Writing it
10854 is more expensive, but we take that into account elsewhere. */
10859 /* TODO: Simple zero_extract of bottom bits using AND. */
10860 /* Fall through. */
10864 && CONST_INT_P (XEXP (x
, 1))
10865 && CONST_INT_P (XEXP (x
, 2)))
10867 *cost
= COSTS_N_INSNS (1);
10869 *cost
+= extra_cost
->alu
.bfx
;
10870 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10873 /* Without UBFX/SBFX, need to resort to shift operations. */
10874 *cost
= COSTS_N_INSNS (2);
10876 *cost
+= 2 * extra_cost
->alu
.shift
;
10877 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed_p
);
10881 if (TARGET_HARD_FLOAT
)
10883 *cost
= COSTS_N_INSNS (1);
10885 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10886 if (!TARGET_FPU_ARMV8
10887 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10889 /* Pre v8, widening HF->DF is a two-step process, first
10890 widening to SFmode. */
10891 *cost
+= COSTS_N_INSNS (1);
10893 *cost
+= extra_cost
->fp
[0].widen
;
10895 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10899 *cost
= LIBCALL_COST (1);
10902 case FLOAT_TRUNCATE
:
10903 if (TARGET_HARD_FLOAT
)
10905 *cost
= COSTS_N_INSNS (1);
10907 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10908 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10910 /* Vector modes? */
10912 *cost
= LIBCALL_COST (1);
10916 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10918 rtx op0
= XEXP (x
, 0);
10919 rtx op1
= XEXP (x
, 1);
10920 rtx op2
= XEXP (x
, 2);
10922 *cost
= COSTS_N_INSNS (1);
10924 /* vfms or vfnma. */
10925 if (GET_CODE (op0
) == NEG
)
10926 op0
= XEXP (op0
, 0);
10928 /* vfnms or vfnma. */
10929 if (GET_CODE (op2
) == NEG
)
10930 op2
= XEXP (op2
, 0);
10932 *cost
+= rtx_cost (op0
, FMA
, 0, speed_p
);
10933 *cost
+= rtx_cost (op1
, FMA
, 1, speed_p
);
10934 *cost
+= rtx_cost (op2
, FMA
, 2, speed_p
);
10937 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
10942 *cost
= LIBCALL_COST (3);
10947 if (TARGET_HARD_FLOAT
)
10949 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10951 *cost
= COSTS_N_INSNS (1);
10953 *cost
+= extra_cost
->fp
[GET_MODE (XEXP (x
, 0)) == DFmode
].toint
;
10954 /* Strip of the 'cost' of rounding towards zero. */
10955 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10956 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed_p
);
10958 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10959 /* ??? Increase the cost to deal with transferring from
10960 FP -> CORE registers? */
10963 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10964 && TARGET_FPU_ARMV8
)
10966 *cost
= COSTS_N_INSNS (1);
10968 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10971 /* Vector costs? */
10973 *cost
= LIBCALL_COST (1);
10977 case UNSIGNED_FLOAT
:
10978 if (TARGET_HARD_FLOAT
)
10980 /* ??? Increase the cost to deal with transferring from CORE
10981 -> FP registers? */
10982 *cost
= COSTS_N_INSNS (1);
10984 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10987 *cost
= LIBCALL_COST (1);
10991 *cost
= COSTS_N_INSNS (1);
10996 /* Just a guess. Guess number of instructions in the asm
10997 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10998 though (see PR60663). */
10999 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
11000 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
11002 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
11006 if (mode
!= VOIDmode
)
11007 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
11009 *cost
= COSTS_N_INSNS (4); /* Who knows? */
11014 #undef HANDLE_NARROW_SHIFT_ARITH
11016 /* RTX costs when optimizing for size. */
/* NOTE(review): the one-line comment above looks stale for this function --
   the visible body dispatches between several cost implementations (size and
   speed), not a size-only computation; confirm against the full file.
   This chunk is a lossy extraction: the return-type line, braces and some
   statements of the original are missing, so the comments below describe
   only what the visible fragments establish.  */
/* Top-level RTX-costs hook for the ARM backend.  Selects a costing
   implementation based on TARGET_OLD_RTX_COSTS / TARGET_NEW_GENERIC_COSTS
   and on whether the current tuning provides an insn_extra_cost table,
   stores the cost through TOTAL, and optionally dumps the rtx with its
   computed cost when detailed dumping is enabled.  */
11018 arm_rtx_costs (rtx x
, int code
, int outer_code
, int opno ATTRIBUTE_UNUSED
,
11019 int *total
, bool speed
)
/* Deprecated paths: either the user forced the old costs, or the tuning has
   no extra-cost table and generic new costs were not requested.  */
11023 if (TARGET_OLD_RTX_COSTS
11024 || (!current_tune
->insn_extra_cost
&& !TARGET_NEW_GENERIC_COSTS
))
11026 /* Old way. (Deprecated.) */
/* Size costs use the shared arm_size_rtx_costs implementation.  */
11028 result
= arm_size_rtx_costs (x
, (enum rtx_code
) code
,
11029 (enum rtx_code
) outer_code
, total
);
/* Speed costs come from the per-core legacy cost hook in the tuning
   structure.  */
11031 result
= current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
11032 (enum rtx_code
) outer_code
, total
,
/* New table-driven costs: prefer the tuning's own extra-cost table when it
   exists...  */
11038 if (current_tune
->insn_extra_cost
)
11039 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11040 (enum rtx_code
) outer_code
,
11041 current_tune
->insn_extra_cost
,
11043 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11044 && current_tune->insn_extra_cost != NULL */
/* ...otherwise fall back to the generic extra-cost table.  */
11046 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11047 (enum rtx_code
) outer_code
,
11048 &generic_extra_costs
, total
, speed
);
/* With -fdump-...-details, show the rtx and whether the returned cost is
   final (RESULT true) or only partial.  */
11051 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
11053 print_rtl_single (dump_file
, x
);
11054 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
11055 *total
, result
? "final" : "partial");
11060 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11061 supported on any "slowmul" cores, so it can be ignored. */
11064 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11065 int *total
, bool speed
)
11067 enum machine_mode mode
= GET_MODE (x
);
11071 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11078 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11081 *total
= COSTS_N_INSNS (20);
11085 if (CONST_INT_P (XEXP (x
, 1)))
11087 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11088 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11089 int cost
, const_ok
= const_ok_for_arm (i
);
11090 int j
, booth_unit_size
;
11092 /* Tune as appropriate. */
11093 cost
= const_ok
? 4 : 8;
11094 booth_unit_size
= 2;
11095 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11097 i
>>= booth_unit_size
;
11101 *total
= COSTS_N_INSNS (cost
);
11102 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
11106 *total
= COSTS_N_INSNS (20);
11110 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
11115 /* RTX cost for cores with a fast multiply unit (M variants). */
11118 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11119 int *total
, bool speed
)
11121 enum machine_mode mode
= GET_MODE (x
);
11125 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11129 /* ??? should thumb2 use different costs? */
11133 /* There is no point basing this on the tuning, since it is always the
11134 fast variant if it exists at all. */
11136 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11137 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11138 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11140 *total
= COSTS_N_INSNS(2);
11145 if (mode
== DImode
)
11147 *total
= COSTS_N_INSNS (5);
11151 if (CONST_INT_P (XEXP (x
, 1)))
11153 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11154 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11155 int cost
, const_ok
= const_ok_for_arm (i
);
11156 int j
, booth_unit_size
;
11158 /* Tune as appropriate. */
11159 cost
= const_ok
? 4 : 8;
11160 booth_unit_size
= 8;
11161 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11163 i
>>= booth_unit_size
;
11167 *total
= COSTS_N_INSNS(cost
);
11171 if (mode
== SImode
)
11173 *total
= COSTS_N_INSNS (4);
11177 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11179 if (TARGET_HARD_FLOAT
11181 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11183 *total
= COSTS_N_INSNS (1);
11188 /* Requires a lib call */
11189 *total
= COSTS_N_INSNS (20);
11193 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11198 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11199 so it can be ignored. */
11202 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11203 int *total
, bool speed
)
11205 enum machine_mode mode
= GET_MODE (x
);
11209 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11216 if (GET_CODE (XEXP (x
, 0)) != MULT
)
11217 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11219 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11220 will stall until the multiplication is complete. */
11221 *total
= COSTS_N_INSNS (3);
11225 /* There is no point basing this on the tuning, since it is always the
11226 fast variant if it exists at all. */
11228 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11229 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11230 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11232 *total
= COSTS_N_INSNS (2);
11237 if (mode
== DImode
)
11239 *total
= COSTS_N_INSNS (5);
11243 if (CONST_INT_P (XEXP (x
, 1)))
11245 /* If operand 1 is a constant we can more accurately
11246 calculate the cost of the multiply. The multiplier can
11247 retire 15 bits on the first cycle and a further 12 on the
11248 second. We do, of course, have to load the constant into
11249 a register first. */
11250 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
11251 /* There's a general overhead of one cycle. */
11253 unsigned HOST_WIDE_INT masked_const
;
11255 if (i
& 0x80000000)
11258 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
11260 masked_const
= i
& 0xffff8000;
11261 if (masked_const
!= 0)
11264 masked_const
= i
& 0xf8000000;
11265 if (masked_const
!= 0)
11268 *total
= COSTS_N_INSNS (cost
);
11272 if (mode
== SImode
)
11274 *total
= COSTS_N_INSNS (3);
11278 /* Requires a lib call */
11279 *total
= COSTS_N_INSNS (20);
11283 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11288 /* RTX costs for 9e (and later) cores. */
11291 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11292 int *total
, bool speed
)
11294 enum machine_mode mode
= GET_MODE (x
);
11301 *total
= COSTS_N_INSNS (3);
11305 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11313 /* There is no point basing this on the tuning, since it is always the
11314 fast variant if it exists at all. */
11316 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11317 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11318 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11320 *total
= COSTS_N_INSNS (2);
11325 if (mode
== DImode
)
11327 *total
= COSTS_N_INSNS (5);
11331 if (mode
== SImode
)
11333 *total
= COSTS_N_INSNS (2);
11337 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11339 if (TARGET_HARD_FLOAT
11341 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11343 *total
= COSTS_N_INSNS (1);
11348 *total
= COSTS_N_INSNS (20);
11352 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11355 /* All address computations that can be done are free, but rtx cost returns
11356 the same for practically all of them. So we weight the different types
11357 of address here in the order (most pref first):
11358 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11360 arm_arm_address_cost (rtx x
)
11362 enum rtx_code c
= GET_CODE (x
);
11364 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
11366 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
11371 if (CONST_INT_P (XEXP (x
, 1)))
11374 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
11384 arm_thumb_address_cost (rtx x
)
11386 enum rtx_code c
= GET_CODE (x
);
11391 && REG_P (XEXP (x
, 0))
11392 && CONST_INT_P (XEXP (x
, 1)))
11399 arm_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
11400 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11402 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11405 /* Adjust cost hook for XScale. */
11407 xscale_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11409 /* Some true dependencies can have a higher cost depending
11410 on precisely how certain input operands are used. */
11411 if (REG_NOTE_KIND(link
) == 0
11412 && recog_memoized (insn
) >= 0
11413 && recog_memoized (dep
) >= 0)
11415 int shift_opnum
= get_attr_shift (insn
);
11416 enum attr_type attr_type
= get_attr_type (dep
);
11418 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11419 operand for INSN. If we have a shifted input operand and the
11420 instruction we depend on is another ALU instruction, then we may
11421 have to account for an additional stall. */
11422 if (shift_opnum
!= 0
11423 && (attr_type
== TYPE_ALU_SHIFT_IMM
11424 || attr_type
== TYPE_ALUS_SHIFT_IMM
11425 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11426 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11427 || attr_type
== TYPE_ALU_SHIFT_REG
11428 || attr_type
== TYPE_ALUS_SHIFT_REG
11429 || attr_type
== TYPE_LOGIC_SHIFT_REG
11430 || attr_type
== TYPE_LOGICS_SHIFT_REG
11431 || attr_type
== TYPE_MOV_SHIFT
11432 || attr_type
== TYPE_MVN_SHIFT
11433 || attr_type
== TYPE_MOV_SHIFT_REG
11434 || attr_type
== TYPE_MVN_SHIFT_REG
))
11436 rtx shifted_operand
;
11439 /* Get the shifted operand. */
11440 extract_insn (insn
);
11441 shifted_operand
= recog_data
.operand
[shift_opnum
];
11443 /* Iterate over all the operands in DEP. If we write an operand
11444 that overlaps with SHIFTED_OPERAND, then we have increase the
11445 cost of this dependency. */
11446 extract_insn (dep
);
11447 preprocess_constraints (dep
);
11448 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11450 /* We can ignore strict inputs. */
11451 if (recog_data
.operand_type
[opno
] == OP_IN
)
11454 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11466 /* Adjust cost hook for Cortex A9. */
11468 cortex_a9_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11470 switch (REG_NOTE_KIND (link
))
11477 case REG_DEP_OUTPUT
:
11478 if (recog_memoized (insn
) >= 0
11479 && recog_memoized (dep
) >= 0)
11481 if (GET_CODE (PATTERN (insn
)) == SET
)
11484 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11486 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11488 enum attr_type attr_type_insn
= get_attr_type (insn
);
11489 enum attr_type attr_type_dep
= get_attr_type (dep
);
11491 /* By default all dependencies of the form
11494 have an extra latency of 1 cycle because
11495 of the input and output dependency in this
11496 case. However this gets modeled as an true
11497 dependency and hence all these checks. */
11498 if (REG_P (SET_DEST (PATTERN (insn
)))
11499 && REG_P (SET_DEST (PATTERN (dep
)))
11500 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
11501 SET_DEST (PATTERN (dep
))))
11503 /* FMACS is a special case where the dependent
11504 instruction can be issued 3 cycles before
11505 the normal latency in case of an output
11507 if ((attr_type_insn
== TYPE_FMACS
11508 || attr_type_insn
== TYPE_FMACD
)
11509 && (attr_type_dep
== TYPE_FMACS
11510 || attr_type_dep
== TYPE_FMACD
))
11512 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11513 *cost
= insn_default_latency (dep
) - 3;
11515 *cost
= insn_default_latency (dep
);
11520 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11521 *cost
= insn_default_latency (dep
) + 1;
11523 *cost
= insn_default_latency (dep
);
11533 gcc_unreachable ();
11539 /* Adjust cost hook for FA726TE. */
11541 fa726te_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11543 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11544 have penalty of 3. */
11545 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
11546 && recog_memoized (insn
) >= 0
11547 && recog_memoized (dep
) >= 0
11548 && get_attr_conds (dep
) == CONDS_SET
)
11550 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11551 if (get_attr_conds (insn
) == CONDS_USE
11552 && get_attr_type (insn
) != TYPE_BRANCH
)
11558 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11559 || get_attr_conds (insn
) == CONDS_USE
)
11569 /* Implement TARGET_REGISTER_MOVE_COST.
11571 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11572 it is typically more expensive than a single memory access. We set
11573 the cost to less than two memory accesses so that floating
11574 point to integer conversion does not go through memory. */
11577 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED
,
11578 reg_class_t from
, reg_class_t to
)
11582 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11583 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11585 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11586 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11588 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11595 if (from
== HI_REGS
|| to
== HI_REGS
)
11602 /* Implement TARGET_MEMORY_MOVE_COST. */
11605 arm_memory_move_cost (enum machine_mode mode
, reg_class_t rclass
,
11606 bool in ATTRIBUTE_UNUSED
)
11612 if (GET_MODE_SIZE (mode
) < 4)
11615 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11619 /* Vectorizer cost model implementation. */
11621 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11623 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11625 int misalign ATTRIBUTE_UNUSED
)
11629 switch (type_of_cost
)
11632 return current_tune
->vec_costs
->scalar_stmt_cost
;
11635 return current_tune
->vec_costs
->scalar_load_cost
;
11638 return current_tune
->vec_costs
->scalar_store_cost
;
11641 return current_tune
->vec_costs
->vec_stmt_cost
;
11644 return current_tune
->vec_costs
->vec_align_load_cost
;
11647 return current_tune
->vec_costs
->vec_store_cost
;
11649 case vec_to_scalar
:
11650 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11652 case scalar_to_vec
:
11653 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11655 case unaligned_load
:
11656 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11658 case unaligned_store
:
11659 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11661 case cond_branch_taken
:
11662 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11664 case cond_branch_not_taken
:
11665 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11668 case vec_promote_demote
:
11669 return current_tune
->vec_costs
->vec_stmt_cost
;
11671 case vec_construct
:
11672 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11673 return elements
/ 2 + 1;
11676 gcc_unreachable ();
11680 /* Implement targetm.vectorize.add_stmt_cost. */
11683 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11684 struct _stmt_vec_info
*stmt_info
, int misalign
,
11685 enum vect_cost_model_location where
)
11687 unsigned *cost
= (unsigned *) data
;
11688 unsigned retval
= 0;
11690 if (flag_vect_cost_model
)
11692 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11693 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11695 /* Statements in an inner loop relative to the loop being
11696 vectorized are weighted more heavily. The value here is
11697 arbitrary and could potentially be improved with analysis. */
11698 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11699 count
*= 50; /* FIXME. */
11701 retval
= (unsigned) (count
* stmt_cost
);
11702 cost
[where
] += retval
;
11708 /* Return true if and only if this insn can dual-issue only as older. */
/* NOTE(review): corrupted extraction — large runs of this function's case
   labels are missing (embedded numbering jumps 11722, 11724-11727, 11731-11732,
   11734, 11736-11752), so the code below is left byte-for-byte and only this
   comment is added.  Visible fragments show: a recog_memoized guard and a
   switch over get_attr_type (insn) listing register-ALU/shift/load/FP-store
   insn types — presumably those returning true ("older only"); confirm the
   full case list against the repository copy of gcc/config/arm/arm.c.  */
11710 cortexa7_older_only (rtx insn
)
11712 if (recog_memoized (insn
) < 0)
11715 switch (get_attr_type (insn
))
11717 case TYPE_ALU_DSP_REG
:
11718 case TYPE_ALU_SREG
:
11719 case TYPE_ALUS_SREG
:
11720 case TYPE_LOGIC_REG
:
11721 case TYPE_LOGICS_REG
:
11723 case TYPE_ADCS_REG
:
11728 case TYPE_SHIFT_IMM
:
11729 case TYPE_SHIFT_REG
:
11730 case TYPE_LOAD_BYTE
:
11733 case TYPE_FFARITHS
:
11735 case TYPE_FFARITHD
:
11753 case TYPE_F_STORES
:
11760 /* Return true if and only if this insn can dual-issue as younger. */
/* NOTE(review): corrupted extraction — interior lines of this function are
   missing (embedded numbering jumps 11777-11780, 11783-11791), so the code is
   left byte-for-byte and only this comment is added.  Visible fragments show:
   a recog_memoized guard with a verbose fprintf trace, and a switch over
   get_attr_type (insn) listing immediate-operand ALU and mov-shift types —
   presumably those returning true ("can issue as younger"); confirm against
   the repository copy of gcc/config/arm/arm.c.  */
11762 cortexa7_younger (FILE *file
, int verbose
, rtx insn
)
11764 if (recog_memoized (insn
) < 0)
11767 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11771 switch (get_attr_type (insn
))
11774 case TYPE_ALUS_IMM
:
11775 case TYPE_LOGIC_IMM
:
11776 case TYPE_LOGICS_IMM
:
11781 case TYPE_MOV_SHIFT
:
11782 case TYPE_MOV_SHIFT_REG
:
11792 /* Look for an instruction that can dual issue only as an older
11793 instruction, and move it in front of any instructions that can
11794 dual-issue as younger, while preserving the relative order of all
11796 other instructions in the ready list. This is a heuristic to help
11796 dual-issue in later cycles, by postponing issue of more flexible
11797 instructions. This heuristic may affect dual issue opportunities
11798 in the current cycle. */
11800 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx
*ready
, int *n_readyp
,
11804 int first_older_only
= -1, first_younger
= -1;
11808 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11812 /* Traverse the ready list from the head (the instruction to issue
11813 first), and looking for the first instruction that can issue as
11814 younger and the first instruction that can dual-issue only as
11816 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11818 rtx insn
= ready
[i
];
11819 if (cortexa7_older_only (insn
))
11821 first_older_only
= i
;
11823 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11826 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11830 /* Nothing to reorder because either no younger insn found or insn
11831 that can dual-issue only as older appears before any insn that
11832 can dual-issue as younger. */
11833 if (first_younger
== -1)
11836 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11840 /* Nothing to reorder because no older-only insn in the ready list. */
11841 if (first_older_only
== -1)
11844 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11848 /* Move first_older_only insn before first_younger. */
11850 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11851 INSN_UID(ready
[first_older_only
]),
11852 INSN_UID(ready
[first_younger
]));
11853 rtx first_older_only_insn
= ready
[first_older_only
];
11854 for (i
= first_older_only
; i
< first_younger
; i
++)
11856 ready
[i
] = ready
[i
+1];
11859 ready
[i
] = first_older_only_insn
;
11863 /* Implement TARGET_SCHED_REORDER. */
11865 arm_sched_reorder (FILE *file
, int verbose
, rtx
*ready
, int *n_readyp
,
11871 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11874 /* Do nothing for other cores. */
11878 return arm_issue_rate ();
11881 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11882 It corrects the value of COST based on the relationship between
11883 INSN and DEP through the dependence LINK. It returns the new
11884 value. There is a per-core adjust_cost hook to adjust scheduler costs
11885 and the per-core hook can choose to completely override the generic
11886 adjust_cost function. Only put bits of code into arm_adjust_cost that
11887 are common across all cores. */
11889 arm_adjust_cost (rtx insn
, rtx link
, rtx dep
, int cost
)
11893 /* When generating Thumb-1 code, we want to place flag-setting operations
11894 close to a conditional branch which depends on them, so that we can
11895 omit the comparison. */
11897 && REG_NOTE_KIND (link
) == 0
11898 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11899 && recog_memoized (dep
) >= 0
11900 && get_attr_conds (dep
) == CONDS_SET
)
11903 if (current_tune
->sched_adjust_cost
!= NULL
)
11905 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
11909 /* XXX Is this strictly true? */
11910 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
11911 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11914 /* Call insns don't incur a stall, even if they follow a load. */
11915 if (REG_NOTE_KIND (link
) == 0
11919 if ((i_pat
= single_set (insn
)) != NULL
11920 && MEM_P (SET_SRC (i_pat
))
11921 && (d_pat
= single_set (dep
)) != NULL
11922 && MEM_P (SET_DEST (d_pat
)))
11924 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11925 /* This is a load after a store, there is no conflict if the load reads
11926 from a cached area. Assume that loads from the stack, and from the
11927 constant pool are cached, and that others will miss. This is a
11930 if ((GET_CODE (src_mem
) == SYMBOL_REF
11931 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11932 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11933 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11934 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11942 arm_max_conditional_execute (void)
11944 return max_insns_skipped
;
11948 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11951 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11953 return (optimize
> 0) ? 2 : 0;
11957 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11959 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11962 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11963 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11964 sequences of non-executed instructions in IT blocks probably take the same
11965 amount of time as executed instructions (and the IT instruction itself takes
11966 space in icache). This function was experimentally determined to give good
11967 results on a popular embedded benchmark. */
11970 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11972 return (TARGET_32BIT
&& speed_p
) ? 1
11973 : arm_default_branch_cost (speed_p
, predictable_p
);
11976 static bool fp_consts_inited
= false;
11978 static REAL_VALUE_TYPE value_fp0
;
11981 init_fp_table (void)
11985 r
= REAL_VALUE_ATOF ("0", DFmode
);
11987 fp_consts_inited
= true;
11990 /* Return TRUE if rtx X is a valid immediate FP constant. */
11992 arm_const_double_rtx (rtx x
)
11996 if (!fp_consts_inited
)
11999 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
12000 if (REAL_VALUE_MINUS_ZERO (r
))
12003 if (REAL_VALUES_EQUAL (r
, value_fp0
))
12009 /* VFPv3 has a fairly wide range of representable immediates, formed from
12010 "quarter-precision" floating-point values. These can be evaluated using this
12011 formula (with ^ for exponentiation): -1^s * n * 2^-r
12015 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12016 16 <= n <= 31 and 0 <= r <= 7.
12018 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12020 - A (most-significant) is the sign bit.
12021 - BCD are the exponent (encoded as r XOR 3).
12022 - EFGH are the mantissa (encoded as n - 16).
12025 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12026 fconst[sd] instruction, or -1 if X isn't suitable. */
12028 vfp3_const_double_index (rtx x
)
12030 REAL_VALUE_TYPE r
, m
;
12031 int sign
, exponent
;
12032 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
12033 unsigned HOST_WIDE_INT mask
;
12034 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12037 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
12040 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
12042 /* We can't represent these things, so detect them first. */
12043 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
12046 /* Extract sign, exponent and mantissa. */
12047 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
12048 r
= real_value_abs (&r
);
12049 exponent
= REAL_EXP (&r
);
12050 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12051 highest (sign) bit, with a fixed binary point at bit point_pos.
12052 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12053 bits for the mantissa, this may fail (low bits would be lost). */
12054 real_ldexp (&m
, &r
, point_pos
- exponent
);
12055 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12056 mantissa
= w
.elt (0);
12057 mant_hi
= w
.elt (1);
12059 /* If there are bits set in the low part of the mantissa, we can't
12060 represent this value. */
12064 /* Now make it so that mantissa contains the most-significant bits, and move
12065 the point_pos to indicate that the least-significant bits have been
12067 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12068 mantissa
= mant_hi
;
12070 /* We can permit four significant bits of mantissa only, plus a high bit
12071 which is always 1. */
12072 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
12073 if ((mantissa
& mask
) != 0)
12076 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12077 mantissa
>>= point_pos
- 5;
12079 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12080 floating-point immediate zero with Neon using an integer-zero load, but
12081 that case is handled elsewhere.) */
12085 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
12087 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12088 normalized significands are in the range [1, 2). (Our mantissa is shifted
12089 left 4 places at this point relative to normalized IEEE754 values). GCC
12090 internally uses [0.5, 1) (see real.c), so the exponent returned from
12091 REAL_EXP must be altered. */
12092 exponent
= 5 - exponent
;
12094 if (exponent
< 0 || exponent
> 7)
12097 /* Sign, mantissa and exponent are now in the correct form to plug into the
12098 formula described in the comment above. */
12099 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
12102 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12104 vfp3_const_double_rtx (rtx x
)
12109 return vfp3_const_double_index (x
) != -1;
12112 /* Recognize immediates which can be used in various Neon instructions. Legal
12113 immediates are described by the following table (for VMVN variants, the
12114 bitwise inverse of the constant shown is recognized. In either case, VMOV
12115 is output and the correct instruction to use for a given constant is chosen
12116 by the assembler). The constant shown is replicated across all elements of
12117 the destination vector.
12119 insn elems variant constant (binary)
12120 ---- ----- ------- -----------------
12121 vmov i32 0 00000000 00000000 00000000 abcdefgh
12122 vmov i32 1 00000000 00000000 abcdefgh 00000000
12123 vmov i32 2 00000000 abcdefgh 00000000 00000000
12124 vmov i32 3 abcdefgh 00000000 00000000 00000000
12125 vmov i16 4 00000000 abcdefgh
12126 vmov i16 5 abcdefgh 00000000
12127 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12128 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12129 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12130 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12131 vmvn i16 10 00000000 abcdefgh
12132 vmvn i16 11 abcdefgh 00000000
12133 vmov i32 12 00000000 00000000 abcdefgh 11111111
12134 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12135 vmov i32 14 00000000 abcdefgh 11111111 11111111
12136 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12137 vmov i8 16 abcdefgh
12138 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12139 eeeeeeee ffffffff gggggggg hhhhhhhh
12140 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12141 vmov f32 19 00000000 00000000 00000000 00000000
12143 For case 18, B = !b. Representable values are exactly those accepted by
12144 vfp3_const_double_index, but are output as floating-point numbers rather
12147 For case 19, we will change it to vmov.i32 when assembling.
12149 Variants 0-5 (inclusive) may also be used as immediates for the second
12150 operand of VORR/VBIC instructions.
12152 The INVERSE argument causes the bitwise inverse of the given operand to be
12153 recognized instead (used for recognizing legal immediates for the VAND/VORN
12154 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12155 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12156 output, rather than the real insns vbic/vorr).
12158 INVERSE makes no difference to the recognition of float vectors.
12160 The return value is the variant of immediate as shown in the above table, or
12161 -1 if the given value doesn't match any of the listed patterns.
12164 neon_valid_immediate (rtx op
, enum machine_mode mode
, int inverse
,
12165 rtx
*modconst
, int *elementwidth
)
12167 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12169 for (i = 0; i < idx; i += (STRIDE)) \
12174 immtype = (CLASS); \
12175 elsize = (ELSIZE); \
12179 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
12180 unsigned int innersize
;
12181 unsigned char bytes
[16];
12182 int immtype
= -1, matches
;
12183 unsigned int invmask
= inverse
? 0xff : 0;
12184 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
12188 n_elts
= CONST_VECTOR_NUNITS (op
);
12189 innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12194 if (mode
== VOIDmode
)
12196 innersize
= GET_MODE_SIZE (mode
);
12199 /* Vectors of float constants. */
12200 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
12202 rtx el0
= CONST_VECTOR_ELT (op
, 0);
12203 REAL_VALUE_TYPE r0
;
12205 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
12208 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
12210 for (i
= 1; i
< n_elts
; i
++)
12212 rtx elt
= CONST_VECTOR_ELT (op
, i
);
12213 REAL_VALUE_TYPE re
;
12215 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
12217 if (!REAL_VALUES_EQUAL (r0
, re
))
12222 *modconst
= CONST_VECTOR_ELT (op
, 0);
12227 if (el0
== CONST0_RTX (GET_MODE (el0
)))
12233 /* Splat vector constant out into a byte vector. */
12234 for (i
= 0; i
< n_elts
; i
++)
12236 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
12237 unsigned HOST_WIDE_INT elpart
;
12238 unsigned int part
, parts
;
12240 if (CONST_INT_P (el
))
12242 elpart
= INTVAL (el
);
12245 else if (CONST_DOUBLE_P (el
))
12247 elpart
= CONST_DOUBLE_LOW (el
);
12251 gcc_unreachable ();
12253 for (part
= 0; part
< parts
; part
++)
12256 for (byte
= 0; byte
< innersize
; byte
++)
12258 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
12259 elpart
>>= BITS_PER_UNIT
;
12261 if (CONST_DOUBLE_P (el
))
12262 elpart
= CONST_DOUBLE_HIGH (el
);
12266 /* Sanity check. */
12267 gcc_assert (idx
== GET_MODE_SIZE (mode
));
12271 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
12272 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12274 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12275 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12277 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12278 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12280 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12281 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
12283 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
12285 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
12287 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
12288 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12290 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12291 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12293 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12294 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12296 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12297 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
12299 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
12301 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
12303 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12304 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12306 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12307 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12309 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12310 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12312 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12313 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12315 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
12317 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
12318 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
12326 *elementwidth
= elsize
;
12330 unsigned HOST_WIDE_INT imm
= 0;
12332 /* Un-invert bytes of recognized vector, if necessary. */
12334 for (i
= 0; i
< idx
; i
++)
12335 bytes
[i
] ^= invmask
;
12339 /* FIXME: Broken on 32-bit H_W_I hosts. */
12340 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
12342 for (i
= 0; i
< 8; i
++)
12343 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
12344 << (i
* BITS_PER_UNIT
);
12346 *modconst
= GEN_INT (imm
);
12350 unsigned HOST_WIDE_INT imm
= 0;
12352 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
12353 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
12355 *modconst
= GEN_INT (imm
);
12363 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12364 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12365 float elements), and a modified constant (whatever should be output for a
12366 VMOV) in *MODCONST. */
12369 neon_immediate_valid_for_move (rtx op
, enum machine_mode mode
,
12370 rtx
*modconst
, int *elementwidth
)
12374 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12380 *modconst
= tmpconst
;
12383 *elementwidth
= tmpwidth
;
12388 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12389 the immediate is valid, write a constant suitable for using as an operand
12390 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12391 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12394 neon_immediate_valid_for_logic (rtx op
, enum machine_mode mode
, int inverse
,
12395 rtx
*modconst
, int *elementwidth
)
12399 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12401 if (retval
< 0 || retval
> 5)
12405 *modconst
= tmpconst
;
12408 *elementwidth
= tmpwidth
;
12413 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12414 the immediate is valid, write a constant suitable for using as an operand
12415 to VSHR/VSHL to *MODCONST and the corresponding element width to
12416 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12417 because they have different limitations. */
12420 neon_immediate_valid_for_shift (rtx op
, enum machine_mode mode
,
12421 rtx
*modconst
, int *elementwidth
,
12424 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12425 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12426 unsigned HOST_WIDE_INT last_elt
= 0;
12427 unsigned HOST_WIDE_INT maxshift
;
12429 /* Split vector constant out into a byte vector. */
12430 for (i
= 0; i
< n_elts
; i
++)
12432 rtx el
= CONST_VECTOR_ELT (op
, i
);
12433 unsigned HOST_WIDE_INT elpart
;
12435 if (CONST_INT_P (el
))
12436 elpart
= INTVAL (el
);
12437 else if (CONST_DOUBLE_P (el
))
12440 gcc_unreachable ();
12442 if (i
!= 0 && elpart
!= last_elt
)
12448 /* Shift less than element size. */
12449 maxshift
= innersize
* 8;
12453 /* Left shift immediate value can be from 0 to <size>-1. */
12454 if (last_elt
>= maxshift
)
12459 /* Right shift immediate value can be from 1 to <size>. */
12460 if (last_elt
== 0 || last_elt
> maxshift
)
12465 *elementwidth
= innersize
* 8;
12468 *modconst
= CONST_VECTOR_ELT (op
, 0);
12473 /* Return a string suitable for output of Neon immediate logic operation
12477 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, enum machine_mode mode
,
12478 int inverse
, int quad
)
12480 int width
, is_valid
;
12481 static char templ
[40];
12483 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12485 gcc_assert (is_valid
!= 0);
12488 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12490 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12495 /* Return a string suitable for output of Neon immediate shift operation
12496 (VSHR or VSHL) MNEM. */
12499 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12500 enum machine_mode mode
, int quad
,
12503 int width
, is_valid
;
12504 static char templ
[40];
12506 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12507 gcc_assert (is_valid
!= 0);
12510 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12512 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12517 /* Output a sequence of pairwise operations to implement a reduction.
12518 NOTE: We do "too much work" here, because pairwise operations work on two
12519 registers-worth of operands in one go. Unfortunately we can't exploit those
12520 extra calculations to do the full operation in fewer steps, I don't think.
12521 Although all vector elements of the result but the first are ignored, we
12522 actually calculate the same result in each of the elements. An alternative
12523 such as initially loading a vector with zero to use as each of the second
12524 operands would use up an additional register and take an extra instruction,
12525 for no particular gain. */
12528 neon_pairwise_reduce (rtx op0
, rtx op1
, enum machine_mode mode
,
12529 rtx (*reduc
) (rtx
, rtx
, rtx
))
12531 enum machine_mode inner
= GET_MODE_INNER (mode
);
12532 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
12535 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12537 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12538 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12543 /* If VALS is a vector constant that can be loaded into a register
12544 using VDUP, generate instructions to do so and return an RTX to
12545 assign to the register. Otherwise return NULL_RTX. */
12548 neon_vdup_constant (rtx vals
)
12550 enum machine_mode mode
= GET_MODE (vals
);
12551 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
12552 int n_elts
= GET_MODE_NUNITS (mode
);
12553 bool all_same
= true;
12557 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12560 for (i
= 0; i
< n_elts
; ++i
)
12562 x
= XVECEXP (vals
, 0, i
);
12563 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12568 /* The elements are not all the same. We could handle repeating
12569 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12570 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12574 /* We can load this constant by using VDUP and a constant in a
12575 single ARM register. This will be cheaper than a vector
12578 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12579 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12582 /* Generate code to load VALS, which is a PARALLEL containing only
12583 constants (for vec_init) or CONST_VECTOR, efficiently into a
12584 register. Returns an RTX to copy into the register, or NULL_RTX
12585 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12588 neon_make_constant (rtx vals
)
12590 enum machine_mode mode
= GET_MODE (vals
);
12592 rtx const_vec
= NULL_RTX
;
12593 int n_elts
= GET_MODE_NUNITS (mode
);
12597 if (GET_CODE (vals
) == CONST_VECTOR
)
12599 else if (GET_CODE (vals
) == PARALLEL
)
12601 /* A CONST_VECTOR must contain only CONST_INTs and
12602 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12603 Only store valid constants in a CONST_VECTOR. */
12604 for (i
= 0; i
< n_elts
; ++i
)
12606 rtx x
= XVECEXP (vals
, 0, i
);
12607 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12610 if (n_const
== n_elts
)
12611 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12614 gcc_unreachable ();
12616 if (const_vec
!= NULL
12617 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12618 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12620 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12621 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12622 pipeline cycle; creating the constant takes one or two ARM
12623 pipeline cycles. */
12625 else if (const_vec
!= NULL_RTX
)
12626 /* Load from constant pool. On Cortex-A8 this takes two cycles
12627 (for either double or quad vectors). We can not take advantage
12628 of single-cycle VLD1 because we need a PC-relative addressing
12632 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12633 We can not construct an initializer. */
12637 /* Initialize vector TARGET to VALS. */
12640 neon_expand_vector_init (rtx target
, rtx vals
)
12642 enum machine_mode mode
= GET_MODE (target
);
12643 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
12644 int n_elts
= GET_MODE_NUNITS (mode
);
12645 int n_var
= 0, one_var
= -1;
12646 bool all_same
= true;
12650 for (i
= 0; i
< n_elts
; ++i
)
12652 x
= XVECEXP (vals
, 0, i
);
12653 if (!CONSTANT_P (x
))
12654 ++n_var
, one_var
= i
;
12656 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12662 rtx constant
= neon_make_constant (vals
);
12663 if (constant
!= NULL_RTX
)
12665 emit_move_insn (target
, constant
);
12670 /* Splat a single non-constant element if we can. */
12671 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12673 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12674 emit_insn (gen_rtx_SET (VOIDmode
, target
,
12675 gen_rtx_VEC_DUPLICATE (mode
, x
)));
12679 /* One field is non-constant. Load constant then overwrite varying
12680 field. This is more efficient than using the stack. */
12683 rtx copy
= copy_rtx (vals
);
12684 rtx index
= GEN_INT (one_var
);
12686 /* Load constant part of vector, substitute neighboring value for
12687 varying element. */
12688 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12689 neon_expand_vector_init (target
, copy
);
12691 /* Insert variable. */
12692 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12696 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12699 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12702 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12705 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12708 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12711 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12714 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12717 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12720 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12723 gcc_unreachable ();
12728 /* Construct the vector in memory one field at a time
12729 and load the whole vector. */
12730 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12731 for (i
= 0; i
< n_elts
; i
++)
12732 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12733 i
* GET_MODE_SIZE (inner_mode
)),
12734 XVECEXP (vals
, 0, i
));
12735 emit_move_insn (target
, mem
);
12738 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12739 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12740 reported source locations are bogus. */
12743 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12746 HOST_WIDE_INT lane
;
12748 gcc_assert (CONST_INT_P (operand
));
12750 lane
= INTVAL (operand
);
12752 if (lane
< low
|| lane
>= high
)
12756 /* Bounds-check lanes. */
12759 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12761 bounds_check (operand
, low
, high
, "lane out of range");
12764 /* Bounds-check constants. */
12767 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12769 bounds_check (operand
, low
, high
, "constant out of range");
12773 neon_element_bits (enum machine_mode mode
)
12775 if (mode
== DImode
)
12776 return GET_MODE_BITSIZE (mode
);
12778 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
12782 /* Predicates for `match_operand' and `match_operator'. */
12784 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12785 WB is true if full writeback address modes are allowed and is false
12786 if limited writeback address modes (POST_INC and PRE_DEC) are
12790 arm_coproc_mem_operand (rtx op
, bool wb
)
12794 /* Reject eliminable registers. */
12795 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12796 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12797 || reg_mentioned_p (arg_pointer_rtx
, op
)
12798 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12799 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12800 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12801 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12804 /* Constants are converted into offsets from labels. */
12808 ind
= XEXP (op
, 0);
12810 if (reload_completed
12811 && (GET_CODE (ind
) == LABEL_REF
12812 || (GET_CODE (ind
) == CONST
12813 && GET_CODE (XEXP (ind
, 0)) == PLUS
12814 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12815 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12818 /* Match: (mem (reg)). */
12820 return arm_address_register_rtx_p (ind
, 0);
12822 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12823 acceptable in any case (subject to verification by
12824 arm_address_register_rtx_p). We need WB to be true to accept
12825 PRE_INC and POST_DEC. */
12826 if (GET_CODE (ind
) == POST_INC
12827 || GET_CODE (ind
) == PRE_DEC
12829 && (GET_CODE (ind
) == PRE_INC
12830 || GET_CODE (ind
) == POST_DEC
)))
12831 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12834 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12835 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12836 && GET_CODE (XEXP (ind
, 1)) == PLUS
12837 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12838 ind
= XEXP (ind
, 1);
12843 if (GET_CODE (ind
) == PLUS
12844 && REG_P (XEXP (ind
, 0))
12845 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12846 && CONST_INT_P (XEXP (ind
, 1))
12847 && INTVAL (XEXP (ind
, 1)) > -1024
12848 && INTVAL (XEXP (ind
, 1)) < 1024
12849 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12855 /* Return TRUE if OP is a memory operand which we can load or store a vector
12856 to/from. TYPE is one of the following values:
12857 0 - Vector load/stor (vldr)
12858 1 - Core registers (ldm)
12859 2 - Element/structure loads (vld1)
12862 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12866 /* Reject eliminable registers. */
12867 if (! (reload_in_progress
|| reload_completed
)
12868 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12869 || reg_mentioned_p (arg_pointer_rtx
, op
)
12870 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12871 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12872 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12873 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12876 /* Constants are converted into offsets from labels. */
12880 ind
= XEXP (op
, 0);
12882 if (reload_completed
12883 && (GET_CODE (ind
) == LABEL_REF
12884 || (GET_CODE (ind
) == CONST
12885 && GET_CODE (XEXP (ind
, 0)) == PLUS
12886 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12887 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12890 /* Match: (mem (reg)). */
12892 return arm_address_register_rtx_p (ind
, 0);
12894 /* Allow post-increment with Neon registers. */
12895 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12896 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12897 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12899 /* Allow post-increment by register for VLDn */
12900 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
12901 && GET_CODE (XEXP (ind
, 1)) == PLUS
12902 && REG_P (XEXP (XEXP (ind
, 1), 1)))
12909 && GET_CODE (ind
) == PLUS
12910 && REG_P (XEXP (ind
, 0))
12911 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12912 && CONST_INT_P (XEXP (ind
, 1))
12913 && INTVAL (XEXP (ind
, 1)) > -1024
12914 /* For quad modes, we restrict the constant offset to be slightly less
12915 than what the instruction format permits. We have no such constraint
12916 on double mode offsets. (This must match arm_legitimate_index_p.) */
12917 && (INTVAL (XEXP (ind
, 1))
12918 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12919 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12925 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12928 neon_struct_mem_operand (rtx op
)
12932 /* Reject eliminable registers. */
12933 if (! (reload_in_progress
|| reload_completed
)
12934 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12935 || reg_mentioned_p (arg_pointer_rtx
, op
)
12936 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12937 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12938 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12939 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12942 /* Constants are converted into offsets from labels. */
12946 ind
= XEXP (op
, 0);
12948 if (reload_completed
12949 && (GET_CODE (ind
) == LABEL_REF
12950 || (GET_CODE (ind
) == CONST
12951 && GET_CODE (XEXP (ind
, 0)) == PLUS
12952 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12953 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12956 /* Match: (mem (reg)). */
12958 return arm_address_register_rtx_p (ind
, 0);
12960 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12961 if (GET_CODE (ind
) == POST_INC
12962 || GET_CODE (ind
) == PRE_DEC
)
12963 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12968 /* Return true if X is a register that will be eliminated later on. */
12970 arm_eliminable_register (rtx x
)
12972 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12973 || REGNO (x
) == ARG_POINTER_REGNUM
12974 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12975 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12978 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12979 coprocessor registers. Otherwise return NO_REGS. */
12982 coproc_secondary_reload_class (enum machine_mode mode
, rtx x
, bool wb
)
12984 if (mode
== HFmode
)
12986 if (!TARGET_NEON_FP16
)
12987 return GENERAL_REGS
;
12988 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12990 return GENERAL_REGS
;
12993 /* The neon move patterns handle all legitimate vector and struct
12996 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12997 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12998 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12999 || VALID_NEON_STRUCT_MODE (mode
)))
13002 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
13005 return GENERAL_REGS
;
13008 /* Values which must be returned in the most-significant end of the return
13012 arm_return_in_msb (const_tree valtype
)
13014 return (TARGET_AAPCS_BASED
13015 && BYTES_BIG_ENDIAN
13016 && (AGGREGATE_TYPE_P (valtype
)
13017 || TREE_CODE (valtype
) == COMPLEX_TYPE
13018 || FIXED_POINT_TYPE_P (valtype
)));
13021 /* Return TRUE if X references a SYMBOL_REF. */
13023 symbol_mentioned_p (rtx x
)
13028 if (GET_CODE (x
) == SYMBOL_REF
)
13031 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13032 are constant offsets, not symbols. */
13033 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13036 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13038 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13044 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13045 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
13048 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
13055 /* Return TRUE if X references a LABEL_REF. */
13057 label_mentioned_p (rtx x
)
13062 if (GET_CODE (x
) == LABEL_REF
)
13065 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13066 instruction, but they are constant offsets, not symbols. */
13067 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13070 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13071 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13077 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13078 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
13081 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
13089 tls_mentioned_p (rtx x
)
13091 switch (GET_CODE (x
))
13094 return tls_mentioned_p (XEXP (x
, 0));
13097 if (XINT (x
, 1) == UNSPEC_TLS
)
13105 /* Must not copy any rtx that uses a pc-relative address. */
13108 arm_note_pic_base (rtx
*x
, void *date ATTRIBUTE_UNUSED
)
13110 if (GET_CODE (*x
) == UNSPEC
13111 && (XINT (*x
, 1) == UNSPEC_PIC_BASE
13112 || XINT (*x
, 1) == UNSPEC_PIC_UNIFIED
))
13118 arm_cannot_copy_insn_p (rtx insn
)
13120 /* The tls call insn cannot be copied, as it is paired with a data
13122 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
13125 return for_each_rtx (&PATTERN (insn
), arm_note_pic_base
, NULL
);
13129 minmax_code (rtx x
)
13131 enum rtx_code code
= GET_CODE (x
);
13144 gcc_unreachable ();
13148 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13151 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
13152 int *mask
, bool *signed_sat
)
13154 /* The high bound must be a power of two minus one. */
13155 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
13159 /* The low bound is either zero (for usat) or one less than the
13160 negation of the high bound (for ssat). */
13161 if (INTVAL (lo_bound
) == 0)
13166 *signed_sat
= false;
13171 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
13176 *signed_sat
= true;
13184 /* Return 1 if memory locations are adjacent. */
13186 adjacent_mem_locations (rtx a
, rtx b
)
13188 /* We don't guarantee to preserve the order of these memory refs. */
13189 if (volatile_refs_p (a
) || volatile_refs_p (b
))
13192 if ((REG_P (XEXP (a
, 0))
13193 || (GET_CODE (XEXP (a
, 0)) == PLUS
13194 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
13195 && (REG_P (XEXP (b
, 0))
13196 || (GET_CODE (XEXP (b
, 0)) == PLUS
13197 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
13199 HOST_WIDE_INT val0
= 0, val1
= 0;
13203 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
13205 reg0
= XEXP (XEXP (a
, 0), 0);
13206 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
13209 reg0
= XEXP (a
, 0);
13211 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
13213 reg1
= XEXP (XEXP (b
, 0), 0);
13214 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
13217 reg1
= XEXP (b
, 0);
13219 /* Don't accept any offset that will require multiple
13220 instructions to handle, since this would cause the
13221 arith_adjacentmem pattern to output an overlong sequence. */
13222 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
13225 /* Don't allow an eliminable register: register elimination can make
13226 the offset too large. */
13227 if (arm_eliminable_register (reg0
))
13230 val_diff
= val1
- val0
;
13234 /* If the target has load delay slots, then there's no benefit
13235 to using an ldm instruction unless the offset is zero and
13236 we are optimizing for size. */
13237 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
13238 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
13239 && (val_diff
== 4 || val_diff
== -4));
13242 return ((REGNO (reg0
) == REGNO (reg1
))
13243 && (val_diff
== 4 || val_diff
== -4));
13249 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13250 for load operations, false for store operations. CONSECUTIVE is true
13251 if the register numbers in the operation must be consecutive in the register
13252 bank. RETURN_PC is true if value is to be loaded in PC.
13253 The pattern we are trying to match for load is:
13254 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13255 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13258 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13261 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13262 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13263 3. If consecutive is TRUE, then for kth register being loaded,
13264 REGNO (R_dk) = REGNO (R_d0) + k.
13265 The pattern for store is similar. */
13267 ldm_stm_operation_p (rtx op
, bool load
, enum machine_mode mode
,
13268 bool consecutive
, bool return_pc
)
13270 HOST_WIDE_INT count
= XVECLEN (op
, 0);
13271 rtx reg
, mem
, addr
;
13273 unsigned first_regno
;
13274 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
13276 bool addr_reg_in_reglist
= false;
13277 bool update
= false;
13282 /* If not in SImode, then registers must be consecutive
13283 (e.g., VLDM instructions for DFmode). */
13284 gcc_assert ((mode
== SImode
) || consecutive
);
13285 /* Setting return_pc for stores is illegal. */
13286 gcc_assert (!return_pc
|| load
);
13288 /* Set up the increments and the regs per val based on the mode. */
13289 reg_increment
= GET_MODE_SIZE (mode
);
13290 regs_per_val
= reg_increment
/ 4;
13291 offset_adj
= return_pc
? 1 : 0;
13294 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
13295 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
13298 /* Check if this is a write-back. */
13299 elt
= XVECEXP (op
, 0, offset_adj
);
13300 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
13306 /* The offset adjustment must be the number of registers being
13307 popped times the size of a single register. */
13308 if (!REG_P (SET_DEST (elt
))
13309 || !REG_P (XEXP (SET_SRC (elt
), 0))
13310 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
13311 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
13312 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
13313 ((count
- 1 - offset_adj
) * reg_increment
))
13317 i
= i
+ offset_adj
;
13318 base
= base
+ offset_adj
;
13319 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13320 success depends on the type: VLDM can do just one reg,
13321 LDM must do at least two. */
13322 if ((count
<= i
) && (mode
== SImode
))
13325 elt
= XVECEXP (op
, 0, i
- 1);
13326 if (GET_CODE (elt
) != SET
)
13331 reg
= SET_DEST (elt
);
13332 mem
= SET_SRC (elt
);
13336 reg
= SET_SRC (elt
);
13337 mem
= SET_DEST (elt
);
13340 if (!REG_P (reg
) || !MEM_P (mem
))
13343 regno
= REGNO (reg
);
13344 first_regno
= regno
;
13345 addr
= XEXP (mem
, 0);
13346 if (GET_CODE (addr
) == PLUS
)
13348 if (!CONST_INT_P (XEXP (addr
, 1)))
13351 offset
= INTVAL (XEXP (addr
, 1));
13352 addr
= XEXP (addr
, 0);
13358 /* Don't allow SP to be loaded unless it is also the base register. It
13359 guarantees that SP is reset correctly when an LDM instruction
13360 is interrupted. Otherwise, we might end up with a corrupt stack. */
13361 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13364 for (; i
< count
; i
++)
13366 elt
= XVECEXP (op
, 0, i
);
13367 if (GET_CODE (elt
) != SET
)
13372 reg
= SET_DEST (elt
);
13373 mem
= SET_SRC (elt
);
13377 reg
= SET_SRC (elt
);
13378 mem
= SET_DEST (elt
);
13382 || GET_MODE (reg
) != mode
13383 || REGNO (reg
) <= regno
13386 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
13387 /* Don't allow SP to be loaded unless it is also the base register. It
13388 guarantees that SP is reset correctly when an LDM instruction
13389 is interrupted. Otherwise, we might end up with a corrupt stack. */
13390 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13392 || GET_MODE (mem
) != mode
13393 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
13394 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
13395 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13396 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13397 offset
+ (i
- base
) * reg_increment
))
13398 && (!REG_P (XEXP (mem
, 0))
13399 || offset
+ (i
- base
) * reg_increment
!= 0)))
13402 regno
= REGNO (reg
);
13403 if (regno
== REGNO (addr
))
13404 addr_reg_in_reglist
= true;
13409 if (update
&& addr_reg_in_reglist
)
13412 /* For Thumb-1, address register is always modified - either by write-back
13413 or by explicit load. If the pattern does not describe an update,
13414 then the address register must be in the list of loaded registers. */
13416 return update
|| addr_reg_in_reglist
;
13422 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13423 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13424 instruction. ADD_OFFSET is nonzero if the base address register needs
13425 to be modified with an add instruction before we can use it. */
13428 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13429 int nops
, HOST_WIDE_INT add_offset
)
13431 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13432 if the offset isn't small enough. The reason 2 ldrs are faster
13433 is because these ARMs are able to do more than one cache access
13434 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13435 whilst the ARM8 has a double bandwidth cache. This means that
13436 these cores can do both an instruction fetch and a data fetch in
13437 a single cycle, so the trick of calculating the address into a
13438 scratch register (one of the result regs) and then doing a load
13439 multiple actually becomes slower (and no smaller in code size).
13440 That is the transformation
13442 ldr rd1, [rbase + offset]
13443 ldr rd2, [rbase + offset + 4]
13447 add rd1, rbase, offset
13448 ldmia rd1, {rd1, rd2}
13450 produces worse code -- '3 cycles + any stalls on rd2' instead of
13451 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13452 access per cycle, the first sequence could never complete in less
13453 than 6 cycles, whereas the ldm sequence would only take 5 and
13454 would make better use of sequential accesses if not hitting the
13457 We cheat here and test 'arm_ld_sched' which we currently know to
13458 only be true for the ARM8, ARM9 and StrongARM. If this ever
13459 changes, then the test below needs to be reworked. */
13460 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13463 /* XScale has load-store double instructions, but they have stricter
13464 alignment requirements than load-store multiple, so we cannot
13467 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13468 the pipeline until completion.
13476 An ldr instruction takes 1-3 cycles, but does not block the
13485 Best case ldr will always win. However, the more ldr instructions
13486 we issue, the less likely we are to be able to schedule them well.
13487 Using ldr instructions also increases code size.
13489 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13490 for counts of 3 or 4 regs. */
13491 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13496 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13497 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13498 an array ORDER which describes the sequence to use when accessing the
13499 offsets that produces an ascending order. In this sequence, each
13500 offset must be larger by exactly 4 than the previous one. ORDER[0]
13501 must have been filled in with the lowest offset by the caller.
13502 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13503 we use to verify that ORDER produces an ascending order of registers.
13504 Return true if it was possible to construct such an order, false if
13508 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13509 int *unsorted_regs
)
13512 for (i
= 1; i
< nops
; i
++)
13516 order
[i
] = order
[i
- 1];
13517 for (j
= 0; j
< nops
; j
++)
13518 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13520 /* We must find exactly one offset that is higher than the
13521 previous one by 4. */
13522 if (order
[i
] != order
[i
- 1])
13526 if (order
[i
] == order
[i
- 1])
13528 /* The register numbers must be ascending. */
13529 if (unsorted_regs
!= NULL
13530 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13536 /* Used to determine in a peephole whether a sequence of load
13537 instructions can be changed into a load-multiple instruction.
13538 NOPS is the number of separate load instructions we are examining. The
13539 first NOPS entries in OPERANDS are the destination registers, the
13540 next NOPS entries are memory operands. If this function is
13541 successful, *BASE is set to the common base register of the memory
13542 accesses; *LOAD_OFFSET is set to the first memory location's offset
13543 from that base register.
13544 REGS is an array filled in with the destination register numbers.
13545 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13546 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13547 the sequence of registers in REGS matches the loads from ascending memory
13548 locations, and the function verifies that the register numbers are
13549 themselves ascending. If CHECK_REGS is false, the register numbers
13550 are stored in the order they are found in the operands. */
13552 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13553 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13555 int unsorted_regs
[MAX_LDM_STM_OPS
];
13556 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13557 int order
[MAX_LDM_STM_OPS
];
13558 rtx base_reg_rtx
= NULL
;
13562 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13563 easily extended if required. */
13564 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13566 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13568 /* Loop over the operands and check that the memory references are
13569 suitable (i.e. immediate offsets from the same base register). At
13570 the same time, extract the target register, and the memory
13572 for (i
= 0; i
< nops
; i
++)
13577 /* Convert a subreg of a mem into the mem itself. */
13578 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13579 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13581 gcc_assert (MEM_P (operands
[nops
+ i
]));
13583 /* Don't reorder volatile memory references; it doesn't seem worth
13584 looking for the case where the order is ok anyway. */
13585 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13588 offset
= const0_rtx
;
13590 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13591 || (GET_CODE (reg
) == SUBREG
13592 && REG_P (reg
= SUBREG_REG (reg
))))
13593 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13594 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13595 || (GET_CODE (reg
) == SUBREG
13596 && REG_P (reg
= SUBREG_REG (reg
))))
13597 && (CONST_INT_P (offset
13598 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13602 base_reg
= REGNO (reg
);
13603 base_reg_rtx
= reg
;
13604 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13607 else if (base_reg
!= (int) REGNO (reg
))
13608 /* Not addressed from the same base register. */
13611 unsorted_regs
[i
] = (REG_P (operands
[i
])
13612 ? REGNO (operands
[i
])
13613 : REGNO (SUBREG_REG (operands
[i
])));
13615 /* If it isn't an integer register, or if it overwrites the
13616 base register but isn't the last insn in the list, then
13617 we can't do this. */
13618 if (unsorted_regs
[i
] < 0
13619 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13620 || unsorted_regs
[i
] > 14
13621 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13624 /* Don't allow SP to be loaded unless it is also the base
13625 register. It guarantees that SP is reset correctly when
13626 an LDM instruction is interrupted. Otherwise, we might
13627 end up with a corrupt stack. */
13628 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13631 unsorted_offsets
[i
] = INTVAL (offset
);
13632 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13636 /* Not a suitable memory address. */
13640 /* All the useful information has now been extracted from the
13641 operands into unsorted_regs and unsorted_offsets; additionally,
13642 order[0] has been set to the lowest offset in the list. Sort
13643 the offsets into order, verifying that they are adjacent, and
13644 check that the register numbers are ascending. */
13645 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13646 check_regs
? unsorted_regs
: NULL
))
13650 memcpy (saved_order
, order
, sizeof order
);
13656 for (i
= 0; i
< nops
; i
++)
13657 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13659 *load_offset
= unsorted_offsets
[order
[0]];
13663 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13666 if (unsorted_offsets
[order
[0]] == 0)
13667 ldm_case
= 1; /* ldmia */
13668 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13669 ldm_case
= 2; /* ldmib */
13670 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13671 ldm_case
= 3; /* ldmda */
13672 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13673 ldm_case
= 4; /* ldmdb */
13674 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13675 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13680 if (!multiple_operation_profitable_p (false, nops
,
13682 ? unsorted_offsets
[order
[0]] : 0))
13688 /* Used to determine in a peephole whether a sequence of store instructions can
13689 be changed into a store-multiple instruction.
13690 NOPS is the number of separate store instructions we are examining.
13691 NOPS_TOTAL is the total number of instructions recognized by the peephole
13693 The first NOPS entries in OPERANDS are the source registers, the next
13694 NOPS entries are memory operands. If this function is successful, *BASE is
13695 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13696 to the first memory location's offset from that base register. REGS is an
13697 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13698 likewise filled with the corresponding rtx's.
13699 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13700 numbers to an ascending order of stores.
13701 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13702 from ascending memory locations, and the function verifies that the register
13703 numbers are themselves ascending. If CHECK_REGS is false, the register
13704 numbers are stored in the order they are found in the operands. */
13706 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13707 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13708 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13710 int unsorted_regs
[MAX_LDM_STM_OPS
];
13711 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13712 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13713 int order
[MAX_LDM_STM_OPS
];
13715 rtx base_reg_rtx
= NULL
;
13718 /* Write back of base register is currently only supported for Thumb 1. */
13719 int base_writeback
= TARGET_THUMB1
;
13721 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13722 easily extended if required. */
13723 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13725 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13727 /* Loop over the operands and check that the memory references are
13728 suitable (i.e. immediate offsets from the same base register). At
13729 the same time, extract the target register, and the memory
13731 for (i
= 0; i
< nops
; i
++)
13736 /* Convert a subreg of a mem into the mem itself. */
13737 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13738 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13740 gcc_assert (MEM_P (operands
[nops
+ i
]));
13742 /* Don't reorder volatile memory references; it doesn't seem worth
13743 looking for the case where the order is ok anyway. */
13744 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13747 offset
= const0_rtx
;
13749 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13750 || (GET_CODE (reg
) == SUBREG
13751 && REG_P (reg
= SUBREG_REG (reg
))))
13752 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13753 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13754 || (GET_CODE (reg
) == SUBREG
13755 && REG_P (reg
= SUBREG_REG (reg
))))
13756 && (CONST_INT_P (offset
13757 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13759 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13760 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13761 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13765 base_reg
= REGNO (reg
);
13766 base_reg_rtx
= reg
;
13767 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13770 else if (base_reg
!= (int) REGNO (reg
))
13771 /* Not addressed from the same base register. */
13774 /* If it isn't an integer register, then we can't do this. */
13775 if (unsorted_regs
[i
] < 0
13776 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13777 /* The effects are unpredictable if the base register is
13778 both updated and stored. */
13779 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13780 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13781 || unsorted_regs
[i
] > 14)
13784 unsorted_offsets
[i
] = INTVAL (offset
);
13785 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13789 /* Not a suitable memory address. */
13793 /* All the useful information has now been extracted from the
13794 operands into unsorted_regs and unsorted_offsets; additionally,
13795 order[0] has been set to the lowest offset in the list. Sort
13796 the offsets into order, verifying that they are adjacent, and
13797 check that the register numbers are ascending. */
13798 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13799 check_regs
? unsorted_regs
: NULL
))
13803 memcpy (saved_order
, order
, sizeof order
);
13809 for (i
= 0; i
< nops
; i
++)
13811 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13813 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13816 *load_offset
= unsorted_offsets
[order
[0]];
13820 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13823 if (unsorted_offsets
[order
[0]] == 0)
13824 stm_case
= 1; /* stmia */
13825 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13826 stm_case
= 2; /* stmib */
13827 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13828 stm_case
= 3; /* stmda */
13829 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13830 stm_case
= 4; /* stmdb */
13834 if (!multiple_operation_profitable_p (false, nops
, 0))
13840 /* Routines for use in generating RTL. */
13842 /* Generate a load-multiple instruction. COUNT is the number of loads in
13843 the instruction; REGS and MEMS are arrays containing the operands.
13844 BASEREG is the base register to be used in addressing the memory operands.
13845 WBACK_OFFSET is nonzero if the instruction should update the base
13849 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13850 HOST_WIDE_INT wback_offset
)
13855 if (!multiple_operation_profitable_p (false, count
, 0))
13861 for (i
= 0; i
< count
; i
++)
13862 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13864 if (wback_offset
!= 0)
13865 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13867 seq
= get_insns ();
13873 result
= gen_rtx_PARALLEL (VOIDmode
,
13874 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13875 if (wback_offset
!= 0)
13877 XVECEXP (result
, 0, 0)
13878 = gen_rtx_SET (VOIDmode
, basereg
,
13879 plus_constant (Pmode
, basereg
, wback_offset
));
13884 for (j
= 0; i
< count
; i
++, j
++)
13885 XVECEXP (result
, 0, i
)
13886 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13891 /* Generate a store-multiple instruction. COUNT is the number of stores in
13892 the instruction; REGS and MEMS are arrays containing the operands.
13893 BASEREG is the base register to be used in addressing the memory operands.
13894 WBACK_OFFSET is nonzero if the instruction should update the base
13898 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13899 HOST_WIDE_INT wback_offset
)
13904 if (GET_CODE (basereg
) == PLUS
)
13905 basereg
= XEXP (basereg
, 0);
13907 if (!multiple_operation_profitable_p (false, count
, 0))
13913 for (i
= 0; i
< count
; i
++)
13914 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13916 if (wback_offset
!= 0)
13917 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13919 seq
= get_insns ();
13925 result
= gen_rtx_PARALLEL (VOIDmode
,
13926 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13927 if (wback_offset
!= 0)
13929 XVECEXP (result
, 0, 0)
13930 = gen_rtx_SET (VOIDmode
, basereg
,
13931 plus_constant (Pmode
, basereg
, wback_offset
));
13936 for (j
= 0; i
< count
; i
++, j
++)
13937 XVECEXP (result
, 0, i
)
13938 = gen_rtx_SET (VOIDmode
, mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13943 /* Generate either a load-multiple or a store-multiple instruction. This
13944 function can be used in situations where we can start with a single MEM
13945 rtx and adjust its address upwards.
13946 COUNT is the number of operations in the instruction, not counting a
13947 possible update of the base register. REGS is an array containing the
13949 BASEREG is the base register to be used in addressing the memory operands,
13950 which are constructed from BASEMEM.
13951 WRITE_BACK specifies whether the generated instruction should include an
13952 update of the base register.
13953 OFFSETP is used to pass an offset to and from this function; this offset
13954 is not used when constructing the address (instead BASEMEM should have an
13955 appropriate offset in its address), it is used only for setting
13956 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13959 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13960 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13962 rtx mems
[MAX_LDM_STM_OPS
];
13963 HOST_WIDE_INT offset
= *offsetp
;
13966 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13968 if (GET_CODE (basereg
) == PLUS
)
13969 basereg
= XEXP (basereg
, 0);
13971 for (i
= 0; i
< count
; i
++)
13973 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13974 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13982 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13983 write_back
? 4 * count
: 0);
13985 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13986 write_back
? 4 * count
: 0);
13990 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13991 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13993 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13998 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13999 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14001 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
14005 /* Called from a peephole2 expander to turn a sequence of loads into an
14006 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14007 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14008 is true if we can reorder the registers because they are used commutatively
14010 Returns true iff we could generate a new instruction. */
14013 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
14015 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14016 rtx mems
[MAX_LDM_STM_OPS
];
14017 int i
, j
, base_reg
;
14019 HOST_WIDE_INT offset
;
14020 int write_back
= FALSE
;
14024 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
14025 &base_reg
, &offset
, !sort_regs
);
14031 for (i
= 0; i
< nops
- 1; i
++)
14032 for (j
= i
+ 1; j
< nops
; j
++)
14033 if (regs
[i
] > regs
[j
])
14039 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14043 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
14044 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
14050 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
14051 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
14053 if (!TARGET_THUMB1
)
14055 base_reg
= regs
[0];
14056 base_reg_rtx
= newbase
;
14060 for (i
= 0; i
< nops
; i
++)
14062 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14063 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14066 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14067 write_back
? offset
+ i
* 4 : 0));
14071 /* Called from a peephole2 expander to turn a sequence of stores into an
14072 STM instruction. OPERANDS are the operands found by the peephole matcher;
14073 NOPS indicates how many separate stores we are trying to combine.
14074 Returns true iff we could generate a new instruction. */
14077 gen_stm_seq (rtx
*operands
, int nops
)
14080 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14081 rtx mems
[MAX_LDM_STM_OPS
];
14084 HOST_WIDE_INT offset
;
14085 int write_back
= FALSE
;
14088 bool base_reg_dies
;
14090 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
14091 mem_order
, &base_reg
, &offset
, true);
14096 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14098 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
14101 gcc_assert (base_reg_dies
);
14107 gcc_assert (base_reg_dies
);
14108 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14112 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14114 for (i
= 0; i
< nops
; i
++)
14116 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14117 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14120 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14121 write_back
? offset
+ i
* 4 : 0));
14125 /* Called from a peephole2 expander to turn a sequence of stores that are
14126 preceded by constant loads into an STM instruction. OPERANDS are the
14127 operands found by the peephole matcher; NOPS indicates how many
14128 separate stores we are trying to combine; there are 2 * NOPS
14129 instructions in the peephole.
14130 Returns true iff we could generate a new instruction. */
14133 gen_const_stm_seq (rtx
*operands
, int nops
)
14135 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
14136 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14137 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
14138 rtx mems
[MAX_LDM_STM_OPS
];
14141 HOST_WIDE_INT offset
;
14142 int write_back
= FALSE
;
14145 bool base_reg_dies
;
14147 HARD_REG_SET allocated
;
14149 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
14150 mem_order
, &base_reg
, &offset
, false);
14155 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
14157 /* If the same register is used more than once, try to find a free
14159 CLEAR_HARD_REG_SET (allocated
);
14160 for (i
= 0; i
< nops
; i
++)
14162 for (j
= i
+ 1; j
< nops
; j
++)
14163 if (regs
[i
] == regs
[j
])
14165 rtx t
= peep2_find_free_register (0, nops
* 2,
14166 TARGET_THUMB1
? "l" : "r",
14167 SImode
, &allocated
);
14171 regs
[i
] = REGNO (t
);
14175 /* Compute an ordering that maps the register numbers to an ascending
14178 for (i
= 0; i
< nops
; i
++)
14179 if (regs
[i
] < regs
[reg_order
[0]])
14182 for (i
= 1; i
< nops
; i
++)
14184 int this_order
= reg_order
[i
- 1];
14185 for (j
= 0; j
< nops
; j
++)
14186 if (regs
[j
] > regs
[reg_order
[i
- 1]]
14187 && (this_order
== reg_order
[i
- 1]
14188 || regs
[j
] < regs
[this_order
]))
14190 reg_order
[i
] = this_order
;
14193 /* Ensure that registers that must be live after the instruction end
14194 up with the correct value. */
14195 for (i
= 0; i
< nops
; i
++)
14197 int this_order
= reg_order
[i
];
14198 if ((this_order
!= mem_order
[i
]
14199 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
14200 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
14204 /* Load the constants. */
14205 for (i
= 0; i
< nops
; i
++)
14207 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
14208 sorted_regs
[i
] = regs
[reg_order
[i
]];
14209 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
14212 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14214 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
14217 gcc_assert (base_reg_dies
);
14223 gcc_assert (base_reg_dies
);
14224 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14228 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14230 for (i
= 0; i
< nops
; i
++)
14232 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14233 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14236 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
14237 write_back
? offset
+ i
* 4 : 0));
14241 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14242 unaligned copies on processors which support unaligned semantics for those
14243 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14244 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14245 An interleave factor of 1 (the minimum) will perform no interleaving.
14246 Load/store multiple are used for aligned addresses where possible. */
14249 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
14250 HOST_WIDE_INT length
,
14251 unsigned int interleave_factor
)
14253 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
14254 int *regnos
= XALLOCAVEC (int, interleave_factor
);
14255 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
14256 HOST_WIDE_INT i
, j
;
14257 HOST_WIDE_INT remaining
= length
, words
;
14258 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
14260 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
14261 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
14262 HOST_WIDE_INT srcoffset
, dstoffset
;
14263 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
14266 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
14268 /* Use hard registers if we have aligned source or destination so we can use
14269 load/store multiple with contiguous registers. */
14270 if (dst_aligned
|| src_aligned
)
14271 for (i
= 0; i
< interleave_factor
; i
++)
14272 regs
[i
] = gen_rtx_REG (SImode
, i
);
14274 for (i
= 0; i
< interleave_factor
; i
++)
14275 regs
[i
] = gen_reg_rtx (SImode
);
14277 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
14278 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
14280 srcoffset
= dstoffset
= 0;
14282 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14283 For copying the last bytes we want to subtract this offset again. */
14284 src_autoinc
= dst_autoinc
= 0;
14286 for (i
= 0; i
< interleave_factor
; i
++)
14289 /* Copy BLOCK_SIZE_BYTES chunks. */
14291 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
14294 if (src_aligned
&& interleave_factor
> 1)
14296 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
14297 TRUE
, srcbase
, &srcoffset
));
14298 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14302 for (j
= 0; j
< interleave_factor
; j
++)
14304 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
14306 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14307 srcoffset
+ j
* UNITS_PER_WORD
);
14308 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14310 srcoffset
+= block_size_bytes
;
14314 if (dst_aligned
&& interleave_factor
> 1)
14316 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
14317 TRUE
, dstbase
, &dstoffset
));
14318 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14322 for (j
= 0; j
< interleave_factor
; j
++)
14324 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
14326 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14327 dstoffset
+ j
* UNITS_PER_WORD
);
14328 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14330 dstoffset
+= block_size_bytes
;
14333 remaining
-= block_size_bytes
;
14336 /* Copy any whole words left (note these aren't interleaved with any
14337 subsequent halfword/byte load/stores in the interests of simplicity). */
14339 words
= remaining
/ UNITS_PER_WORD
;
14341 gcc_assert (words
< interleave_factor
);
14343 if (src_aligned
&& words
> 1)
14345 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
14347 src_autoinc
+= UNITS_PER_WORD
* words
;
14351 for (j
= 0; j
< words
; j
++)
14353 addr
= plus_constant (Pmode
, src
,
14354 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
14355 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14356 srcoffset
+ j
* UNITS_PER_WORD
);
14357 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14359 srcoffset
+= words
* UNITS_PER_WORD
;
14362 if (dst_aligned
&& words
> 1)
14364 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14366 dst_autoinc
+= words
* UNITS_PER_WORD
;
14370 for (j
= 0; j
< words
; j
++)
14372 addr
= plus_constant (Pmode
, dst
,
14373 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14374 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14375 dstoffset
+ j
* UNITS_PER_WORD
);
14376 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14378 dstoffset
+= words
* UNITS_PER_WORD
;
14381 remaining
-= words
* UNITS_PER_WORD
;
14383 gcc_assert (remaining
< 4);
14385 /* Copy a halfword if necessary. */
14387 if (remaining
>= 2)
14389 halfword_tmp
= gen_reg_rtx (SImode
);
14391 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14392 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14393 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14395 /* Either write out immediately, or delay until we've loaded the last
14396 byte, depending on interleave factor. */
14397 if (interleave_factor
== 1)
14399 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14400 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14401 emit_insn (gen_unaligned_storehi (mem
,
14402 gen_lowpart (HImode
, halfword_tmp
)));
14403 halfword_tmp
= NULL
;
14411 gcc_assert (remaining
< 2);
14413 /* Copy last byte. */
14415 if ((remaining
& 1) != 0)
14417 byte_tmp
= gen_reg_rtx (SImode
);
14419 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14420 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14421 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14423 if (interleave_factor
== 1)
14425 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14426 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14427 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14436 /* Store last halfword if we haven't done so already. */
14440 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14441 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14442 emit_insn (gen_unaligned_storehi (mem
,
14443 gen_lowpart (HImode
, halfword_tmp
)));
14447 /* Likewise for last byte. */
14451 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14452 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14453 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14457 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14460 /* From mips_adjust_block_mem:
14462 Helper function for doing a loop-based block operation on memory
14463 reference MEM. Each iteration of the loop will operate on LENGTH
14466 Create a new base register for use within the loop and point it to
14467 the start of MEM. Create a new memory reference that uses this
14468 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14471 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14474 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14476 /* Although the new mem does not refer to a known location,
14477 it does keep up to LENGTH bytes of alignment. */
14478 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14479 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14482 /* From mips_block_move_loop:
14484 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14485 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14486 the memory regions do not overlap. */
14489 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14490 unsigned int interleave_factor
,
14491 HOST_WIDE_INT bytes_per_iter
)
14493 rtx label
, src_reg
, dest_reg
, final_src
, test
;
14494 HOST_WIDE_INT leftover
;
14496 leftover
= length
% bytes_per_iter
;
14497 length
-= leftover
;
14499 /* Create registers and memory references for use within the loop. */
14500 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14501 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14503 /* Calculate the value that SRC_REG should have after the last iteration of
14505 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14506 0, 0, OPTAB_WIDEN
);
14508 /* Emit the start of the loop. */
14509 label
= gen_label_rtx ();
14510 emit_label (label
);
14512 /* Emit the loop body. */
14513 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14514 interleave_factor
);
14516 /* Move on to the next block. */
14517 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14518 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14520 /* Emit the loop condition. */
14521 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14522 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14524 /* Mop up any left-over bytes. */
14526 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14529 /* Emit a block move when either the source or destination is unaligned (not
14530 aligned to a four-byte boundary). This may need further tuning depending on
14531 core type, optimize_size setting, etc. */
14534 arm_movmemqi_unaligned (rtx
*operands
)
14536 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14540 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14541 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14542 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14543 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14544 or dst_aligned though: allow more interleaving in those cases since the
14545 resulting code can be smaller. */
14546 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14547 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14550 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14551 interleave_factor
, bytes_per_iter
);
14553 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14554 interleave_factor
);
14558 /* Note that the loop created by arm_block_move_unaligned_loop may be
14559 subject to loop unrolling, which makes tuning this condition a little
14562 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14564 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14571 arm_gen_movmemqi (rtx
*operands
)
14573 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14574 HOST_WIDE_INT srcoffset
, dstoffset
;
14576 rtx src
, dst
, srcbase
, dstbase
;
14577 rtx part_bytes_reg
= NULL
;
14580 if (!CONST_INT_P (operands
[2])
14581 || !CONST_INT_P (operands
[3])
14582 || INTVAL (operands
[2]) > 64)
14585 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14586 return arm_movmemqi_unaligned (operands
);
14588 if (INTVAL (operands
[3]) & 3)
14591 dstbase
= operands
[0];
14592 srcbase
= operands
[1];
14594 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14595 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14597 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14598 out_words_to_go
= INTVAL (operands
[2]) / 4;
14599 last_bytes
= INTVAL (operands
[2]) & 3;
14600 dstoffset
= srcoffset
= 0;
14602 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14603 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14605 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14607 if (in_words_to_go
> 4)
14608 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14609 TRUE
, srcbase
, &srcoffset
));
14611 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14612 src
, FALSE
, srcbase
,
14615 if (out_words_to_go
)
14617 if (out_words_to_go
> 4)
14618 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14619 TRUE
, dstbase
, &dstoffset
));
14620 else if (out_words_to_go
!= 1)
14621 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14622 out_words_to_go
, dst
,
14625 dstbase
, &dstoffset
));
14628 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14629 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
14630 if (last_bytes
!= 0)
14632 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14638 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14639 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14642 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14643 if (out_words_to_go
)
14647 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14648 sreg
= copy_to_reg (mem
);
14650 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14651 emit_move_insn (mem
, sreg
);
14654 gcc_assert (!in_words_to_go
); /* Sanity check */
14657 if (in_words_to_go
)
14659 gcc_assert (in_words_to_go
> 0);
14661 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14662 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14665 gcc_assert (!last_bytes
|| part_bytes_reg
);
14667 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14669 rtx tmp
= gen_reg_rtx (SImode
);
14671 /* The bytes we want are in the top end of the word. */
14672 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14673 GEN_INT (8 * (4 - last_bytes
))));
14674 part_bytes_reg
= tmp
;
14678 mem
= adjust_automodify_address (dstbase
, QImode
,
14679 plus_constant (Pmode
, dst
,
14681 dstoffset
+ last_bytes
- 1);
14682 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14686 tmp
= gen_reg_rtx (SImode
);
14687 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14688 part_bytes_reg
= tmp
;
14695 if (last_bytes
> 1)
14697 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14698 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14702 rtx tmp
= gen_reg_rtx (SImode
);
14703 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14704 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14705 part_bytes_reg
= tmp
;
14712 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14713 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14720 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14723 next_consecutive_mem (rtx mem
)
14725 enum machine_mode mode
= GET_MODE (mem
);
14726 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14727 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14729 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14732 /* Copy using LDRD/STRD instructions whenever possible.
14733 Returns true upon success. */
14735 gen_movmem_ldrd_strd (rtx
*operands
)
14737 unsigned HOST_WIDE_INT len
;
14738 HOST_WIDE_INT align
;
14739 rtx src
, dst
, base
;
14741 bool src_aligned
, dst_aligned
;
14742 bool src_volatile
, dst_volatile
;
14744 gcc_assert (CONST_INT_P (operands
[2]));
14745 gcc_assert (CONST_INT_P (operands
[3]));
14747 len
= UINTVAL (operands
[2]);
14751 /* Maximum alignment we can assume for both src and dst buffers. */
14752 align
= INTVAL (operands
[3]);
14754 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14757 /* Place src and dst addresses in registers
14758 and update the corresponding mem rtx. */
14760 dst_volatile
= MEM_VOLATILE_P (dst
);
14761 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14762 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14763 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14766 src_volatile
= MEM_VOLATILE_P (src
);
14767 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14768 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14769 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14771 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14774 if (src_volatile
|| dst_volatile
)
14777 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14778 if (!(dst_aligned
|| src_aligned
))
14779 return arm_gen_movmemqi (operands
);
14781 src
= adjust_address (src
, DImode
, 0);
14782 dst
= adjust_address (dst
, DImode
, 0);
14786 reg0
= gen_reg_rtx (DImode
);
14788 emit_move_insn (reg0
, src
);
14790 emit_insn (gen_unaligned_loaddi (reg0
, src
));
14793 emit_move_insn (dst
, reg0
);
14795 emit_insn (gen_unaligned_storedi (dst
, reg0
));
14797 src
= next_consecutive_mem (src
);
14798 dst
= next_consecutive_mem (dst
);
14801 gcc_assert (len
< 8);
14804 /* More than a word but less than a double-word to copy. Copy a word. */
14805 reg0
= gen_reg_rtx (SImode
);
14806 src
= adjust_address (src
, SImode
, 0);
14807 dst
= adjust_address (dst
, SImode
, 0);
14809 emit_move_insn (reg0
, src
);
14811 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14814 emit_move_insn (dst
, reg0
);
14816 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14818 src
= next_consecutive_mem (src
);
14819 dst
= next_consecutive_mem (dst
);
14826 /* Copy the remaining bytes. */
14829 dst
= adjust_address (dst
, HImode
, 0);
14830 src
= adjust_address (src
, HImode
, 0);
14831 reg0
= gen_reg_rtx (SImode
);
14833 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14835 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14838 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14840 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14842 src
= next_consecutive_mem (src
);
14843 dst
= next_consecutive_mem (dst
);
14848 dst
= adjust_address (dst
, QImode
, 0);
14849 src
= adjust_address (src
, QImode
, 0);
14850 reg0
= gen_reg_rtx (QImode
);
14851 emit_move_insn (reg0
, src
);
14852 emit_move_insn (dst
, reg0
);
14856 /* Select a dominance comparison mode if possible for a test of the general
14857 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14858 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14859 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14860 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14861 In all cases OP will be either EQ or NE, but we don't need to know which
14862 here. If we are unable to support a dominance comparison we return
14863 CC mode. This will then fail to match for the RTL expressions that
14864 generate this call. */
14866 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14868 enum rtx_code cond1
, cond2
;
14871 /* Currently we will probably get the wrong result if the individual
14872 comparisons are not simple. This also ensures that it is safe to
14873 reverse a comparison if necessary. */
14874 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14876 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14880 /* The if_then_else variant of this tests the second condition if the
14881 first passes, but is true if the first fails. Reverse the first
14882 condition to get a true "inclusive-or" expression. */
14883 if (cond_or
== DOM_CC_NX_OR_Y
)
14884 cond1
= reverse_condition (cond1
);
14886 /* If the comparisons are not equal, and one doesn't dominate the other,
14887 then we can't do this. */
14889 && !comparison_dominates_p (cond1
, cond2
)
14890 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14895 enum rtx_code temp
= cond1
;
14903 if (cond_or
== DOM_CC_X_AND_Y
)
14908 case EQ
: return CC_DEQmode
;
14909 case LE
: return CC_DLEmode
;
14910 case LEU
: return CC_DLEUmode
;
14911 case GE
: return CC_DGEmode
;
14912 case GEU
: return CC_DGEUmode
;
14913 default: gcc_unreachable ();
14917 if (cond_or
== DOM_CC_X_AND_Y
)
14929 gcc_unreachable ();
14933 if (cond_or
== DOM_CC_X_AND_Y
)
14945 gcc_unreachable ();
14949 if (cond_or
== DOM_CC_X_AND_Y
)
14950 return CC_DLTUmode
;
14955 return CC_DLTUmode
;
14957 return CC_DLEUmode
;
14961 gcc_unreachable ();
14965 if (cond_or
== DOM_CC_X_AND_Y
)
14966 return CC_DGTUmode
;
14971 return CC_DGTUmode
;
14973 return CC_DGEUmode
;
14977 gcc_unreachable ();
14980 /* The remaining cases only occur when both comparisons are the
14983 gcc_assert (cond1
== cond2
);
14987 gcc_assert (cond1
== cond2
);
14991 gcc_assert (cond1
== cond2
);
14995 gcc_assert (cond1
== cond2
);
14996 return CC_DLEUmode
;
14999 gcc_assert (cond1
== cond2
);
15000 return CC_DGEUmode
;
15003 gcc_unreachable ();
15008 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
15010 /* All floating point compares return CCFP if it is an equality
15011 comparison, and CCFPE otherwise. */
15012 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
15035 gcc_unreachable ();
15039 /* A compare with a shifted operand. Because of canonicalization, the
15040 comparison will have to be swapped when we emit the assembler. */
15041 if (GET_MODE (y
) == SImode
15042 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15043 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15044 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
15045 || GET_CODE (x
) == ROTATERT
))
15048 /* This operation is performed swapped, but since we only rely on the Z
15049 flag we don't need an additional mode. */
15050 if (GET_MODE (y
) == SImode
15051 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15052 && GET_CODE (x
) == NEG
15053 && (op
== EQ
|| op
== NE
))
15056 /* This is a special case that is used by combine to allow a
15057 comparison of a shifted byte load to be split into a zero-extend
15058 followed by a comparison of the shifted integer (only valid for
15059 equalities and unsigned inequalities). */
15060 if (GET_MODE (x
) == SImode
15061 && GET_CODE (x
) == ASHIFT
15062 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
15063 && GET_CODE (XEXP (x
, 0)) == SUBREG
15064 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
15065 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
15066 && (op
== EQ
|| op
== NE
15067 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
15068 && CONST_INT_P (y
))
15071 /* A construct for a conditional compare, if the false arm contains
15072 0, then both conditions must be true, otherwise either condition
15073 must be true. Not all conditions are possible, so CCmode is
15074 returned if it can't be done. */
15075 if (GET_CODE (x
) == IF_THEN_ELSE
15076 && (XEXP (x
, 2) == const0_rtx
15077 || XEXP (x
, 2) == const1_rtx
)
15078 && COMPARISON_P (XEXP (x
, 0))
15079 && COMPARISON_P (XEXP (x
, 1)))
15080 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15081 INTVAL (XEXP (x
, 2)));
15083 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15084 if (GET_CODE (x
) == AND
15085 && (op
== EQ
|| op
== NE
)
15086 && COMPARISON_P (XEXP (x
, 0))
15087 && COMPARISON_P (XEXP (x
, 1)))
15088 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15091 if (GET_CODE (x
) == IOR
15092 && (op
== EQ
|| op
== NE
)
15093 && COMPARISON_P (XEXP (x
, 0))
15094 && COMPARISON_P (XEXP (x
, 1)))
15095 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15098 /* An operation (on Thumb) where we want to test for a single bit.
15099 This is done by shifting that bit up into the top bit of a
15100 scratch register; we can then branch on the sign bit. */
15102 && GET_MODE (x
) == SImode
15103 && (op
== EQ
|| op
== NE
)
15104 && GET_CODE (x
) == ZERO_EXTRACT
15105 && XEXP (x
, 1) == const1_rtx
)
15108 /* An operation that sets the condition codes as a side-effect, the
15109 V flag is not set correctly, so we can only use comparisons where
15110 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15112 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15113 if (GET_MODE (x
) == SImode
15115 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
15116 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
15117 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
15118 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
15119 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
15120 || GET_CODE (x
) == LSHIFTRT
15121 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15122 || GET_CODE (x
) == ROTATERT
15123 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
15124 return CC_NOOVmode
;
15126 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
15129 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
15130 && GET_CODE (x
) == PLUS
15131 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
15134 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
15140 /* A DImode comparison against zero can be implemented by
15141 or'ing the two halves together. */
15142 if (y
== const0_rtx
)
15145 /* We can do an equality test in three Thumb instructions. */
15155 /* DImode unsigned comparisons can be implemented by cmp +
15156 cmpeq without a scratch register. Not worth doing in
15167 /* DImode signed and unsigned comparisons can be implemented
15168 by cmp + sbcs with a scratch register, but that does not
15169 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15170 gcc_assert (op
!= EQ
&& op
!= NE
);
15174 gcc_unreachable ();
15178 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
15179 return GET_MODE (x
);
15184 /* X and Y are two things to compare using CODE. Emit the compare insn and
15185 return the rtx for register 0 in the proper mode. FP means this is a
15186 floating point compare: I don't think that it is needed on the arm. */
15188 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
15190 enum machine_mode mode
;
15192 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
15194 /* We might have X as a constant, Y as a register because of the predicates
15195 used for cmpdi. If so, force X to a register here. */
15196 if (dimode_comparison
&& !REG_P (x
))
15197 x
= force_reg (DImode
, x
);
15199 mode
= SELECT_CC_MODE (code
, x
, y
);
15200 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
15202 if (dimode_comparison
15203 && mode
!= CC_CZmode
)
15207 /* To compare two non-zero values for equality, XOR them and
15208 then compare against zero. Not used for ARM mode; there
15209 CC_CZmode is cheaper. */
15210 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
15212 gcc_assert (!reload_completed
);
15213 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
15217 /* A scratch register is required. */
15218 if (reload_completed
)
15219 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
15221 scratch
= gen_rtx_SCRATCH (SImode
);
15223 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
15224 set
= gen_rtx_SET (VOIDmode
, cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15225 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
15228 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15233 /* Generate a sequence of insns that will generate the correct return
15234 address mask depending on the physical architecture that the program
15237 arm_gen_return_addr_mask (void)
15239 rtx reg
= gen_reg_rtx (Pmode
);
15241 emit_insn (gen_return_addr_mask (reg
));
15246 arm_reload_in_hi (rtx
*operands
)
15248 rtx ref
= operands
[1];
15250 HOST_WIDE_INT offset
= 0;
15252 if (GET_CODE (ref
) == SUBREG
)
15254 offset
= SUBREG_BYTE (ref
);
15255 ref
= SUBREG_REG (ref
);
15260 /* We have a pseudo which has been spilt onto the stack; there
15261 are two cases here: the first where there is a simple
15262 stack-slot replacement and a second where the stack-slot is
15263 out of range, or is used as a subreg. */
15264 if (reg_equiv_mem (REGNO (ref
)))
15266 ref
= reg_equiv_mem (REGNO (ref
));
15267 base
= find_replacement (&XEXP (ref
, 0));
15270 /* The slot is out of range, or was dressed up in a SUBREG. */
15271 base
= reg_equiv_address (REGNO (ref
));
15274 base
= find_replacement (&XEXP (ref
, 0));
15276 /* Handle the case where the address is too complex to be offset by 1. */
15277 if (GET_CODE (base
) == MINUS
15278 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15280 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15282 emit_set_insn (base_plus
, base
);
15285 else if (GET_CODE (base
) == PLUS
)
15287 /* The addend must be CONST_INT, or we would have dealt with it above. */
15288 HOST_WIDE_INT hi
, lo
;
15290 offset
+= INTVAL (XEXP (base
, 1));
15291 base
= XEXP (base
, 0);
15293 /* Rework the address into a legal sequence of insns. */
15294 /* Valid range for lo is -4095 -> 4095 */
15297 : -((-offset
) & 0xfff));
15299 /* Corner case, if lo is the max offset then we would be out of range
15300 once we have added the additional 1 below, so bump the msb into the
15301 pre-loading insn(s). */
15305 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15306 ^ (HOST_WIDE_INT
) 0x80000000)
15307 - (HOST_WIDE_INT
) 0x80000000);
15309 gcc_assert (hi
+ lo
== offset
);
15313 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15315 /* Get the base address; addsi3 knows how to handle constants
15316 that require more than one insn. */
15317 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15323 /* Operands[2] may overlap operands[0] (though it won't overlap
15324 operands[1]), that's why we asked for a DImode reg -- so we can
15325 use the bit that does not overlap. */
15326 if (REGNO (operands
[2]) == REGNO (operands
[0]))
15327 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15329 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15331 emit_insn (gen_zero_extendqisi2 (scratch
,
15332 gen_rtx_MEM (QImode
,
15333 plus_constant (Pmode
, base
,
15335 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15336 gen_rtx_MEM (QImode
,
15337 plus_constant (Pmode
, base
,
15339 if (!BYTES_BIG_ENDIAN
)
15340 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15341 gen_rtx_IOR (SImode
,
15344 gen_rtx_SUBREG (SImode
, operands
[0], 0),
15348 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15349 gen_rtx_IOR (SImode
,
15350 gen_rtx_ASHIFT (SImode
, scratch
,
15352 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
15355 /* Handle storing a half-word to memory during reload by synthesizing as two
15356 byte stores. Take care not to clobber the input values until after we
15357 have moved them somewhere safe. This code assumes that if the DImode
15358 scratch in operands[2] overlaps either the input value or output address
15359 in some way, then that value must die in this insn (we absolutely need
15360 two scratch registers for some corner cases). */
15362 arm_reload_out_hi (rtx
*operands
)
15364 rtx ref
= operands
[0];
15365 rtx outval
= operands
[1];
15367 HOST_WIDE_INT offset
= 0;
15369 if (GET_CODE (ref
) == SUBREG
)
15371 offset
= SUBREG_BYTE (ref
);
15372 ref
= SUBREG_REG (ref
);
15377 /* We have a pseudo which has been spilt onto the stack; there
15378 are two cases here: the first where there is a simple
15379 stack-slot replacement and a second where the stack-slot is
15380 out of range, or is used as a subreg. */
15381 if (reg_equiv_mem (REGNO (ref
)))
15383 ref
= reg_equiv_mem (REGNO (ref
));
15384 base
= find_replacement (&XEXP (ref
, 0));
15387 /* The slot is out of range, or was dressed up in a SUBREG. */
15388 base
= reg_equiv_address (REGNO (ref
));
15391 base
= find_replacement (&XEXP (ref
, 0));
15393 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15395 /* Handle the case where the address is too complex to be offset by 1. */
15396 if (GET_CODE (base
) == MINUS
15397 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15399 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15401 /* Be careful not to destroy OUTVAL. */
15402 if (reg_overlap_mentioned_p (base_plus
, outval
))
15404 /* Updating base_plus might destroy outval, see if we can
15405 swap the scratch and base_plus. */
15406 if (!reg_overlap_mentioned_p (scratch
, outval
))
15409 scratch
= base_plus
;
15414 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15416 /* Be conservative and copy OUTVAL into the scratch now,
15417 this should only be necessary if outval is a subreg
15418 of something larger than a word. */
15419 /* XXX Might this clobber base? I can't see how it can,
15420 since scratch is known to overlap with OUTVAL, and
15421 must be wider than a word. */
15422 emit_insn (gen_movhi (scratch_hi
, outval
));
15423 outval
= scratch_hi
;
15427 emit_set_insn (base_plus
, base
);
15430 else if (GET_CODE (base
) == PLUS
)
15432 /* The addend must be CONST_INT, or we would have dealt with it above. */
15433 HOST_WIDE_INT hi
, lo
;
15435 offset
+= INTVAL (XEXP (base
, 1));
15436 base
= XEXP (base
, 0);
15438 /* Rework the address into a legal sequence of insns. */
15439 /* Valid range for lo is -4095 -> 4095 */
15442 : -((-offset
) & 0xfff));
15444 /* Corner case, if lo is the max offset then we would be out of range
15445 once we have added the additional 1 below, so bump the msb into the
15446 pre-loading insn(s). */
15450 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15451 ^ (HOST_WIDE_INT
) 0x80000000)
15452 - (HOST_WIDE_INT
) 0x80000000);
15454 gcc_assert (hi
+ lo
== offset
);
15458 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15460 /* Be careful not to destroy OUTVAL. */
15461 if (reg_overlap_mentioned_p (base_plus
, outval
))
15463 /* Updating base_plus might destroy outval, see if we
15464 can swap the scratch and base_plus. */
15465 if (!reg_overlap_mentioned_p (scratch
, outval
))
15468 scratch
= base_plus
;
15473 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15475 /* Be conservative and copy outval into scratch now,
15476 this should only be necessary if outval is a
15477 subreg of something larger than a word. */
15478 /* XXX Might this clobber base? I can't see how it
15479 can, since scratch is known to overlap with
15481 emit_insn (gen_movhi (scratch_hi
, outval
));
15482 outval
= scratch_hi
;
15486 /* Get the base address; addsi3 knows how to handle constants
15487 that require more than one insn. */
15488 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15494 if (BYTES_BIG_ENDIAN
)
15496 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15497 plus_constant (Pmode
, base
,
15499 gen_lowpart (QImode
, outval
)));
15500 emit_insn (gen_lshrsi3 (scratch
,
15501 gen_rtx_SUBREG (SImode
, outval
, 0),
15503 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15505 gen_lowpart (QImode
, scratch
)));
15509 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15511 gen_lowpart (QImode
, outval
)));
15512 emit_insn (gen_lshrsi3 (scratch
,
15513 gen_rtx_SUBREG (SImode
, outval
, 0),
15515 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15516 plus_constant (Pmode
, base
,
15518 gen_lowpart (QImode
, scratch
)));
15522 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15523 (padded to the size of a word) should be passed in a register. */
15526 arm_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
15528 if (TARGET_AAPCS_BASED
)
15529 return must_pass_in_stack_var_size (mode
, type
);
15531 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15535 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15536 Return true if an argument passed on the stack should be padded upwards,
15537 i.e. if the least-significant byte has useful data.
15538 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15539 aggregate types are placed in the lowest memory address. */
15542 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
15544 if (!TARGET_AAPCS_BASED
)
15545 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
15547 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15554 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15555 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15556 register has useful data, and return the opposite if the most
15557 significant byte does. */
15560 arm_pad_reg_upward (enum machine_mode mode
,
15561 tree type
, int first ATTRIBUTE_UNUSED
)
15563 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15565 /* For AAPCS, small aggregates, small fixed-point types,
15566 and small complex types are always padded upwards. */
15569 if ((AGGREGATE_TYPE_P (type
)
15570 || TREE_CODE (type
) == COMPLEX_TYPE
15571 || FIXED_POINT_TYPE_P (type
))
15572 && int_size_in_bytes (type
) <= 4)
15577 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15578 && GET_MODE_SIZE (mode
) <= 4)
15583 /* Otherwise, use default padding. */
15584 return !BYTES_BIG_ENDIAN
;
15587 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15588 assuming that the address in the base register is word aligned. */
15590 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15592 HOST_WIDE_INT max_offset
;
15594 /* Offset must be a multiple of 4 in Thumb mode. */
15595 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15600 else if (TARGET_ARM
)
15605 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15608 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15609 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15610 Assumes that the address in the base register RN is word aligned. Pattern
15611 guarantees that both memory accesses use the same base register,
15612 the offsets are constants within the range, and the gap between the offsets is 4.
15613 If preload complete then check that registers are legal. WBACK indicates whether
15614 address is updated. LOAD indicates whether memory access is load or store. */
15616 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15617 bool wback
, bool load
)
15619 unsigned int t
, t2
, n
;
15621 if (!reload_completed
)
15624 if (!offset_ok_for_ldrd_strd (offset
))
15631 if ((TARGET_THUMB2
)
15632 && ((wback
&& (n
== t
|| n
== t2
))
15633 || (t
== SP_REGNUM
)
15634 || (t
== PC_REGNUM
)
15635 || (t2
== SP_REGNUM
)
15636 || (t2
== PC_REGNUM
)
15637 || (!load
&& (n
== PC_REGNUM
))
15638 || (load
&& (t
== t2
))
15639 /* Triggers Cortex-M3 LDRD errata. */
15640 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15644 && ((wback
&& (n
== t
|| n
== t2
))
15645 || (t2
== PC_REGNUM
)
15646 || (t
% 2 != 0) /* First destination register is not even. */
15648 /* PC can be used as base register (for offset addressing only),
15649 but it is depricated. */
15650 || (n
== PC_REGNUM
)))
15656 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15657 operand MEM's address contains an immediate offset from the base
15658 register and has no side effects, in which case it sets BASE and
15659 OFFSET accordingly. */
15661 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15665 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15667 /* TODO: Handle more general memory operand patterns, such as
15668 PRE_DEC and PRE_INC. */
15670 if (side_effects_p (mem
))
15673 /* Can't deal with subregs. */
15674 if (GET_CODE (mem
) == SUBREG
)
15677 gcc_assert (MEM_P (mem
));
15679 *offset
= const0_rtx
;
15681 addr
= XEXP (mem
, 0);
15683 /* If addr isn't valid for DImode, then we can't handle it. */
15684 if (!arm_legitimate_address_p (DImode
, addr
,
15685 reload_in_progress
|| reload_completed
))
15693 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15695 *base
= XEXP (addr
, 0);
15696 *offset
= XEXP (addr
, 1);
15697 return (REG_P (*base
) && CONST_INT_P (*offset
));
15703 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15705 /* Called from a peephole2 to replace two word-size accesses with a
15706 single LDRD/STRD instruction. Returns true iff we can generate a
15707 new instruction sequence. That is, both accesses use the same base
15708 register and the gap between constant offsets is 4. This function
15709 may reorder its operands to match ldrd/strd RTL templates.
15710 OPERANDS are the operands found by the peephole matcher;
15711 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15712 corresponding memory operands. LOAD indicaates whether the access
15713 is load or store. CONST_STORE indicates a store of constant
15714 integer values held in OPERANDS[4,5] and assumes that the pattern
15715 is of length 4 insn, for the purpose of checking dead registers.
15716 COMMUTE indicates that register operands may be reordered. */
15718 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15719 bool const_store
, bool commute
)
15722 HOST_WIDE_INT offsets
[2], offset
;
15723 rtx base
= NULL_RTX
;
15724 rtx cur_base
, cur_offset
, tmp
;
15726 HARD_REG_SET regset
;
15728 gcc_assert (!const_store
|| !load
);
15729 /* Check that the memory references are immediate offsets from the
15730 same base register. Extract the base register, the destination
15731 registers, and the corresponding memory offsets. */
15732 for (i
= 0; i
< nops
; i
++)
15734 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15739 else if (REGNO (base
) != REGNO (cur_base
))
15742 offsets
[i
] = INTVAL (cur_offset
);
15743 if (GET_CODE (operands
[i
]) == SUBREG
)
15745 tmp
= SUBREG_REG (operands
[i
]);
15746 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15751 /* Make sure there is no dependency between the individual loads. */
15752 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15753 return false; /* RAW */
15755 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15756 return false; /* WAW */
15758 /* If the same input register is used in both stores
15759 when storing different constants, try to find a free register.
15760 For example, the code
15765 can be transformed into
15768 in Thumb mode assuming that r1 is free. */
15770 && REGNO (operands
[0]) == REGNO (operands
[1])
15771 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15775 CLEAR_HARD_REG_SET (regset
);
15776 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15777 if (tmp
== NULL_RTX
)
15780 /* Use the new register in the first load to ensure that
15781 if the original input register is not dead after peephole,
15782 then it will have the correct constant value. */
15785 else if (TARGET_ARM
)
15788 int regno
= REGNO (operands
[0]);
15789 if (!peep2_reg_dead_p (4, operands
[0]))
15791 /* When the input register is even and is not dead after the
15792 pattern, it has to hold the second constant but we cannot
15793 form a legal STRD in ARM mode with this register as the second
15795 if (regno
% 2 == 0)
15798 /* Is regno-1 free? */
15799 SET_HARD_REG_SET (regset
);
15800 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15801 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15802 if (tmp
== NULL_RTX
)
15809 /* Find a DImode register. */
15810 CLEAR_HARD_REG_SET (regset
);
15811 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15812 if (tmp
!= NULL_RTX
)
15814 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15815 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15819 /* Can we use the input register to form a DI register? */
15820 SET_HARD_REG_SET (regset
);
15821 CLEAR_HARD_REG_BIT(regset
,
15822 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15823 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15824 if (tmp
== NULL_RTX
)
15826 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15830 gcc_assert (operands
[0] != NULL_RTX
);
15831 gcc_assert (operands
[1] != NULL_RTX
);
15832 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15833 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15837 /* Make sure the instructions are ordered with lower memory access first. */
15838 if (offsets
[0] > offsets
[1])
15840 gap
= offsets
[0] - offsets
[1];
15841 offset
= offsets
[1];
15843 /* Swap the instructions such that lower memory is accessed first. */
15844 SWAP_RTX (operands
[0], operands
[1]);
15845 SWAP_RTX (operands
[2], operands
[3]);
15847 SWAP_RTX (operands
[4], operands
[5]);
15851 gap
= offsets
[1] - offsets
[0];
15852 offset
= offsets
[0];
15855 /* Make sure accesses are to consecutive memory locations. */
15859 /* Make sure we generate legal instructions. */
15860 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15864 /* In Thumb state, where registers are almost unconstrained, there
15865 is little hope to fix it. */
15869 if (load
&& commute
)
15871 /* Try reordering registers. */
15872 SWAP_RTX (operands
[0], operands
[1]);
15873 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15880 /* If input registers are dead after this pattern, they can be
15881 reordered or replaced by other registers that are free in the
15882 current pattern. */
15883 if (!peep2_reg_dead_p (4, operands
[0])
15884 || !peep2_reg_dead_p (4, operands
[1]))
15887 /* Try to reorder the input registers. */
15888 /* For example, the code
15893 can be transformed into
15898 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15901 SWAP_RTX (operands
[0], operands
[1]);
15905 /* Try to find a free DI register. */
15906 CLEAR_HARD_REG_SET (regset
);
15907 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15908 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15911 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15912 if (tmp
== NULL_RTX
)
15915 /* DREG must be an even-numbered register in DImode.
15916 Split it into SI registers. */
15917 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15918 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15919 gcc_assert (operands
[0] != NULL_RTX
);
15920 gcc_assert (operands
[1] != NULL_RTX
);
15921 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15922 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15924 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15937 /* Print a symbolic form of X to the debug file, F. */
15939 arm_print_value (FILE *f
, rtx x
)
15941 switch (GET_CODE (x
))
15944 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15948 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15956 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15958 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15959 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15967 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15971 fprintf (f
, "`%s'", XSTR (x
, 0));
15975 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15979 arm_print_value (f
, XEXP (x
, 0));
15983 arm_print_value (f
, XEXP (x
, 0));
15985 arm_print_value (f
, XEXP (x
, 1));
15993 fprintf (f
, "????");
15998 /* Routines for manipulation of the constant pool. */
16000 /* Arm instructions cannot load a large constant directly into a
16001 register; they have to come from a pc relative load. The constant
16002 must therefore be placed in the addressable range of the pc
16003 relative load. Depending on the precise pc relative load
16004 instruction the range is somewhere between 256 bytes and 4k. This
16005 means that we often have to dump a constant inside a function, and
16006 generate code to branch around it.
16008 It is important to minimize this, since the branches will slow
16009 things down and make the code larger.
16011 Normally we can hide the table after an existing unconditional
16012 branch so that there is no interruption of the flow, but in the
16013 worst case the code looks like this:
16031 We fix this by performing a scan after scheduling, which notices
16032 which instructions need to have their operands fetched from the
16033 constant table and builds the table.
16035 The algorithm starts by building a table of all the constants that
16036 need fixing up and all the natural barriers in the function (places
16037 where a constant table can be dropped without breaking the flow).
16038 For each fixup we note how far the pc-relative replacement will be
16039 able to reach and the offset of the instruction into the function.
16041 Having built the table we then group the fixes together to form
16042 tables that are as large as possible (subject to addressing
16043 constraints) and emit each table of constants after the last
16044 barrier that is within range of all the instructions in the group.
16045 If a group does not contain a barrier, then we forcibly create one
16046 by inserting a jump instruction into the flow. Once the table has
16047 been inserted, the insns are then modified to reference the
16048 relevant entry in the pool.
16050 Possible enhancements to the algorithm (not implemented) are:
16052 1) For some processors and object formats, there may be benefit in
16053 aligning the pools to the start of cache lines; this alignment
16054 would need to be taken into account when calculating addressability
16057 /* These typedefs are located at the start of this file, so that
16058 they can be used in the prototypes there. This comment is to
16059 remind readers of that fact so that the following structures
16060 can be understood more easily.
16062 typedef struct minipool_node Mnode;
16063 typedef struct minipool_fixup Mfix; */
16065 struct minipool_node
16067 /* Doubly linked chain of entries. */
16070 /* The maximum offset into the code that this entry can be placed. While
16071 pushing fixes for forward references, all entries are sorted in order
16072 of increasing max_address. */
16073 HOST_WIDE_INT max_address
;
16074 /* Similarly for an entry inserted for a backwards ref. */
16075 HOST_WIDE_INT min_address
;
16076 /* The number of fixes referencing this entry. This can become zero
16077 if we "unpush" an entry. In this case we ignore the entry when we
16078 come to emit the code. */
16080 /* The offset from the start of the minipool. */
16081 HOST_WIDE_INT offset
;
16082 /* The value in table. */
16084 /* The mode of value. */
16085 enum machine_mode mode
;
16086 /* The size of the value. With iWMMXt enabled
16087 sizes > 4 also imply an alignment of 8-bytes. */
16091 struct minipool_fixup
16095 HOST_WIDE_INT address
;
16097 enum machine_mode mode
;
16101 HOST_WIDE_INT forwards
;
16102 HOST_WIDE_INT backwards
;
16105 /* Fixes less than a word need padding out to a word boundary. */
16106 #define MINIPOOL_FIX_SIZE(mode) \
16107 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
16109 static Mnode
* minipool_vector_head
;
16110 static Mnode
* minipool_vector_tail
;
16111 static rtx minipool_vector_label
;
16112 static int minipool_pad
;
16114 /* The linked list of all minipool fixes required for this function. */
16115 Mfix
* minipool_fix_head
;
16116 Mfix
* minipool_fix_tail
;
16117 /* The fix entry for the current minipool, once it has been placed. */
16118 Mfix
* minipool_barrier
;
16120 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16121 #define JUMP_TABLES_IN_TEXT_SECTION 0
16124 static HOST_WIDE_INT
16125 get_jump_table_size (rtx_jump_table_data
*insn
)
16127 /* ADDR_VECs only take room if read-only data does into the text
16129 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
16131 rtx body
= PATTERN (insn
);
16132 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
16133 HOST_WIDE_INT size
;
16134 HOST_WIDE_INT modesize
;
16136 modesize
= GET_MODE_SIZE (GET_MODE (body
));
16137 size
= modesize
* XVECLEN (body
, elt
);
16141 /* Round up size of TBB table to a halfword boundary. */
16142 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
16145 /* No padding necessary for TBH. */
16148 /* Add two bytes for alignment on Thumb. */
16153 gcc_unreachable ();
16161 /* Return the maximum amount of padding that will be inserted before
16164 static HOST_WIDE_INT
16165 get_label_padding (rtx label
)
16167 HOST_WIDE_INT align
, min_insn_size
;
16169 align
= 1 << label_to_alignment (label
);
16170 min_insn_size
= TARGET_THUMB
? 2 : 4;
16171 return align
> min_insn_size
? align
- min_insn_size
: 0;
16174 /* Move a minipool fix MP from its current location to before MAX_MP.
16175 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16176 constraints may need updating. */
16178 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
16179 HOST_WIDE_INT max_address
)
16181 /* The code below assumes these are different. */
16182 gcc_assert (mp
!= max_mp
);
16184 if (max_mp
== NULL
)
16186 if (max_address
< mp
->max_address
)
16187 mp
->max_address
= max_address
;
16191 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16192 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16194 mp
->max_address
= max_address
;
16196 /* Unlink MP from its current position. Since max_mp is non-null,
16197 mp->prev must be non-null. */
16198 mp
->prev
->next
= mp
->next
;
16199 if (mp
->next
!= NULL
)
16200 mp
->next
->prev
= mp
->prev
;
16202 minipool_vector_tail
= mp
->prev
;
16204 /* Re-insert it before MAX_MP. */
16206 mp
->prev
= max_mp
->prev
;
16209 if (mp
->prev
!= NULL
)
16210 mp
->prev
->next
= mp
;
16212 minipool_vector_head
= mp
;
16215 /* Save the new entry. */
16218 /* Scan over the preceding entries and adjust their addresses as
16220 while (mp
->prev
!= NULL
16221 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16223 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16230 /* Add a constant to the minipool for a forward reference. Returns the
16231 node added or NULL if the constant will not fit in this pool. */
16233 add_minipool_forward_ref (Mfix
*fix
)
16235 /* If set, max_mp is the first pool_entry that has a lower
16236 constraint than the one we are trying to add. */
16237 Mnode
* max_mp
= NULL
;
16238 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
16241 /* If the minipool starts before the end of FIX->INSN then this FIX
16242 can not be placed into the current pool. Furthermore, adding the
16243 new constant pool entry may cause the pool to start FIX_SIZE bytes
16245 if (minipool_vector_head
&&
16246 (fix
->address
+ get_attr_length (fix
->insn
)
16247 >= minipool_vector_head
->max_address
- fix
->fix_size
))
16250 /* Scan the pool to see if a constant with the same value has
16251 already been added. While we are doing this, also note the
16252 location where we must insert the constant if it doesn't already
16254 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16256 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16257 && fix
->mode
== mp
->mode
16258 && (!LABEL_P (fix
->value
)
16259 || (CODE_LABEL_NUMBER (fix
->value
)
16260 == CODE_LABEL_NUMBER (mp
->value
)))
16261 && rtx_equal_p (fix
->value
, mp
->value
))
16263 /* More than one fix references this entry. */
16265 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
16268 /* Note the insertion point if necessary. */
16270 && mp
->max_address
> max_address
)
16273 /* If we are inserting an 8-bytes aligned quantity and
16274 we have not already found an insertion point, then
16275 make sure that all such 8-byte aligned quantities are
16276 placed at the start of the pool. */
16277 if (ARM_DOUBLEWORD_ALIGN
16279 && fix
->fix_size
>= 8
16280 && mp
->fix_size
< 8)
16283 max_address
= mp
->max_address
;
16287 /* The value is not currently in the minipool, so we need to create
16288 a new entry for it. If MAX_MP is NULL, the entry will be put on
16289 the end of the list since the placement is less constrained than
16290 any existing entry. Otherwise, we insert the new fix before
16291 MAX_MP and, if necessary, adjust the constraints on the other
16294 mp
->fix_size
= fix
->fix_size
;
16295 mp
->mode
= fix
->mode
;
16296 mp
->value
= fix
->value
;
16298 /* Not yet required for a backwards ref. */
16299 mp
->min_address
= -65536;
16301 if (max_mp
== NULL
)
16303 mp
->max_address
= max_address
;
16305 mp
->prev
= minipool_vector_tail
;
16307 if (mp
->prev
== NULL
)
16309 minipool_vector_head
= mp
;
16310 minipool_vector_label
= gen_label_rtx ();
16313 mp
->prev
->next
= mp
;
16315 minipool_vector_tail
= mp
;
16319 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16320 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16322 mp
->max_address
= max_address
;
16325 mp
->prev
= max_mp
->prev
;
16327 if (mp
->prev
!= NULL
)
16328 mp
->prev
->next
= mp
;
16330 minipool_vector_head
= mp
;
16333 /* Save the new entry. */
16336 /* Scan over the preceding entries and adjust their addresses as
16338 while (mp
->prev
!= NULL
16339 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16341 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16349 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
16350 HOST_WIDE_INT min_address
)
16352 HOST_WIDE_INT offset
;
16354 /* The code below assumes these are different. */
16355 gcc_assert (mp
!= min_mp
);
16357 if (min_mp
== NULL
)
16359 if (min_address
> mp
->min_address
)
16360 mp
->min_address
= min_address
;
16364 /* We will adjust this below if it is too loose. */
16365 mp
->min_address
= min_address
;
16367 /* Unlink MP from its current position. Since min_mp is non-null,
16368 mp->next must be non-null. */
16369 mp
->next
->prev
= mp
->prev
;
16370 if (mp
->prev
!= NULL
)
16371 mp
->prev
->next
= mp
->next
;
16373 minipool_vector_head
= mp
->next
;
16375 /* Reinsert it after MIN_MP. */
16377 mp
->next
= min_mp
->next
;
16379 if (mp
->next
!= NULL
)
16380 mp
->next
->prev
= mp
;
16382 minipool_vector_tail
= mp
;
16388 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16390 mp
->offset
= offset
;
16391 if (mp
->refcount
> 0)
16392 offset
+= mp
->fix_size
;
16394 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16395 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16401 /* Add a constant to the minipool for a backward reference. Returns the
16402 node added or NULL if the constant will not fit in this pool.
16404 Note that the code for insertion for a backwards reference can be
16405 somewhat confusing because the calculated offsets for each fix do
16406 not take into account the size of the pool (which is still under
16409 add_minipool_backward_ref (Mfix
*fix
)
16411 /* If set, min_mp is the last pool_entry that has a lower constraint
16412 than the one we are trying to add. */
16413 Mnode
*min_mp
= NULL
;
16414 /* This can be negative, since it is only a constraint. */
16415 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16418 /* If we can't reach the current pool from this insn, or if we can't
16419 insert this entry at the end of the pool without pushing other
16420 fixes out of range, then we don't try. This ensures that we
16421 can't fail later on. */
16422 if (min_address
>= minipool_barrier
->address
16423 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16424 >= minipool_barrier
->address
))
16427 /* Scan the pool to see if a constant with the same value has
16428 already been added. While we are doing this, also note the
16429 location where we must insert the constant if it doesn't already
16431 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16433 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16434 && fix
->mode
== mp
->mode
16435 && (!LABEL_P (fix
->value
)
16436 || (CODE_LABEL_NUMBER (fix
->value
)
16437 == CODE_LABEL_NUMBER (mp
->value
)))
16438 && rtx_equal_p (fix
->value
, mp
->value
)
16439 /* Check that there is enough slack to move this entry to the
16440 end of the table (this is conservative). */
16441 && (mp
->max_address
16442 > (minipool_barrier
->address
16443 + minipool_vector_tail
->offset
16444 + minipool_vector_tail
->fix_size
)))
16447 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16450 if (min_mp
!= NULL
)
16451 mp
->min_address
+= fix
->fix_size
;
16454 /* Note the insertion point if necessary. */
16455 if (mp
->min_address
< min_address
)
16457 /* For now, we do not allow the insertion of 8-byte alignment
16458 requiring nodes anywhere but at the start of the pool. */
16459 if (ARM_DOUBLEWORD_ALIGN
16460 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16465 else if (mp
->max_address
16466 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16468 /* Inserting before this entry would push the fix beyond
16469 its maximum address (which can happen if we have
16470 re-located a forwards fix); force the new fix to come
16472 if (ARM_DOUBLEWORD_ALIGN
16473 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16478 min_address
= mp
->min_address
+ fix
->fix_size
;
16481 /* Do not insert a non-8-byte aligned quantity before 8-byte
16482 aligned quantities. */
16483 else if (ARM_DOUBLEWORD_ALIGN
16484 && fix
->fix_size
< 8
16485 && mp
->fix_size
>= 8)
16488 min_address
= mp
->min_address
+ fix
->fix_size
;
16493 /* We need to create a new entry. */
16495 mp
->fix_size
= fix
->fix_size
;
16496 mp
->mode
= fix
->mode
;
16497 mp
->value
= fix
->value
;
16499 mp
->max_address
= minipool_barrier
->address
+ 65536;
16501 mp
->min_address
= min_address
;
16503 if (min_mp
== NULL
)
16506 mp
->next
= minipool_vector_head
;
16508 if (mp
->next
== NULL
)
16510 minipool_vector_tail
= mp
;
16511 minipool_vector_label
= gen_label_rtx ();
16514 mp
->next
->prev
= mp
;
16516 minipool_vector_head
= mp
;
16520 mp
->next
= min_mp
->next
;
16524 if (mp
->next
!= NULL
)
16525 mp
->next
->prev
= mp
;
16527 minipool_vector_tail
= mp
;
16530 /* Save the new entry. */
16538 /* Scan over the following entries and adjust their offsets. */
16539 while (mp
->next
!= NULL
)
16541 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16542 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16545 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16547 mp
->next
->offset
= mp
->offset
;
16556 assign_minipool_offsets (Mfix
*barrier
)
16558 HOST_WIDE_INT offset
= 0;
16561 minipool_barrier
= barrier
;
16563 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16565 mp
->offset
= offset
;
16567 if (mp
->refcount
> 0)
16568 offset
+= mp
->fix_size
;
16572 /* Output the literal table */
16574 dump_minipool (rtx scan
)
16580 if (ARM_DOUBLEWORD_ALIGN
)
16581 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16582 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16589 fprintf (dump_file
,
16590 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16591 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16593 scan
= emit_label_after (gen_label_rtx (), scan
);
16594 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16595 scan
= emit_label_after (minipool_vector_label
, scan
);
16597 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16599 if (mp
->refcount
> 0)
16603 fprintf (dump_file
,
16604 ";; Offset %u, min %ld, max %ld ",
16605 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16606 (unsigned long) mp
->max_address
);
16607 arm_print_value (dump_file
, mp
->value
);
16608 fputc ('\n', dump_file
);
16611 switch (mp
->fix_size
)
16613 #ifdef HAVE_consttable_1
16615 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
16619 #ifdef HAVE_consttable_2
16621 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
16625 #ifdef HAVE_consttable_4
16627 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
16631 #ifdef HAVE_consttable_8
16633 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
16637 #ifdef HAVE_consttable_16
16639 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
16644 gcc_unreachable ();
16652 minipool_vector_head
= minipool_vector_tail
= NULL
;
16653 scan
= emit_insn_after (gen_consttable_end (), scan
);
16654 scan
= emit_barrier_after (scan
);
16657 /* Return the cost of forcibly inserting a barrier after INSN. */
16659 arm_barrier_cost (rtx insn
)
16661 /* Basing the location of the pool on the loop depth is preferable,
16662 but at the moment, the basic block information seems to be
16663 corrupt by this stage of the compilation. */
16664 int base_cost
= 50;
16665 rtx next
= next_nonnote_insn (insn
);
16667 if (next
!= NULL
&& LABEL_P (next
))
16670 switch (GET_CODE (insn
))
16673 /* It will always be better to place the table before the label, rather
16682 return base_cost
- 10;
16685 return base_cost
+ 10;
16689 /* Find the best place in the insn stream in the range
16690 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16691 Create the barrier by inserting a jump and add a new fix entry for
16694 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16696 HOST_WIDE_INT count
= 0;
16698 rtx from
= fix
->insn
;
16699 /* The instruction after which we will insert the jump. */
16700 rtx selected
= NULL
;
16702 /* The address at which the jump instruction will be placed. */
16703 HOST_WIDE_INT selected_address
;
16705 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16706 rtx label
= gen_label_rtx ();
16708 selected_cost
= arm_barrier_cost (from
);
16709 selected_address
= fix
->address
;
16711 while (from
&& count
< max_count
)
16713 rtx_jump_table_data
*tmp
;
16716 /* This code shouldn't have been called if there was a natural barrier
16718 gcc_assert (!BARRIER_P (from
));
16720 /* Count the length of this insn. This must stay in sync with the
16721 code that pushes minipool fixes. */
16722 if (LABEL_P (from
))
16723 count
+= get_label_padding (from
);
16725 count
+= get_attr_length (from
);
16727 /* If there is a jump table, add its length. */
16728 if (tablejump_p (from
, NULL
, &tmp
))
16730 count
+= get_jump_table_size (tmp
);
16732 /* Jump tables aren't in a basic block, so base the cost on
16733 the dispatch insn. If we select this location, we will
16734 still put the pool after the table. */
16735 new_cost
= arm_barrier_cost (from
);
16737 if (count
< max_count
16738 && (!selected
|| new_cost
<= selected_cost
))
16741 selected_cost
= new_cost
;
16742 selected_address
= fix
->address
+ count
;
16745 /* Continue after the dispatch table. */
16746 from
= NEXT_INSN (tmp
);
16750 new_cost
= arm_barrier_cost (from
);
16752 if (count
< max_count
16753 && (!selected
|| new_cost
<= selected_cost
))
16756 selected_cost
= new_cost
;
16757 selected_address
= fix
->address
+ count
;
16760 from
= NEXT_INSN (from
);
16763 /* Make sure that we found a place to insert the jump. */
16764 gcc_assert (selected
);
16766 /* Make sure we do not split a call and its corresponding
16767 CALL_ARG_LOCATION note. */
16768 if (CALL_P (selected
))
16770 rtx next
= NEXT_INSN (selected
);
16771 if (next
&& NOTE_P (next
)
16772 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16776 /* Create a new JUMP_INSN that branches around a barrier. */
16777 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16778 JUMP_LABEL (from
) = label
;
16779 barrier
= emit_barrier_after (from
);
16780 emit_label_after (label
, barrier
);
16782 /* Create a minipool barrier entry for the new barrier. */
16783 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16784 new_fix
->insn
= barrier
;
16785 new_fix
->address
= selected_address
;
16786 new_fix
->next
= fix
->next
;
16787 fix
->next
= new_fix
;
16792 /* Record that there is a natural barrier in the insn stream at
16795 push_minipool_barrier (rtx insn
, HOST_WIDE_INT address
)
16797 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16800 fix
->address
= address
;
16803 if (minipool_fix_head
!= NULL
)
16804 minipool_fix_tail
->next
= fix
;
16806 minipool_fix_head
= fix
;
16808 minipool_fix_tail
= fix
;
16811 /* Record INSN, which will need fixing up to load a value from the
16812 minipool. ADDRESS is the offset of the insn since the start of the
16813 function; LOC is a pointer to the part of the insn which requires
16814 fixing; VALUE is the constant that must be loaded, which is of type
16817 push_minipool_fix (rtx insn
, HOST_WIDE_INT address
, rtx
*loc
,
16818 enum machine_mode mode
, rtx value
)
16820 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16823 fix
->address
= address
;
16826 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16827 fix
->value
= value
;
16828 fix
->forwards
= get_attr_pool_range (insn
);
16829 fix
->backwards
= get_attr_neg_pool_range (insn
);
16830 fix
->minipool
= NULL
;
16832 /* If an insn doesn't have a range defined for it, then it isn't
16833 expecting to be reworked by this code. Better to stop now than
16834 to generate duff assembly code. */
16835 gcc_assert (fix
->forwards
|| fix
->backwards
);
16837 /* If an entry requires 8-byte alignment then assume all constant pools
16838 require 4 bytes of padding. Trying to do this later on a per-pool
16839 basis is awkward because existing pool entries have to be modified. */
16840 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16845 fprintf (dump_file
,
16846 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16847 GET_MODE_NAME (mode
),
16848 INSN_UID (insn
), (unsigned long) address
,
16849 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16850 arm_print_value (dump_file
, fix
->value
);
16851 fprintf (dump_file
, "\n");
16854 /* Add it to the chain of fixes. */
16857 if (minipool_fix_head
!= NULL
)
16858 minipool_fix_tail
->next
= fix
;
16860 minipool_fix_head
= fix
;
16862 minipool_fix_tail
= fix
;
16865 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16866 Returns the number of insns needed, or 99 if we always want to synthesize
16869 arm_max_const_double_inline_cost ()
16871 /* Let the value get synthesized to avoid the use of literal pools. */
16872 if (arm_disable_literal_pool
)
16875 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16878 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16879 Returns the number of insns needed, or 99 if we don't know how to
16882 arm_const_double_inline_cost (rtx val
)
16884 rtx lowpart
, highpart
;
16885 enum machine_mode mode
;
16887 mode
= GET_MODE (val
);
16889 if (mode
== VOIDmode
)
16892 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16894 lowpart
= gen_lowpart (SImode
, val
);
16895 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16897 gcc_assert (CONST_INT_P (lowpart
));
16898 gcc_assert (CONST_INT_P (highpart
));
16900 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16901 NULL_RTX
, NULL_RTX
, 0, 0)
16902 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16903 NULL_RTX
, NULL_RTX
, 0, 0));
16906 /* Return true if it is worthwhile to split a 64-bit constant into two
16907 32-bit operations. This is the case if optimizing for size, or
16908 if we have load delay slots, or if one 32-bit part can be done with
16909 a single data operation. */
16911 arm_const_double_by_parts (rtx val
)
16913 enum machine_mode mode
= GET_MODE (val
);
16916 if (optimize_size
|| arm_ld_sched
)
16919 if (mode
== VOIDmode
)
16922 part
= gen_highpart_mode (SImode
, mode
, val
);
16924 gcc_assert (CONST_INT_P (part
));
16926 if (const_ok_for_arm (INTVAL (part
))
16927 || const_ok_for_arm (~INTVAL (part
)))
16930 part
= gen_lowpart (SImode
, val
);
16932 gcc_assert (CONST_INT_P (part
));
16934 if (const_ok_for_arm (INTVAL (part
))
16935 || const_ok_for_arm (~INTVAL (part
)))
16941 /* Return true if it is possible to inline both the high and low parts
16942 of a 64-bit constant into 32-bit data processing instructions. */
16944 arm_const_double_by_immediates (rtx val
)
16946 enum machine_mode mode
= GET_MODE (val
);
16949 if (mode
== VOIDmode
)
16952 part
= gen_highpart_mode (SImode
, mode
, val
);
16954 gcc_assert (CONST_INT_P (part
));
16956 if (!const_ok_for_arm (INTVAL (part
)))
16959 part
= gen_lowpart (SImode
, val
);
16961 gcc_assert (CONST_INT_P (part
));
16963 if (!const_ok_for_arm (INTVAL (part
)))
16969 /* Scan INSN and note any of its operands that need fixing.
16970 If DO_PUSHES is false we do not actually push any of the fixups
16973 note_invalid_constants (rtx insn
, HOST_WIDE_INT address
, int do_pushes
)
16977 extract_insn (insn
);
16979 if (!constrain_operands (1))
16980 fatal_insn_not_found (insn
);
16982 if (recog_data
.n_alternatives
== 0)
16985 /* Fill in recog_op_alt with information about the constraints of
16987 preprocess_constraints (insn
);
16989 const operand_alternative
*op_alt
= which_op_alt ();
16990 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16992 /* Things we need to fix can only occur in inputs. */
16993 if (recog_data
.operand_type
[opno
] != OP_IN
)
16996 /* If this alternative is a memory reference, then any mention
16997 of constants in this alternative is really to fool reload
16998 into allowing us to accept one there. We need to fix them up
16999 now so that we output the right code. */
17000 if (op_alt
[opno
].memory_ok
)
17002 rtx op
= recog_data
.operand
[opno
];
17004 if (CONSTANT_P (op
))
17007 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
17008 recog_data
.operand_mode
[opno
], op
);
17010 else if (MEM_P (op
)
17011 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
17012 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
17016 rtx cop
= avoid_constant_pool_reference (op
);
17018 /* Casting the address of something to a mode narrower
17019 than a word can cause avoid_constant_pool_reference()
17020 to return the pool reference itself. That's no good to
17021 us here. Lets just hope that we can use the
17022 constant pool value directly. */
17024 cop
= get_pool_constant (XEXP (op
, 0));
17026 push_minipool_fix (insn
, address
,
17027 recog_data
.operand_loc
[opno
],
17028 recog_data
.operand_mode
[opno
], cop
);
17038 /* Rewrite move insn into subtract of 0 if the condition codes will
17039 be useful in next conditional jump insn. */
17042 thumb1_reorg (void)
17046 FOR_EACH_BB_FN (bb
, cfun
)
17049 rtx pat
, op0
, set
= NULL
;
17050 rtx prev
, insn
= BB_END (bb
);
17051 bool insn_clobbered
= false;
17053 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17054 insn
= PREV_INSN (insn
);
17056 /* Find the last cbranchsi4_insn in basic block BB. */
17057 if (insn
== BB_HEAD (bb
)
17058 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17061 /* Get the register with which we are comparing. */
17062 pat
= PATTERN (insn
);
17063 op0
= XEXP (XEXP (SET_SRC (pat
), 0), 0);
17065 /* Find the first flag setting insn before INSN in basic block BB. */
17066 gcc_assert (insn
!= BB_HEAD (bb
));
17067 for (prev
= PREV_INSN (insn
);
17069 && prev
!= BB_HEAD (bb
)
17071 || DEBUG_INSN_P (prev
)
17072 || ((set
= single_set (prev
)) != NULL
17073 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17074 prev
= PREV_INSN (prev
))
17076 if (reg_set_p (op0
, prev
))
17077 insn_clobbered
= true;
17080 /* Skip if op0 is clobbered by insn other than prev. */
17081 if (insn_clobbered
)
17087 dest
= SET_DEST (set
);
17088 src
= SET_SRC (set
);
17089 if (!low_register_operand (dest
, SImode
)
17090 || !low_register_operand (src
, SImode
))
17093 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17094 in INSN. Both src and dest of the move insn are checked. */
17095 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17097 dest
= copy_rtx (dest
);
17098 src
= copy_rtx (src
);
17099 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17100 PATTERN (prev
) = gen_rtx_SET (VOIDmode
, dest
, src
);
17101 INSN_CODE (prev
) = -1;
17102 /* Set test register in INSN to dest. */
17103 XEXP (XEXP (SET_SRC (pat
), 0), 0) = copy_rtx (dest
);
17104 INSN_CODE (insn
) = -1;
17109 /* Convert instructions to their cc-clobbering variant if possible, since
17110 that allows us to use smaller encodings. */
17113 thumb2_reorg (void)
17118 INIT_REG_SET (&live
);
17120 /* We are freeing block_for_insn in the toplev to keep compatibility
17121 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17122 compute_bb_for_insn ();
17125 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17127 FOR_EACH_BB_FN (bb
, cfun
)
17129 if (current_tune
->disparage_flag_setting_t16_encodings
17130 && optimize_bb_for_speed_p (bb
))
17134 Convert_Action action
= SKIP
;
17135 Convert_Action action_for_partial_flag_setting
17136 = (current_tune
->disparage_partial_flag_setting_t16_encodings
17137 && optimize_bb_for_speed_p (bb
))
17140 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17141 df_simulate_initialize_backwards (bb
, &live
);
17142 FOR_BB_INSNS_REVERSE (bb
, insn
)
17144 if (NONJUMP_INSN_P (insn
)
17145 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17146 && GET_CODE (PATTERN (insn
)) == SET
)
17149 rtx pat
= PATTERN (insn
);
17150 rtx dst
= XEXP (pat
, 0);
17151 rtx src
= XEXP (pat
, 1);
17152 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17154 if (!OBJECT_P (src
))
17155 op0
= XEXP (src
, 0);
17157 if (BINARY_P (src
))
17158 op1
= XEXP (src
, 1);
17160 if (low_register_operand (dst
, SImode
))
17162 switch (GET_CODE (src
))
17165 /* Adding two registers and storing the result
17166 in the first source is already a 16-bit
17168 if (rtx_equal_p (dst
, op0
)
17169 && register_operand (op1
, SImode
))
17172 if (low_register_operand (op0
, SImode
))
17174 /* ADDS <Rd>,<Rn>,<Rm> */
17175 if (low_register_operand (op1
, SImode
))
17177 /* ADDS <Rdn>,#<imm8> */
17178 /* SUBS <Rdn>,#<imm8> */
17179 else if (rtx_equal_p (dst
, op0
)
17180 && CONST_INT_P (op1
)
17181 && IN_RANGE (INTVAL (op1
), -255, 255))
17183 /* ADDS <Rd>,<Rn>,#<imm3> */
17184 /* SUBS <Rd>,<Rn>,#<imm3> */
17185 else if (CONST_INT_P (op1
)
17186 && IN_RANGE (INTVAL (op1
), -7, 7))
17189 /* ADCS <Rd>, <Rn> */
17190 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17191 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17192 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17194 && COMPARISON_P (op1
)
17195 && cc_register (XEXP (op1
, 0), VOIDmode
)
17196 && maybe_get_arm_condition_code (op1
) == ARM_CS
17197 && XEXP (op1
, 1) == const0_rtx
)
17202 /* RSBS <Rd>,<Rn>,#0
17203 Not handled here: see NEG below. */
17204 /* SUBS <Rd>,<Rn>,#<imm3>
17206 Not handled here: see PLUS above. */
17207 /* SUBS <Rd>,<Rn>,<Rm> */
17208 if (low_register_operand (op0
, SImode
)
17209 && low_register_operand (op1
, SImode
))
17214 /* MULS <Rdm>,<Rn>,<Rdm>
17215 As an exception to the rule, this is only used
17216 when optimizing for size since MULS is slow on all
17217 known implementations. We do not even want to use
17218 MULS in cold code, if optimizing for speed, so we
17219 test the global flag here. */
17220 if (!optimize_size
)
17222 /* else fall through. */
17226 /* ANDS <Rdn>,<Rm> */
17227 if (rtx_equal_p (dst
, op0
)
17228 && low_register_operand (op1
, SImode
))
17229 action
= action_for_partial_flag_setting
;
17230 else if (rtx_equal_p (dst
, op1
)
17231 && low_register_operand (op0
, SImode
))
17232 action
= action_for_partial_flag_setting
== SKIP
17233 ? SKIP
: SWAP_CONV
;
17239 /* ASRS <Rdn>,<Rm> */
17240 /* LSRS <Rdn>,<Rm> */
17241 /* LSLS <Rdn>,<Rm> */
17242 if (rtx_equal_p (dst
, op0
)
17243 && low_register_operand (op1
, SImode
))
17244 action
= action_for_partial_flag_setting
;
17245 /* ASRS <Rd>,<Rm>,#<imm5> */
17246 /* LSRS <Rd>,<Rm>,#<imm5> */
17247 /* LSLS <Rd>,<Rm>,#<imm5> */
17248 else if (low_register_operand (op0
, SImode
)
17249 && CONST_INT_P (op1
)
17250 && IN_RANGE (INTVAL (op1
), 0, 31))
17251 action
= action_for_partial_flag_setting
;
17255 /* RORS <Rdn>,<Rm> */
17256 if (rtx_equal_p (dst
, op0
)
17257 && low_register_operand (op1
, SImode
))
17258 action
= action_for_partial_flag_setting
;
17262 /* MVNS <Rd>,<Rm> */
17263 if (low_register_operand (op0
, SImode
))
17264 action
= action_for_partial_flag_setting
;
17268 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17269 if (low_register_operand (op0
, SImode
))
17274 /* MOVS <Rd>,#<imm8> */
17275 if (CONST_INT_P (src
)
17276 && IN_RANGE (INTVAL (src
), 0, 255))
17277 action
= action_for_partial_flag_setting
;
17281 /* MOVS and MOV<c> with registers have different
17282 encodings, so are not relevant here. */
17290 if (action
!= SKIP
)
17292 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17293 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17296 if (action
== SWAP_CONV
)
17298 src
= copy_rtx (src
);
17299 XEXP (src
, 0) = op1
;
17300 XEXP (src
, 1) = op0
;
17301 pat
= gen_rtx_SET (VOIDmode
, dst
, src
);
17302 vec
= gen_rtvec (2, pat
, clobber
);
17304 else /* action == CONV */
17305 vec
= gen_rtvec (2, pat
, clobber
);
17307 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17308 INSN_CODE (insn
) = -1;
17312 if (NONDEBUG_INSN_P (insn
))
17313 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17317 CLEAR_REG_SET (&live
);
17320 /* Gcc puts the pool in the wrong place for ARM, since we can only
17321 load addresses a limited distance around the pc. We do some
17322 special munging to move the constant pool values to the correct
17323 point in the code. */
17328 HOST_WIDE_INT address
= 0;
17333 else if (TARGET_THUMB2
)
17336 /* Ensure all insns that must be split have been split at this point.
17337 Otherwise, the pool placement code below may compute incorrect
17338 insn lengths. Note that when optimizing, all insns have already
17339 been split at this point. */
17341 split_all_insns_noflow ();
17343 minipool_fix_head
= minipool_fix_tail
= NULL
;
17345 /* The first insn must always be a note, or the code below won't
17346 scan it properly. */
17347 insn
= get_insns ();
17348 gcc_assert (NOTE_P (insn
));
17351 /* Scan all the insns and record the operands that will need fixing. */
17352 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17354 if (BARRIER_P (insn
))
17355 push_minipool_barrier (insn
, address
);
17356 else if (INSN_P (insn
))
17358 rtx_jump_table_data
*table
;
17360 note_invalid_constants (insn
, address
, true);
17361 address
+= get_attr_length (insn
);
17363 /* If the insn is a vector jump, add the size of the table
17364 and skip the table. */
17365 if (tablejump_p (insn
, NULL
, &table
))
17367 address
+= get_jump_table_size (table
);
17371 else if (LABEL_P (insn
))
17372 /* Add the worst-case padding due to alignment. We don't add
17373 the _current_ padding because the minipool insertions
17374 themselves might change it. */
17375 address
+= get_label_padding (insn
);
17378 fix
= minipool_fix_head
;
17380 /* Now scan the fixups and perform the required changes. */
17385 Mfix
* last_added_fix
;
17386 Mfix
* last_barrier
= NULL
;
17389 /* Skip any further barriers before the next fix. */
17390 while (fix
&& BARRIER_P (fix
->insn
))
17393 /* No more fixes. */
17397 last_added_fix
= NULL
;
17399 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17401 if (BARRIER_P (ftmp
->insn
))
17403 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17406 last_barrier
= ftmp
;
17408 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17411 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17414 /* If we found a barrier, drop back to that; any fixes that we
17415 could have reached but come after the barrier will now go in
17416 the next mini-pool. */
17417 if (last_barrier
!= NULL
)
17419 /* Reduce the refcount for those fixes that won't go into this
17421 for (fdel
= last_barrier
->next
;
17422 fdel
&& fdel
!= ftmp
;
17425 fdel
->minipool
->refcount
--;
17426 fdel
->minipool
= NULL
;
17429 ftmp
= last_barrier
;
17433 /* ftmp is first fix that we can't fit into this pool and
17434 there no natural barriers that we could use. Insert a
17435 new barrier in the code somewhere between the previous
17436 fix and this one, and arrange to jump around it. */
17437 HOST_WIDE_INT max_address
;
17439 /* The last item on the list of fixes must be a barrier, so
17440 we can never run off the end of the list of fixes without
17441 last_barrier being set. */
17444 max_address
= minipool_vector_head
->max_address
;
17445 /* Check that there isn't another fix that is in range that
17446 we couldn't fit into this pool because the pool was
17447 already too large: we need to put the pool before such an
17448 instruction. The pool itself may come just after the
17449 fix because create_fix_barrier also allows space for a
17450 jump instruction. */
17451 if (ftmp
->address
< max_address
)
17452 max_address
= ftmp
->address
+ 1;
17454 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17457 assign_minipool_offsets (last_barrier
);
17461 if (!BARRIER_P (ftmp
->insn
)
17462 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17469 /* Scan over the fixes we have identified for this pool, fixing them
17470 up and adding the constants to the pool itself. */
17471 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17472 this_fix
= this_fix
->next
)
17473 if (!BARRIER_P (this_fix
->insn
))
17476 = plus_constant (Pmode
,
17477 gen_rtx_LABEL_REF (VOIDmode
,
17478 minipool_vector_label
),
17479 this_fix
->minipool
->offset
);
17480 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17483 dump_minipool (last_barrier
->insn
);
17487 /* From now on we must synthesize any constants that we can't handle
17488 directly. This can happen if the RTL gets split during final
17489 instruction generation. */
17490 cfun
->machine
->after_arm_reorg
= 1;
17492 /* Free the minipool memory. */
17493 obstack_free (&minipool_obstack
, minipool_startobj
);
17496 /* Routines to output assembly language. */
17498 /* If the rtx is the correct value then return the string of the number.
17499 In this way we can ensure that valid double constants are generated even
17500 when cross compiling. */
17502 fp_immediate_constant (rtx x
)
17506 if (!fp_consts_inited
)
17509 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
17511 gcc_assert (REAL_VALUES_EQUAL (r
, value_fp0
));
17515 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17516 static const char *
17517 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17519 if (!fp_consts_inited
)
17522 gcc_assert (REAL_VALUES_EQUAL (*r
, value_fp0
));
17526 /* OPERANDS[0] is the entire list of insns that constitute pop,
17527 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17528 is in the list, UPDATE is true iff the list contains explicit
17529 update of base register. */
17531 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17537 const char *conditional
;
17538 int num_saves
= XVECLEN (operands
[0], 0);
17539 unsigned int regno
;
17540 unsigned int regno_base
= REGNO (operands
[1]);
17543 offset
+= update
? 1 : 0;
17544 offset
+= return_pc
? 1 : 0;
17546 /* Is the base register in the list? */
17547 for (i
= offset
; i
< num_saves
; i
++)
17549 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17550 /* If SP is in the list, then the base register must be SP. */
17551 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17552 /* If base register is in the list, there must be no explicit update. */
17553 if (regno
== regno_base
)
17554 gcc_assert (!update
);
17557 conditional
= reverse
? "%?%D0" : "%?%d0";
17558 if ((regno_base
== SP_REGNUM
) && TARGET_UNIFIED_ASM
)
17560 /* Output pop (not stmfd) because it has a shorter encoding. */
17561 gcc_assert (update
);
17562 sprintf (pattern
, "pop%s\t{", conditional
);
17566 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17567 It's just a convention, their semantics are identical. */
17568 if (regno_base
== SP_REGNUM
)
17569 sprintf (pattern
, "ldm%sfd\t", conditional
);
17570 else if (TARGET_UNIFIED_ASM
)
17571 sprintf (pattern
, "ldmia%s\t", conditional
);
17573 sprintf (pattern
, "ldm%sia\t", conditional
);
17575 strcat (pattern
, reg_names
[regno_base
]);
17577 strcat (pattern
, "!, {");
17579 strcat (pattern
, ", {");
17582 /* Output the first destination register. */
17584 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17586 /* Output the rest of the destination registers. */
17587 for (i
= offset
+ 1; i
< num_saves
; i
++)
17589 strcat (pattern
, ", ");
17591 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17594 strcat (pattern
, "}");
17596 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc
)
17597 strcat (pattern
, "^");
17599 output_asm_insn (pattern
, &cond
);
17603 /* Output the assembly for a store multiple. */
17606 vfp_output_fstmd (rtx
* operands
)
17613 strcpy (pattern
, "fstmfdd%?\t%m0!, {%P1");
17614 p
= strlen (pattern
);
17616 gcc_assert (REG_P (operands
[1]));
17618 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17619 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17621 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17623 strcpy (&pattern
[p
], "}");
17625 output_asm_insn (pattern
, operands
);
17630 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17631 number of bytes pushed. */
17634 vfp_emit_fstmd (int base_reg
, int count
)
17641 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17642 register pairs are stored by a store multiple insn. We avoid this
17643 by pushing an extra pair. */
17644 if (count
== 2 && !arm_arch6
)
17646 if (base_reg
== LAST_VFP_REGNUM
- 3)
17651 /* FSTMD may not store more than 16 doubleword registers at once. Split
17652 larger stores into multiple parts (up to a maximum of two, in
17657 /* NOTE: base_reg is an internal register number, so each D register
17659 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17660 saved
+= vfp_emit_fstmd (base_reg
, 16);
17664 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17665 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17667 reg
= gen_rtx_REG (DFmode
, base_reg
);
17670 XVECEXP (par
, 0, 0)
17671 = gen_rtx_SET (VOIDmode
,
17674 gen_rtx_PRE_MODIFY (Pmode
,
17677 (Pmode
, stack_pointer_rtx
,
17680 gen_rtx_UNSPEC (BLKmode
,
17681 gen_rtvec (1, reg
),
17682 UNSPEC_PUSH_MULT
));
17684 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
17685 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17686 RTX_FRAME_RELATED_P (tmp
) = 1;
17687 XVECEXP (dwarf
, 0, 0) = tmp
;
17689 tmp
= gen_rtx_SET (VOIDmode
,
17690 gen_frame_mem (DFmode
, stack_pointer_rtx
),
17692 RTX_FRAME_RELATED_P (tmp
) = 1;
17693 XVECEXP (dwarf
, 0, 1) = tmp
;
17695 for (i
= 1; i
< count
; i
++)
17697 reg
= gen_rtx_REG (DFmode
, base_reg
);
17699 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17701 tmp
= gen_rtx_SET (VOIDmode
,
17702 gen_frame_mem (DFmode
,
17703 plus_constant (Pmode
,
17707 RTX_FRAME_RELATED_P (tmp
) = 1;
17708 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17711 par
= emit_insn (par
);
17712 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17713 RTX_FRAME_RELATED_P (par
) = 1;
17718 /* Emit a call instruction with pattern PAT. ADDR is the address of
17719 the call target. */
17722 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
17726 insn
= emit_call_insn (pat
);
17728 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17729 If the call might use such an entry, add a use of the PIC register
17730 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17731 if (TARGET_VXWORKS_RTP
17734 && GET_CODE (addr
) == SYMBOL_REF
17735 && (SYMBOL_REF_DECL (addr
)
17736 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17737 : !SYMBOL_REF_LOCAL_P (addr
)))
17739 require_pic_register ();
17740 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17743 if (TARGET_AAPCS_BASED
)
17745 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17746 linker. We need to add an IP clobber to allow setting
17747 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17748 is not needed since it's a fixed register. */
17749 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
17750 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
17754 /* Output a 'call' insn. */
17756 output_call (rtx
*operands
)
17758 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17760 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17761 if (REGNO (operands
[0]) == LR_REGNUM
)
17763 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17764 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17767 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17769 if (TARGET_INTERWORK
|| arm_arch4t
)
17770 output_asm_insn ("bx%?\t%0", operands
);
17772 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17777 /* Output a 'call' insn that is a reference in memory. This is
17778 disabled for ARMv5 and we prefer a blx instead because otherwise
17779 there's a significant performance overhead. */
17781 output_call_mem (rtx
*operands
)
17783 gcc_assert (!arm_arch5
);
17784 if (TARGET_INTERWORK
)
17786 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17787 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17788 output_asm_insn ("bx%?\t%|ip", operands
);
17790 else if (regno_use_in (LR_REGNUM
, operands
[0]))
17792 /* LR is used in the memory address. We load the address in the
17793 first instruction. It's safe to use IP as the target of the
17794 load since the call will kill it anyway. */
17795 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17796 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17798 output_asm_insn ("bx%?\t%|ip", operands
);
17800 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
17804 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17805 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
17812 /* Output a move from arm registers to arm registers of a long double
17813 OPERANDS[0] is the destination.
17814 OPERANDS[1] is the source. */
17816 output_mov_long_double_arm_from_arm (rtx
*operands
)
17818 /* We have to be careful here because the two might overlap. */
17819 int dest_start
= REGNO (operands
[0]);
17820 int src_start
= REGNO (operands
[1]);
17824 if (dest_start
< src_start
)
17826 for (i
= 0; i
< 3; i
++)
17828 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17829 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17830 output_asm_insn ("mov%?\t%0, %1", ops
);
17835 for (i
= 2; i
>= 0; i
--)
17837 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17838 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17839 output_asm_insn ("mov%?\t%0, %1", ops
);
17847 arm_emit_movpair (rtx dest
, rtx src
)
17849 /* If the src is an immediate, simplify it. */
17850 if (CONST_INT_P (src
))
17852 HOST_WIDE_INT val
= INTVAL (src
);
17853 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17854 if ((val
>> 16) & 0x0000ffff)
17855 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17857 GEN_INT ((val
>> 16) & 0x0000ffff));
17860 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17861 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17864 /* Output a move between double words. It must be REG<-MEM
17867 output_move_double (rtx
*operands
, bool emit
, int *count
)
17869 enum rtx_code code0
= GET_CODE (operands
[0]);
17870 enum rtx_code code1
= GET_CODE (operands
[1]);
17875 /* The only case when this might happen is when
17876 you are looking at the length of a DImode instruction
17877 that has an invalid constant in it. */
17878 if (code0
== REG
&& code1
!= MEM
)
17880 gcc_assert (!emit
);
17887 unsigned int reg0
= REGNO (operands
[0]);
17889 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17891 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17893 switch (GET_CODE (XEXP (operands
[1], 0)))
17900 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17901 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
17903 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
17908 gcc_assert (TARGET_LDRD
);
17910 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
17917 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
17919 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
17927 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
17929 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
17934 gcc_assert (TARGET_LDRD
);
17936 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
17941 /* Autoicrement addressing modes should never have overlapping
17942 base and destination registers, and overlapping index registers
17943 are already prohibited, so this doesn't need to worry about
17945 otherops
[0] = operands
[0];
17946 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17947 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17949 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17951 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17953 /* Registers overlap so split out the increment. */
17956 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
17957 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
17964 /* Use a single insn if we can.
17965 FIXME: IWMMXT allows offsets larger than ldrd can
17966 handle, fix these up with a pair of ldr. */
17968 || !CONST_INT_P (otherops
[2])
17969 || (INTVAL (otherops
[2]) > -256
17970 && INTVAL (otherops
[2]) < 256))
17973 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
17979 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
17980 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17990 /* Use a single insn if we can.
17991 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17992 fix these up with a pair of ldr. */
17994 || !CONST_INT_P (otherops
[2])
17995 || (INTVAL (otherops
[2]) > -256
17996 && INTVAL (otherops
[2]) < 256))
17999 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
18005 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18006 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18016 /* We might be able to use ldrd %0, %1 here. However the range is
18017 different to ldr/adr, and it is broken on some ARMv7-M
18018 implementations. */
18019 /* Use the second register of the pair to avoid problematic
18021 otherops
[1] = operands
[1];
18023 output_asm_insn ("adr%?\t%0, %1", otherops
);
18024 operands
[1] = otherops
[0];
18028 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
18030 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
18037 /* ??? This needs checking for thumb2. */
18039 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18040 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18042 otherops
[0] = operands
[0];
18043 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18044 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18046 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18048 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18050 switch ((int) INTVAL (otherops
[2]))
18054 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
18060 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
18066 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
18070 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18071 operands
[1] = otherops
[0];
18073 && (REG_P (otherops
[2])
18075 || (CONST_INT_P (otherops
[2])
18076 && INTVAL (otherops
[2]) > -256
18077 && INTVAL (otherops
[2]) < 256)))
18079 if (reg_overlap_mentioned_p (operands
[0],
18083 /* Swap base and index registers over to
18084 avoid a conflict. */
18086 otherops
[1] = otherops
[2];
18089 /* If both registers conflict, it will usually
18090 have been fixed by a splitter. */
18091 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18092 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18096 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18097 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
18104 otherops
[0] = operands
[0];
18106 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
18111 if (CONST_INT_P (otherops
[2]))
18115 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18116 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18118 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18124 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18130 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18137 return "ldr%(d%)\t%0, [%1]";
18139 return "ldm%(ia%)\t%1, %M0";
18143 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18144 /* Take care of overlapping base/data reg. */
18145 if (reg_mentioned_p (operands
[0], operands
[1]))
18149 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18150 output_asm_insn ("ldr%?\t%0, %1", operands
);
18160 output_asm_insn ("ldr%?\t%0, %1", operands
);
18161 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18171 /* Constraints should ensure this. */
18172 gcc_assert (code0
== MEM
&& code1
== REG
);
18173 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18174 || (TARGET_ARM
&& TARGET_LDRD
));
18176 switch (GET_CODE (XEXP (operands
[0], 0)))
18182 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
18184 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18189 gcc_assert (TARGET_LDRD
);
18191 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
18198 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
18200 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
18208 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
18210 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
18215 gcc_assert (TARGET_LDRD
);
18217 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
18222 otherops
[0] = operands
[1];
18223 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18224 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18226 /* IWMMXT allows offsets larger than ldrd can handle,
18227 fix these up with a pair of ldr. */
18229 && CONST_INT_P (otherops
[2])
18230 && (INTVAL(otherops
[2]) <= -256
18231 || INTVAL(otherops
[2]) >= 256))
18233 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18237 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18238 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18247 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18248 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18254 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18257 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
18262 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
18267 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18268 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18270 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18274 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
18281 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
18288 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
18293 && (REG_P (otherops
[2])
18295 || (CONST_INT_P (otherops
[2])
18296 && INTVAL (otherops
[2]) > -256
18297 && INTVAL (otherops
[2]) < 256)))
18299 otherops
[0] = operands
[1];
18300 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18302 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
18308 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18309 otherops
[1] = operands
[1];
18312 output_asm_insn ("str%?\t%1, %0", operands
);
18313 output_asm_insn ("str%?\t%H1, %0", otherops
);
18323 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18324 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18327 output_move_quad (rtx
*operands
)
18329 if (REG_P (operands
[0]))
18331 /* Load, or reg->reg move. */
18333 if (MEM_P (operands
[1]))
18335 switch (GET_CODE (XEXP (operands
[1], 0)))
18338 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
18343 output_asm_insn ("adr%?\t%0, %1", operands
);
18344 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
18348 gcc_unreachable ();
18356 gcc_assert (REG_P (operands
[1]));
18358 dest
= REGNO (operands
[0]);
18359 src
= REGNO (operands
[1]);
18361 /* This seems pretty dumb, but hopefully GCC won't try to do it
18364 for (i
= 0; i
< 4; i
++)
18366 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18367 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18368 output_asm_insn ("mov%?\t%0, %1", ops
);
18371 for (i
= 3; i
>= 0; i
--)
18373 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18374 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18375 output_asm_insn ("mov%?\t%0, %1", ops
);
18381 gcc_assert (MEM_P (operands
[0]));
18382 gcc_assert (REG_P (operands
[1]));
18383 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18385 switch (GET_CODE (XEXP (operands
[0], 0)))
18388 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18392 gcc_unreachable ();
18399 /* Output a VFP load or store instruction. */
18402 output_move_vfp (rtx
*operands
)
18404 rtx reg
, mem
, addr
, ops
[2];
18405 int load
= REG_P (operands
[0]);
18406 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18407 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18410 enum machine_mode mode
;
18412 reg
= operands
[!load
];
18413 mem
= operands
[load
];
18415 mode
= GET_MODE (reg
);
18417 gcc_assert (REG_P (reg
));
18418 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18419 gcc_assert (mode
== SFmode
18423 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18424 gcc_assert (MEM_P (mem
));
18426 addr
= XEXP (mem
, 0);
18428 switch (GET_CODE (addr
))
18431 templ
= "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18432 ops
[0] = XEXP (addr
, 0);
18437 templ
= "f%smia%c%%?\t%%0!, {%%%s1}%s";
18438 ops
[0] = XEXP (addr
, 0);
18443 templ
= "f%s%c%%?\t%%%s0, %%1%s";
18449 sprintf (buff
, templ
,
18450 load
? "ld" : "st",
18453 integer_p
? "\t%@ int" : "");
18454 output_asm_insn (buff
, ops
);
18459 /* Output a Neon double-word or quad-word load or store, or a load
18460 or store for larger structure modes.
18462 WARNING: The ordering of elements is weird in big-endian mode,
18463 because the EABI requires that vectors stored in memory appear
18464 as though they were stored by a VSTM, as required by the EABI.
18465 GCC RTL defines element ordering based on in-memory order.
18466 This can be different from the architectural ordering of elements
18467 within a NEON register. The intrinsics defined in arm_neon.h use the
18468 NEON register element ordering, not the GCC RTL element ordering.
18470 For example, the in-memory ordering of a big-endian a quadword
18471 vector with 16-bit elements when stored from register pair {d0,d1}
18472 will be (lowest address first, d0[N] is NEON register element N):
18474 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18476 When necessary, quadword registers (dN, dN+1) are moved to ARM
18477 registers from rN in the order:
18479 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18481 So that STM/LDM can be used on vectors in ARM registers, and the
18482 same memory layout will result as if VSTM/VLDM were used.
18484 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18485 possible, which allows use of appropriate alignment tags.
18486 Note that the choice of "64" is independent of the actual vector
18487 element size; this size simply ensures that the behavior is
18488 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18490 Due to limitations of those instructions, use of VST1.64/VLD1.64
18491 is not possible if:
18492 - the address contains PRE_DEC, or
18493 - the mode refers to more than 4 double-word registers
18495 In those cases, it would be possible to replace VSTM/VLDM by a
18496 sequence of instructions; this is not currently implemented since
18497 this is not certain to actually improve performance. */
18500 output_move_neon (rtx
*operands
)
18502 rtx reg
, mem
, addr
, ops
[2];
18503 int regno
, nregs
, load
= REG_P (operands
[0]);
18506 enum machine_mode mode
;
18508 reg
= operands
[!load
];
18509 mem
= operands
[load
];
18511 mode
= GET_MODE (reg
);
18513 gcc_assert (REG_P (reg
));
18514 regno
= REGNO (reg
);
18515 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18516 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18517 || NEON_REGNO_OK_FOR_QUAD (regno
));
18518 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18519 || VALID_NEON_QREG_MODE (mode
)
18520 || VALID_NEON_STRUCT_MODE (mode
));
18521 gcc_assert (MEM_P (mem
));
18523 addr
= XEXP (mem
, 0);
18525 /* Strip off const from addresses like (const (plus (...))). */
18526 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18527 addr
= XEXP (addr
, 0);
18529 switch (GET_CODE (addr
))
18532 /* We have to use vldm / vstm for too-large modes. */
18535 templ
= "v%smia%%?\t%%0!, %%h1";
18536 ops
[0] = XEXP (addr
, 0);
18540 templ
= "v%s1.64\t%%h1, %%A0";
18547 /* We have to use vldm / vstm in this case, since there is no
18548 pre-decrement form of the vld1 / vst1 instructions. */
18549 templ
= "v%smdb%%?\t%%0!, %%h1";
18550 ops
[0] = XEXP (addr
, 0);
18555 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18556 gcc_unreachable ();
18563 for (i
= 0; i
< nregs
; i
++)
18565 /* We're only using DImode here because it's a convenient size. */
18566 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18567 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18568 if (reg_overlap_mentioned_p (ops
[0], mem
))
18570 gcc_assert (overlap
== -1);
18575 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18576 output_asm_insn (buff
, ops
);
18581 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18582 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18583 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18584 output_asm_insn (buff
, ops
);
18591 /* We have to use vldm / vstm for too-large modes. */
18593 templ
= "v%smia%%?\t%%m0, %%h1";
18595 templ
= "v%s1.64\t%%h1, %%A0";
18601 sprintf (buff
, templ
, load
? "ld" : "st");
18602 output_asm_insn (buff
, ops
);
18607 /* Compute and return the length of neon_mov<mode>, where <mode> is
18608 one of VSTRUCT modes: EI, OI, CI or XI. */
18610 arm_attr_length_move_neon (rtx insn
)
18612 rtx reg
, mem
, addr
;
18614 enum machine_mode mode
;
18616 extract_insn_cached (insn
);
18618 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18620 mode
= GET_MODE (recog_data
.operand
[0]);
18631 gcc_unreachable ();
18635 load
= REG_P (recog_data
.operand
[0]);
18636 reg
= recog_data
.operand
[!load
];
18637 mem
= recog_data
.operand
[load
];
18639 gcc_assert (MEM_P (mem
));
18641 mode
= GET_MODE (reg
);
18642 addr
= XEXP (mem
, 0);
18644 /* Strip off const from addresses like (const (plus (...))). */
18645 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18646 addr
= XEXP (addr
, 0);
18648 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18650 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18657 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18661 arm_address_offset_is_imm (rtx insn
)
18665 extract_insn_cached (insn
);
18667 if (REG_P (recog_data
.operand
[0]))
18670 mem
= recog_data
.operand
[0];
18672 gcc_assert (MEM_P (mem
));
18674 addr
= XEXP (mem
, 0);
18677 || (GET_CODE (addr
) == PLUS
18678 && REG_P (XEXP (addr
, 0))
18679 && CONST_INT_P (XEXP (addr
, 1))))
18685 /* Output an ADD r, s, #n where n may be too big for one instruction.
18686 If adding zero to one register, output nothing. */
18688 output_add_immediate (rtx
*operands
)
18690 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18692 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18695 output_multi_immediate (operands
,
18696 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18699 output_multi_immediate (operands
,
18700 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18707 /* Output a multiple immediate operation.
18708 OPERANDS is the vector of operands referred to in the output patterns.
18709 INSTR1 is the output pattern to use for the first constant.
18710 INSTR2 is the output pattern to use for subsequent constants.
18711 IMMED_OP is the index of the constant slot in OPERANDS.
18712 N is the constant value. */
18713 static const char *
18714 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18715 int immed_op
, HOST_WIDE_INT n
)
18717 #if HOST_BITS_PER_WIDE_INT > 32
18723 /* Quick and easy output. */
18724 operands
[immed_op
] = const0_rtx
;
18725 output_asm_insn (instr1
, operands
);
18730 const char * instr
= instr1
;
18732 /* Note that n is never zero here (which would give no output). */
18733 for (i
= 0; i
< 32; i
+= 2)
18737 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18738 output_asm_insn (instr
, operands
);
18748 /* Return the name of a shifter operation. */
18749 static const char *
18750 arm_shift_nmem(enum rtx_code code
)
18755 return ARM_LSL_NAME
;
18771 /* Return the appropriate ARM instruction for the operation code.
18772 The returned result should not be overwritten. OP is the rtx of the
18773 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18776 arithmetic_instr (rtx op
, int shift_first_arg
)
18778 switch (GET_CODE (op
))
18784 return shift_first_arg
? "rsb" : "sub";
18799 return arm_shift_nmem(GET_CODE(op
));
18802 gcc_unreachable ();
18806 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18807 for the operation code. The returned result should not be overwritten.
18808 OP is the rtx code of the shift.
18809 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18811 static const char *
18812 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18815 enum rtx_code code
= GET_CODE (op
);
18820 if (!CONST_INT_P (XEXP (op
, 1)))
18822 output_operand_lossage ("invalid shift operand");
18827 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18835 mnem
= arm_shift_nmem(code
);
18836 if (CONST_INT_P (XEXP (op
, 1)))
18838 *amountp
= INTVAL (XEXP (op
, 1));
18840 else if (REG_P (XEXP (op
, 1)))
18847 output_operand_lossage ("invalid shift operand");
18853 /* We never have to worry about the amount being other than a
18854 power of 2, since this case can never be reloaded from a reg. */
18855 if (!CONST_INT_P (XEXP (op
, 1)))
18857 output_operand_lossage ("invalid shift operand");
18861 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18863 /* Amount must be a power of two. */
18864 if (*amountp
& (*amountp
- 1))
18866 output_operand_lossage ("invalid shift operand");
18870 *amountp
= int_log2 (*amountp
);
18871 return ARM_LSL_NAME
;
18874 output_operand_lossage ("invalid shift operand");
18878 /* This is not 100% correct, but follows from the desire to merge
18879 multiplication by a power of 2 with the recognizer for a
18880 shift. >=32 is not a valid shift for "lsl", so we must try and
18881 output a shift that produces the correct arithmetical result.
18882 Using lsr #32 is identical except for the fact that the carry bit
18883 is not set correctly if we set the flags; but we never use the
18884 carry bit from such an operation, so we can ignore that. */
18885 if (code
== ROTATERT
)
18886 /* Rotate is just modulo 32. */
18888 else if (*amountp
!= (*amountp
& 31))
18890 if (code
== ASHIFT
)
18895 /* Shifts of 0 are no-ops. */
18902 /* Obtain the shift from the POWER of two. */
18904 static HOST_WIDE_INT
18905 int_log2 (HOST_WIDE_INT power
)
18907 HOST_WIDE_INT shift
= 0;
18909 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
18911 gcc_assert (shift
<= 31);
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      /* Close the current directive and open a new one once it is
	 full, to stay under the assembler's line-length limit.  */
      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  /* Backslash and double-quote must themselves be escaped.  */
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  /* Non-printable bytes go out as three-digit octal escapes.  */
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
18966 /* Compute the register save mask for registers 0 through 12
18967 inclusive. This code is used by arm_compute_save_reg_mask. */
18969 static unsigned long
18970 arm_compute_save_reg0_reg12_mask (void)
18972 unsigned long func_type
= arm_current_func_type ();
18973 unsigned long save_reg_mask
= 0;
18976 if (IS_INTERRUPT (func_type
))
18978 unsigned int max_reg
;
18979 /* Interrupt functions must not corrupt any registers,
18980 even call clobbered ones. If this is a leaf function
18981 we can just examine the registers used by the RTL, but
18982 otherwise we have to assume that whatever function is
18983 called might clobber anything, and so we have to save
18984 all the call-clobbered registers as well. */
18985 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
18986 /* FIQ handlers have registers r8 - r12 banked, so
18987 we only need to check r0 - r7, Normal ISRs only
18988 bank r14 and r15, so we must check up to r12.
18989 r13 is the stack pointer which is always preserved,
18990 so we do not need to consider it here. */
18995 for (reg
= 0; reg
<= max_reg
; reg
++)
18996 if (df_regs_ever_live_p (reg
)
18997 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
18998 save_reg_mask
|= (1 << reg
);
19000 /* Also save the pic base register if necessary. */
19002 && !TARGET_SINGLE_PIC_BASE
19003 && arm_pic_register
!= INVALID_REGNUM
19004 && crtl
->uses_pic_offset_table
)
19005 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19007 else if (IS_VOLATILE(func_type
))
19009 /* For noreturn functions we historically omitted register saves
19010 altogether. However this really messes up debugging. As a
19011 compromise save just the frame pointers. Combined with the link
19012 register saved elsewhere this should be sufficient to get
19014 if (frame_pointer_needed
)
19015 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19016 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19017 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19018 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19019 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19023 /* In the normal case we only need to save those registers
19024 which are call saved and which are used by this function. */
19025 for (reg
= 0; reg
<= 11; reg
++)
19026 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
19027 save_reg_mask
|= (1 << reg
);
19029 /* Handle the frame pointer as a special case. */
19030 if (frame_pointer_needed
)
19031 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19033 /* If we aren't loading the PIC register,
19034 don't stack it even though it may be live. */
19036 && !TARGET_SINGLE_PIC_BASE
19037 && arm_pic_register
!= INVALID_REGNUM
19038 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19039 || crtl
->uses_pic_offset_table
))
19040 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19042 /* The prologue will copy SP into R0, so save it. */
19043 if (IS_STACKALIGN (func_type
))
19044 save_reg_mask
|= 1;
19047 /* Save registers so the exception handler can modify them. */
19048 if (crtl
->calls_eh_return
)
19054 reg
= EH_RETURN_DATA_REGNO (i
);
19055 if (reg
== INVALID_REGNUM
)
19057 save_reg_mask
|= 1 << reg
;
19061 return save_reg_mask
;
19064 /* Return true if r3 is live at the start of the function. */
19067 arm_r3_live_at_start_p (void)
19069 /* Just look at cfg info, which is still close enough to correct at this
19070 point. This gives false positives for broken functions that might use
19071 uninitialized data that happens to be allocated in r3, but who cares? */
19072 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19075 /* Compute the number of bytes used to store the static chain register on the
19076 stack, above the stack frame. We need to know this accurately to get the
19077 alignment of the rest of the stack frame correct. */
19080 arm_compute_static_chain_stack_bytes (void)
19082 /* See the defining assertion in arm_expand_prologue. */
19083 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
19084 && IS_NESTED (arm_current_func_type ())
19085 && arm_r3_live_at_start_p ()
19086 && crtl
->args
.pretend_args_size
== 0)
19092 /* Compute a bit mask of which registers need to be
19093 saved on the stack for the current function.
19094 This is used by arm_get_frame_offsets, which may add extra registers. */
19096 static unsigned long
19097 arm_compute_save_reg_mask (void)
19099 unsigned int save_reg_mask
= 0;
19100 unsigned long func_type
= arm_current_func_type ();
19103 if (IS_NAKED (func_type
))
19104 /* This should never really happen. */
19107 /* If we are creating a stack frame, then we must save the frame pointer,
19108 IP (which will hold the old stack pointer), LR and the PC. */
19109 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19111 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19114 | (1 << PC_REGNUM
);
19116 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19118 /* Decide if we need to save the link register.
19119 Interrupt routines have their own banked link register,
19120 so they never need to save it.
19121 Otherwise if we do not use the link register we do not need to save
19122 it. If we are pushing other registers onto the stack however, we
19123 can save an instruction in the epilogue by pushing the link register
19124 now and then popping it back into the PC. This incurs extra memory
19125 accesses though, so we only do it when optimizing for size, and only
19126 if we know that we will not need a fancy return sequence. */
19127 if (df_regs_ever_live_p (LR_REGNUM
)
19130 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19131 && !crtl
->calls_eh_return
))
19132 save_reg_mask
|= 1 << LR_REGNUM
;
19134 if (cfun
->machine
->lr_save_eliminated
)
19135 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19137 if (TARGET_REALLY_IWMMXT
19138 && ((bit_count (save_reg_mask
)
19139 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19140 arm_compute_static_chain_stack_bytes())
19143 /* The total number of registers that are going to be pushed
19144 onto the stack is odd. We need to ensure that the stack
19145 is 64-bit aligned before we start to save iWMMXt registers,
19146 and also before we start to create locals. (A local variable
19147 might be a double or long long which we will load/store using
19148 an iWMMXt instruction). Therefore we need to push another
19149 ARM register, so that the stack will be 64-bit aligned. We
19150 try to avoid using the arg registers (r0 -r3) as they might be
19151 used to pass values in a tail call. */
19152 for (reg
= 4; reg
<= 12; reg
++)
19153 if ((save_reg_mask
& (1 << reg
)) == 0)
19157 save_reg_mask
|= (1 << reg
);
19160 cfun
->machine
->sibcall_blocked
= 1;
19161 save_reg_mask
|= (1 << 3);
19165 /* We may need to push an additional register for use initializing the
19166 PIC base register. */
19167 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19168 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19170 reg
= thumb_find_work_register (1 << 4);
19171 if (!call_used_regs
[reg
])
19172 save_reg_mask
|= (1 << reg
);
19175 return save_reg_mask
;
19179 /* Compute a bit mask of which registers need to be
19180 saved on the stack for the current function. */
19181 static unsigned long
19182 thumb1_compute_save_reg_mask (void)
19184 unsigned long mask
;
19188 for (reg
= 0; reg
< 12; reg
++)
19189 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
19193 && !TARGET_SINGLE_PIC_BASE
19194 && arm_pic_register
!= INVALID_REGNUM
19195 && crtl
->uses_pic_offset_table
)
19196 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19198 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19199 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19200 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19202 /* LR will also be pushed if any lo regs are pushed. */
19203 if (mask
& 0xff || thumb_force_lr_save ())
19204 mask
|= (1 << LR_REGNUM
);
19206 /* Make sure we have a low work register if we need one.
19207 We will need one if we are going to push a high register,
19208 but we are not currently intending to push a low register. */
19209 if ((mask
& 0xff) == 0
19210 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19212 /* Use thumb_find_work_register to choose which register
19213 we will use. If the register is live then we will
19214 have to push it. Use LAST_LO_REGNUM as our fallback
19215 choice for the register to select. */
19216 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19217 /* Make sure the register returned by thumb_find_work_register is
19218 not part of the return value. */
19219 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19220 reg
= LAST_LO_REGNUM
;
19222 if (! call_used_regs
[reg
])
19226 /* The 504 below is 8 bytes less than 512 because there are two possible
19227 alignment words. We can't tell here if they will be present or not so we
19228 have to play it safe and assume that they are. */
19229 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19230 ROUND_UP_WORD (get_frame_size ()) +
19231 crtl
->outgoing_args_size
) >= 504)
19233 /* This is the same as the code in thumb1_expand_prologue() which
19234 determines which register to use for stack decrement. */
19235 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19236 if (mask
& (1 << reg
))
19239 if (reg
> LAST_LO_REGNUM
)
19241 /* Make sure we have a register available for stack decrement. */
19242 mask
|= 1 << LAST_LO_REGNUM
;
19250 /* Return the number of bytes required to save VFP registers. */
19252 arm_get_vfp_saved_size (void)
19254 unsigned int regno
;
19259 /* Space for saved VFP registers. */
19260 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
19263 for (regno
= FIRST_VFP_REGNUM
;
19264 regno
< LAST_VFP_REGNUM
;
19267 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19268 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19272 /* Workaround ARM10 VFPr1 bug. */
19273 if (count
== 2 && !arm_arch6
)
19275 saved
+= count
* 8;
19284 if (count
== 2 && !arm_arch6
)
19286 saved
+= count
* 8;
19293 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19294 everything bar the final return instruction. If simple_return is true,
19295 then do not output epilogue, because it has already been emitted in RTL. */
19297 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19298 bool simple_return
)
19300 char conditional
[10];
19303 unsigned long live_regs_mask
;
19304 unsigned long func_type
;
19305 arm_stack_offsets
*offsets
;
19307 func_type
= arm_current_func_type ();
19309 if (IS_NAKED (func_type
))
19312 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19314 /* If this function was declared non-returning, and we have
19315 found a tail call, then we have to trust that the called
19316 function won't return. */
19321 /* Otherwise, trap an attempted return by aborting. */
19323 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19325 assemble_external_libcall (ops
[1]);
19326 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19332 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19334 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19336 cfun
->machine
->return_used_this_function
= 1;
19338 offsets
= arm_get_frame_offsets ();
19339 live_regs_mask
= offsets
->saved_regs_mask
;
19341 if (!simple_return
&& live_regs_mask
)
19343 const char * return_reg
;
19345 /* If we do not have any special requirements for function exit
19346 (e.g. interworking) then we can load the return address
19347 directly into the PC. Otherwise we must load it into LR. */
19349 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19350 return_reg
= reg_names
[PC_REGNUM
];
19352 return_reg
= reg_names
[LR_REGNUM
];
19354 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19356 /* There are three possible reasons for the IP register
19357 being saved. 1) a stack frame was created, in which case
19358 IP contains the old stack pointer, or 2) an ISR routine
19359 corrupted it, or 3) it was saved to align the stack on
19360 iWMMXt. In case 1, restore IP into SP, otherwise just
19362 if (frame_pointer_needed
)
19364 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19365 live_regs_mask
|= (1 << SP_REGNUM
);
19368 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19371 /* On some ARM architectures it is faster to use LDR rather than
19372 LDM to load a single register. On other architectures, the
19373 cost is the same. In 26 bit mode, or for exception handlers,
19374 we have to use LDM to load the PC so that the CPSR is also
19376 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19377 if (live_regs_mask
== (1U << reg
))
19380 if (reg
<= LAST_ARM_REGNUM
19381 && (reg
!= LR_REGNUM
19383 || ! IS_INTERRUPT (func_type
)))
19385 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19386 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19393 /* Generate the load multiple instruction to restore the
19394 registers. Note we can get here, even if
19395 frame_pointer_needed is true, but only if sp already
19396 points to the base of the saved core registers. */
19397 if (live_regs_mask
& (1 << SP_REGNUM
))
19399 unsigned HOST_WIDE_INT stack_adjust
;
19401 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19402 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19404 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19405 if (TARGET_UNIFIED_ASM
)
19406 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19408 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
19411 /* If we can't use ldmib (SA110 bug),
19412 then try to pop r3 instead. */
19414 live_regs_mask
|= 1 << 3;
19416 if (TARGET_UNIFIED_ASM
)
19417 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19419 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
19423 if (TARGET_UNIFIED_ASM
)
19424 sprintf (instr
, "pop%s\t{", conditional
);
19426 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
19428 p
= instr
+ strlen (instr
);
19430 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19431 if (live_regs_mask
& (1 << reg
))
19433 int l
= strlen (reg_names
[reg
]);
19439 memcpy (p
, ", ", 2);
19443 memcpy (p
, "%|", 2);
19444 memcpy (p
+ 2, reg_names
[reg
], l
);
19448 if (live_regs_mask
& (1 << LR_REGNUM
))
19450 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19451 /* If returning from an interrupt, restore the CPSR. */
19452 if (IS_INTERRUPT (func_type
))
19459 output_asm_insn (instr
, & operand
);
19461 /* See if we need to generate an extra instruction to
19462 perform the actual function return. */
19464 && func_type
!= ARM_FT_INTERWORKED
19465 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19467 /* The return has already been handled
19468 by loading the LR into the PC. */
19475 switch ((int) ARM_FUNC_TYPE (func_type
))
19479 /* ??? This is wrong for unified assembly syntax. */
19480 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19483 case ARM_FT_INTERWORKED
:
19484 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19487 case ARM_FT_EXCEPTION
:
19488 /* ??? This is wrong for unified assembly syntax. */
19489 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19493 /* Use bx if it's available. */
19494 if (arm_arch5
|| arm_arch4t
)
19495 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19497 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19501 output_asm_insn (instr
, & operand
);
19507 /* Write the function name into the code section, directly preceding
19508 the function prologue.
19510 Code will be output similar to this:
19512 .ascii "arm_poke_function_name", 0
19515 .word 0xff000000 + (t1 - t0)
19516 arm_poke_function_name
19518 stmfd sp!, {fp, ip, lr, pc}
19521 When performing a stack backtrace, code can inspect the value
19522 of 'pc' stored at 'fp' + 0. If the trace function then looks
19523 at location pc - 12 and the top 8 bits are set, then we know
19524 that there is a function name embedded immediately preceding this
19525 location and has length ((pc[-3]) & 0xff000000).
19527 We assume that pc is declared as a pointer to an unsigned long.
19529 It is of no benefit to output the function name if we are assembling
19530 a leaf function. These function types will not contain a stack
19531 backtrace structure, therefore it is not possible to determine the
19534 arm_poke_function_name (FILE *stream
, const char *name
)
19536 unsigned long alignlength
;
19537 unsigned long length
;
19540 length
= strlen (name
) + 1;
19541 alignlength
= ROUND_UP_WORD (length
);
19543 ASM_OUTPUT_ASCII (stream
, name
, length
);
19544 ASM_OUTPUT_ALIGN (stream
, 2);
19545 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19546 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19549 /* Place some comments into the assembler stream
19550 describing the current function. */
19552 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19554 unsigned long func_type
;
19556 /* ??? Do we want to print some of the below anyway? */
19560 /* Sanity check. */
19561 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19563 func_type
= arm_current_func_type ();
19565 switch ((int) ARM_FUNC_TYPE (func_type
))
19568 case ARM_FT_NORMAL
:
19570 case ARM_FT_INTERWORKED
:
19571 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19574 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19577 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19579 case ARM_FT_EXCEPTION
:
19580 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19584 if (IS_NAKED (func_type
))
19585 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19587 if (IS_VOLATILE (func_type
))
19588 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19590 if (IS_NESTED (func_type
))
19591 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19592 if (IS_STACKALIGN (func_type
))
19593 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19595 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19597 crtl
->args
.pretend_args_size
, frame_size
);
19599 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19600 frame_pointer_needed
,
19601 cfun
->machine
->uses_anonymous_args
);
19603 if (cfun
->machine
->lr_save_eliminated
)
19604 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19606 if (crtl
->calls_eh_return
)
19607 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19612 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19613 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19615 arm_stack_offsets
*offsets
;
19621 /* Emit any call-via-reg trampolines that are needed for v4t support
19622 of call_reg and call_value_reg type insns. */
19623 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19625 rtx label
= cfun
->machine
->call_via
[regno
];
19629 switch_to_section (function_section (current_function_decl
));
19630 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19631 CODE_LABEL_NUMBER (label
));
19632 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19636 /* ??? Probably not safe to set this here, since it assumes that a
19637 function will be emitted as assembly immediately after we generate
19638 RTL for it. This does not happen for inline functions. */
19639 cfun
->machine
->return_used_this_function
= 0;
19641 else /* TARGET_32BIT */
19643 /* We need to take into account any stack-frame rounding. */
19644 offsets
= arm_get_frame_offsets ();
19646 gcc_assert (!use_return_insn (FALSE
, NULL
)
19647 || (cfun
->machine
->return_used_this_function
!= 0)
19648 || offsets
->saved_regs
== offsets
->outgoing_args
19649 || frame_pointer_needed
);
19653 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19654 STR and STRD. If an even number of registers are being pushed, one
19655 or more STRD patterns are created for each register pair. If an
19656 odd number of registers are pushed, emit an initial STR followed by
19657 as many STRD instructions as are needed. This works best when the
19658 stack is initially 64-bit aligned (the normal case), since it
19659 ensures that each STRD is also 64-bit aligned. */
19661 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19666 rtx par
= NULL_RTX
;
19667 rtx dwarf
= NULL_RTX
;
19671 num_regs
= bit_count (saved_regs_mask
);
19673 /* Must be at least one register to save, and can't save SP or PC. */
19674 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19675 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19676 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19678 /* Create sequence for DWARF info. All the frame-related data for
19679 debugging is held in this wrapper. */
19680 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19682 /* Describe the stack adjustment. */
19683 tmp
= gen_rtx_SET (VOIDmode
,
19685 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19686 RTX_FRAME_RELATED_P (tmp
) = 1;
19687 XVECEXP (dwarf
, 0, 0) = tmp
;
19689 /* Find the first register. */
19690 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19695 /* If there's an odd number of registers to push. Start off by
19696 pushing a single register. This ensures that subsequent strd
19697 operations are dword aligned (assuming that SP was originally
19698 64-bit aligned). */
19699 if ((num_regs
& 1) != 0)
19701 rtx reg
, mem
, insn
;
19703 reg
= gen_rtx_REG (SImode
, regno
);
19705 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19706 stack_pointer_rtx
));
19708 mem
= gen_frame_mem (Pmode
,
19710 (Pmode
, stack_pointer_rtx
,
19711 plus_constant (Pmode
, stack_pointer_rtx
,
19714 tmp
= gen_rtx_SET (VOIDmode
, mem
, reg
);
19715 RTX_FRAME_RELATED_P (tmp
) = 1;
19716 insn
= emit_insn (tmp
);
19717 RTX_FRAME_RELATED_P (insn
) = 1;
19718 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19719 tmp
= gen_rtx_SET (VOIDmode
, gen_frame_mem (Pmode
, stack_pointer_rtx
),
19721 RTX_FRAME_RELATED_P (tmp
) = 1;
19724 XVECEXP (dwarf
, 0, i
) = tmp
;
19728 while (i
< num_regs
)
19729 if (saved_regs_mask
& (1 << regno
))
19731 rtx reg1
, reg2
, mem1
, mem2
;
19732 rtx tmp0
, tmp1
, tmp2
;
19735 /* Find the register to pair with this one. */
19736 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19740 reg1
= gen_rtx_REG (SImode
, regno
);
19741 reg2
= gen_rtx_REG (SImode
, regno2
);
19748 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19751 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19753 -4 * (num_regs
- 1)));
19754 tmp0
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
19755 plus_constant (Pmode
, stack_pointer_rtx
,
19757 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19758 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19759 RTX_FRAME_RELATED_P (tmp0
) = 1;
19760 RTX_FRAME_RELATED_P (tmp1
) = 1;
19761 RTX_FRAME_RELATED_P (tmp2
) = 1;
19762 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19763 XVECEXP (par
, 0, 0) = tmp0
;
19764 XVECEXP (par
, 0, 1) = tmp1
;
19765 XVECEXP (par
, 0, 2) = tmp2
;
19766 insn
= emit_insn (par
);
19767 RTX_FRAME_RELATED_P (insn
) = 1;
19768 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19772 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19775 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19778 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19779 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19780 RTX_FRAME_RELATED_P (tmp1
) = 1;
19781 RTX_FRAME_RELATED_P (tmp2
) = 1;
19782 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19783 XVECEXP (par
, 0, 0) = tmp1
;
19784 XVECEXP (par
, 0, 1) = tmp2
;
19788 /* Create unwind information. This is an approximation. */
19789 tmp1
= gen_rtx_SET (VOIDmode
,
19790 gen_frame_mem (Pmode
,
19791 plus_constant (Pmode
,
19795 tmp2
= gen_rtx_SET (VOIDmode
,
19796 gen_frame_mem (Pmode
,
19797 plus_constant (Pmode
,
19802 RTX_FRAME_RELATED_P (tmp1
) = 1;
19803 RTX_FRAME_RELATED_P (tmp2
) = 1;
19804 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19805 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19807 regno
= regno2
+ 1;
19815 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19816 whenever possible, otherwise it emits single-word stores. The first store
19817 also allocates stack space for all saved registers, using writeback with
19818 post-addressing mode. All other stores use offset addressing. If no STRD
19819 can be emitted, this function emits a sequence of single-word stores,
19820 and not an STM as before, because single-word stores provide more freedom
19821 scheduling and can be turned into an STM by peephole optimizations. */
19823 arm_emit_strd_push (unsigned long saved_regs_mask
)
19826 int i
, j
, dwarf_index
= 0;
19828 rtx dwarf
= NULL_RTX
;
19829 rtx insn
= NULL_RTX
;
19832 /* TODO: A more efficient code can be emitted by changing the
19833 layout, e.g., first push all pairs that can use STRD to keep the
19834 stack aligned, and then push all other registers. */
19835 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19836 if (saved_regs_mask
& (1 << i
))
19839 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19840 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19841 gcc_assert (num_regs
> 0);
19843 /* Create sequence for DWARF info. */
19844 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19846 /* For dwarf info, we generate explicit stack update. */
19847 tmp
= gen_rtx_SET (VOIDmode
,
19849 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19850 RTX_FRAME_RELATED_P (tmp
) = 1;
19851 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19853 /* Save registers. */
19854 offset
= - 4 * num_regs
;
19856 while (j
<= LAST_ARM_REGNUM
)
19857 if (saved_regs_mask
& (1 << j
))
19860 && (saved_regs_mask
& (1 << (j
+ 1))))
19862 /* Current register and previous register form register pair for
19863 which STRD can be generated. */
19866 /* Allocate stack space for all saved registers. */
19867 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19868 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19869 mem
= gen_frame_mem (DImode
, tmp
);
19872 else if (offset
> 0)
19873 mem
= gen_frame_mem (DImode
,
19874 plus_constant (Pmode
,
19878 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19880 tmp
= gen_rtx_SET (DImode
, mem
, gen_rtx_REG (DImode
, j
));
19881 RTX_FRAME_RELATED_P (tmp
) = 1;
19882 tmp
= emit_insn (tmp
);
19884 /* Record the first store insn. */
19885 if (dwarf_index
== 1)
19888 /* Generate dwarf info. */
19889 mem
= gen_frame_mem (SImode
,
19890 plus_constant (Pmode
,
19893 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19894 RTX_FRAME_RELATED_P (tmp
) = 1;
19895 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19897 mem
= gen_frame_mem (SImode
,
19898 plus_constant (Pmode
,
19901 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
+ 1));
19902 RTX_FRAME_RELATED_P (tmp
) = 1;
19903 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19910 /* Emit a single word store. */
19913 /* Allocate stack space for all saved registers. */
19914 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19915 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19916 mem
= gen_frame_mem (SImode
, tmp
);
19919 else if (offset
> 0)
19920 mem
= gen_frame_mem (SImode
,
19921 plus_constant (Pmode
,
19925 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19927 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19928 RTX_FRAME_RELATED_P (tmp
) = 1;
19929 tmp
= emit_insn (tmp
);
19931 /* Record the first store insn. */
19932 if (dwarf_index
== 1)
19935 /* Generate dwarf info. */
19936 mem
= gen_frame_mem (SImode
,
19937 plus_constant(Pmode
,
19940 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19941 RTX_FRAME_RELATED_P (tmp
) = 1;
19942 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19951 /* Attach dwarf info to the first insn we generate. */
19952 gcc_assert (insn
!= NULL_RTX
);
19953 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19954 RTX_FRAME_RELATED_P (insn
) = 1;
19957 /* Generate and emit an insn that we will recognize as a push_multi.
19958 Unfortunately, since this insn does not reflect very well the actual
19959 semantics of the operation, we need to annotate the insn for the benefit
19960 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19961 MASK for registers that should be annotated for DWARF2 frame unwind
19964 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
19967 int num_dwarf_regs
= 0;
19971 int dwarf_par_index
;
19974 /* We don't record the PC in the dwarf frame information. */
19975 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
19977 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19979 if (mask
& (1 << i
))
19981 if (dwarf_regs_mask
& (1 << i
))
19985 gcc_assert (num_regs
&& num_regs
<= 16);
19986 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
19988 /* For the body of the insn we are going to generate an UNSPEC in
19989 parallel with several USEs. This allows the insn to be recognized
19990 by the push_multi pattern in the arm.md file.
19992 The body of the insn looks something like this:
19995 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19996 (const_int:SI <num>)))
19997 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20003 For the frame note however, we try to be more explicit and actually
20004 show each register being stored into the stack frame, plus a (single)
20005 decrement of the stack pointer. We do it this way in order to be
20006 friendly to the stack unwinding code, which only wants to see a single
20007 stack decrement per instruction. The RTL we generate for the note looks
20008 something like this:
20011 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20012 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20013 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20014 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20018 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20019 instead we'd have a parallel expression detailing all
20020 the stores to the various memory addresses so that debug
20021 information is more up-to-date. Remember however while writing
20022 this to take care of the constraints with the push instruction.
20024 Note also that this has to be taken care of for the VFP registers.
20026 For more see PR43399. */
20028 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20029 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20030 dwarf_par_index
= 1;
20032 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20034 if (mask
& (1 << i
))
20036 reg
= gen_rtx_REG (SImode
, i
);
20038 XVECEXP (par
, 0, 0)
20039 = gen_rtx_SET (VOIDmode
,
20042 gen_rtx_PRE_MODIFY (Pmode
,
20045 (Pmode
, stack_pointer_rtx
,
20048 gen_rtx_UNSPEC (BLKmode
,
20049 gen_rtvec (1, reg
),
20050 UNSPEC_PUSH_MULT
));
20052 if (dwarf_regs_mask
& (1 << i
))
20054 tmp
= gen_rtx_SET (VOIDmode
,
20055 gen_frame_mem (SImode
, stack_pointer_rtx
),
20057 RTX_FRAME_RELATED_P (tmp
) = 1;
20058 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20065 for (j
= 1, i
++; j
< num_regs
; i
++)
20067 if (mask
& (1 << i
))
20069 reg
= gen_rtx_REG (SImode
, i
);
20071 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20073 if (dwarf_regs_mask
& (1 << i
))
20076 = gen_rtx_SET (VOIDmode
,
20079 plus_constant (Pmode
, stack_pointer_rtx
,
20082 RTX_FRAME_RELATED_P (tmp
) = 1;
20083 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20090 par
= emit_insn (par
);
20092 tmp
= gen_rtx_SET (VOIDmode
,
20094 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20095 RTX_FRAME_RELATED_P (tmp
) = 1;
20096 XVECEXP (dwarf
, 0, 0) = tmp
;
20098 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
20103 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20104 SIZE is the offset to be adjusted.
20105 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20107 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20111 RTX_FRAME_RELATED_P (insn
) = 1;
20112 dwarf
= gen_rtx_SET (VOIDmode
, dest
, plus_constant (Pmode
, src
, size
));
20113 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20116 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20117 SAVED_REGS_MASK shows which registers need to be restored.
20119 Unfortunately, since this insn does not reflect very well the actual
20120 semantics of the operation, we need to annotate the insn for the benefit
20121 of DWARF2 frame unwind information. */
20123 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20128 rtx dwarf
= NULL_RTX
;
20134 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
20135 offset_adj
= return_in_pc
? 1 : 0;
20136 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20137 if (saved_regs_mask
& (1 << i
))
20140 gcc_assert (num_regs
&& num_regs
<= 16);
20142 /* If SP is in reglist, then we don't emit SP update insn. */
20143 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20145 /* The parallel needs to hold num_regs SETs
20146 and one SET for the stack update. */
20147 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20152 XVECEXP (par
, 0, 0) = tmp
;
20157 /* Increment the stack pointer, based on there being
20158 num_regs 4-byte registers to restore. */
20159 tmp
= gen_rtx_SET (VOIDmode
,
20161 plus_constant (Pmode
,
20164 RTX_FRAME_RELATED_P (tmp
) = 1;
20165 XVECEXP (par
, 0, offset_adj
) = tmp
;
20168 /* Now restore every reg, which may include PC. */
20169 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20170 if (saved_regs_mask
& (1 << i
))
20172 reg
= gen_rtx_REG (SImode
, i
);
20173 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20175 /* Emit single load with writeback. */
20176 tmp
= gen_frame_mem (SImode
,
20177 gen_rtx_POST_INC (Pmode
,
20178 stack_pointer_rtx
));
20179 tmp
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, tmp
));
20180 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20184 tmp
= gen_rtx_SET (VOIDmode
,
20188 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20189 RTX_FRAME_RELATED_P (tmp
) = 1;
20190 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20192 /* We need to maintain a sequence for DWARF info too. As dwarf info
20193 should not have PC, skip PC. */
20194 if (i
!= PC_REGNUM
)
20195 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20201 par
= emit_jump_insn (par
);
20203 par
= emit_insn (par
);
20205 REG_NOTES (par
) = dwarf
;
20207 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20208 stack_pointer_rtx
, stack_pointer_rtx
);
20211 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20212 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20214 Unfortunately, since this insn does not reflect very well the actual
20215 semantics of the operation, we need to annotate the insn for the benefit
20216 of DWARF2 frame unwind information. */
20218 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20222 rtx dwarf
= NULL_RTX
;
20225 gcc_assert (num_regs
&& num_regs
<= 32);
20227 /* Workaround ARM10 VFPr1 bug. */
20228 if (num_regs
== 2 && !arm_arch6
)
20230 if (first_reg
== 15)
20236 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20237 there could be up to 32 D-registers to restore.
20238 If there are more than 16 D-registers, make two recursive calls,
20239 each of which emits one pop_multi instruction. */
20242 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20243 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20247 /* The parallel needs to hold num_regs SETs
20248 and one SET for the stack update. */
20249 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20251 /* Increment the stack pointer, based on there being
20252 num_regs 8-byte registers to restore. */
20253 tmp
= gen_rtx_SET (VOIDmode
,
20255 plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20256 RTX_FRAME_RELATED_P (tmp
) = 1;
20257 XVECEXP (par
, 0, 0) = tmp
;
20259 /* Now show every reg that will be restored, using a SET for each. */
20260 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20262 reg
= gen_rtx_REG (DFmode
, i
);
20264 tmp
= gen_rtx_SET (VOIDmode
,
20268 plus_constant (Pmode
, base_reg
, 8 * j
)));
20269 RTX_FRAME_RELATED_P (tmp
) = 1;
20270 XVECEXP (par
, 0, j
+ 1) = tmp
;
20272 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20277 par
= emit_insn (par
);
20278 REG_NOTES (par
) = dwarf
;
20280 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20281 if (TARGET_VFP
&& REGNO (base_reg
) == IP_REGNUM
)
20283 RTX_FRAME_RELATED_P (par
) = 1;
20284 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20287 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20288 base_reg
, base_reg
);
20291 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20292 number of registers are being popped, multiple LDRD patterns are created for
20293 all register pairs. If odd number of registers are popped, last register is
20294 loaded by using LDR pattern. */
20296 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20300 rtx par
= NULL_RTX
;
20301 rtx dwarf
= NULL_RTX
;
20302 rtx tmp
, reg
, tmp1
;
20305 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
20306 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20307 if (saved_regs_mask
& (1 << i
))
20310 gcc_assert (num_regs
&& num_regs
<= 16);
20312 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20313 to be popped. So, if num_regs is even, now it will become odd,
20314 and we can generate pop with PC. If num_regs is odd, it will be
20315 even now, and ldr with return can be generated for PC. */
20319 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20321 /* Var j iterates over all the registers to gather all the registers in
20322 saved_regs_mask. Var i gives index of saved registers in stack frame.
20323 A PARALLEL RTX of register-pair is created here, so that pattern for
20324 LDRD can be matched. As PC is always last register to be popped, and
20325 we have already decremented num_regs if PC, we don't have to worry
20326 about PC in this loop. */
20327 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20328 if (saved_regs_mask
& (1 << j
))
20330 /* Create RTX for memory load. */
20331 reg
= gen_rtx_REG (SImode
, j
);
20332 tmp
= gen_rtx_SET (SImode
,
20334 gen_frame_mem (SImode
,
20335 plus_constant (Pmode
,
20336 stack_pointer_rtx
, 4 * i
)));
20337 RTX_FRAME_RELATED_P (tmp
) = 1;
20341 /* When saved-register index (i) is even, the RTX to be emitted is
20342 yet to be created. Hence create it first. The LDRD pattern we
20343 are generating is :
20344 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20345 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20346 where target registers need not be consecutive. */
20347 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20351 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20352 added as 0th element and if i is odd, reg_i is added as 1st element
20353 of LDRD pattern shown above. */
20354 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20355 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20359 /* When saved-register index (i) is odd, RTXs for both the registers
20360 to be loaded are generated in above given LDRD pattern, and the
20361 pattern can be emitted now. */
20362 par
= emit_insn (par
);
20363 REG_NOTES (par
) = dwarf
;
20364 RTX_FRAME_RELATED_P (par
) = 1;
20370 /* If the number of registers pushed is odd AND return_in_pc is false OR
20371 number of registers are even AND return_in_pc is true, last register is
20372 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20373 then LDR with post increment. */
20375 /* Increment the stack pointer, based on there being
20376 num_regs 4-byte registers to restore. */
20377 tmp
= gen_rtx_SET (VOIDmode
,
20379 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20380 RTX_FRAME_RELATED_P (tmp
) = 1;
20381 tmp
= emit_insn (tmp
);
20384 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20385 stack_pointer_rtx
, stack_pointer_rtx
);
20390 if (((num_regs
% 2) == 1 && !return_in_pc
)
20391 || ((num_regs
% 2) == 0 && return_in_pc
))
20393 /* Scan for the single register to be popped. Skip until the saved
20394 register is found. */
20395 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20397 /* Gen LDR with post increment here. */
20398 tmp1
= gen_rtx_MEM (SImode
,
20399 gen_rtx_POST_INC (SImode
,
20400 stack_pointer_rtx
));
20401 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20403 reg
= gen_rtx_REG (SImode
, j
);
20404 tmp
= gen_rtx_SET (SImode
, reg
, tmp1
);
20405 RTX_FRAME_RELATED_P (tmp
) = 1;
20406 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20410 /* If return_in_pc, j must be PC_REGNUM. */
20411 gcc_assert (j
== PC_REGNUM
);
20412 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20413 XVECEXP (par
, 0, 0) = ret_rtx
;
20414 XVECEXP (par
, 0, 1) = tmp
;
20415 par
= emit_jump_insn (par
);
20419 par
= emit_insn (tmp
);
20420 REG_NOTES (par
) = dwarf
;
20421 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20422 stack_pointer_rtx
, stack_pointer_rtx
);
20426 else if ((num_regs
% 2) == 1 && return_in_pc
)
20428 /* There are 2 registers to be popped. So, generate the pattern
20429 pop_multiple_with_stack_update_and_return to pop in PC. */
20430 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20436 /* LDRD in ARM mode needs consecutive registers as operands. This function
20437 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20438 offset addressing and then generates one separate stack udpate. This provides
20439 more scheduling freedom, compared to writeback on every load. However,
20440 if the function returns using load into PC directly
20441 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20442 before the last load. TODO: Add a peephole optimization to recognize
20443 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20444 peephole optimization to merge the load at stack-offset zero
20445 with the stack update instruction using load with writeback
20446 in post-index addressing mode. */
20448 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20452 rtx par
= NULL_RTX
;
20453 rtx dwarf
= NULL_RTX
;
20456 /* Restore saved registers. */
20457 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20459 while (j
<= LAST_ARM_REGNUM
)
20460 if (saved_regs_mask
& (1 << j
))
20463 && (saved_regs_mask
& (1 << (j
+ 1)))
20464 && (j
+ 1) != PC_REGNUM
)
20466 /* Current register and next register form register pair for which
20467 LDRD can be generated. PC is always the last register popped, and
20468 we handle it separately. */
20470 mem
= gen_frame_mem (DImode
,
20471 plus_constant (Pmode
,
20475 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20477 tmp
= gen_rtx_SET (DImode
, gen_rtx_REG (DImode
, j
), mem
);
20478 tmp
= emit_insn (tmp
);
20479 RTX_FRAME_RELATED_P (tmp
) = 1;
20481 /* Generate dwarf info. */
20483 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20484 gen_rtx_REG (SImode
, j
),
20486 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20487 gen_rtx_REG (SImode
, j
+ 1),
20490 REG_NOTES (tmp
) = dwarf
;
20495 else if (j
!= PC_REGNUM
)
20497 /* Emit a single word load. */
20499 mem
= gen_frame_mem (SImode
,
20500 plus_constant (Pmode
,
20504 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20506 tmp
= gen_rtx_SET (SImode
, gen_rtx_REG (SImode
, j
), mem
);
20507 tmp
= emit_insn (tmp
);
20508 RTX_FRAME_RELATED_P (tmp
) = 1;
20510 /* Generate dwarf info. */
20511 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20512 gen_rtx_REG (SImode
, j
),
20518 else /* j == PC_REGNUM */
20524 /* Update the stack. */
20527 tmp
= gen_rtx_SET (Pmode
,
20529 plus_constant (Pmode
,
20532 tmp
= emit_insn (tmp
);
20533 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20534 stack_pointer_rtx
, stack_pointer_rtx
);
20538 if (saved_regs_mask
& (1 << PC_REGNUM
))
20540 /* Only PC is to be popped. */
20541 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20542 XVECEXP (par
, 0, 0) = ret_rtx
;
20543 tmp
= gen_rtx_SET (SImode
,
20544 gen_rtx_REG (SImode
, PC_REGNUM
),
20545 gen_frame_mem (SImode
,
20546 gen_rtx_POST_INC (SImode
,
20547 stack_pointer_rtx
)));
20548 RTX_FRAME_RELATED_P (tmp
) = 1;
20549 XVECEXP (par
, 0, 1) = tmp
;
20550 par
= emit_jump_insn (par
);
20552 /* Generate dwarf info. */
20553 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20554 gen_rtx_REG (SImode
, PC_REGNUM
),
20556 REG_NOTES (par
) = dwarf
;
20557 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20558 stack_pointer_rtx
, stack_pointer_rtx
);
20562 /* Calculate the size of the return value that is passed in registers. */
20564 arm_size_return_regs (void)
20566 enum machine_mode mode
;
20568 if (crtl
->return_rtx
!= 0)
20569 mode
= GET_MODE (crtl
->return_rtx
);
20571 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20573 return GET_MODE_SIZE (mode
);
20576 /* Return true if the current function needs to save/restore LR. */
20578 thumb_force_lr_save (void)
20580 return !cfun
->machine
->lr_save_eliminated
20581 && (!leaf_function_p ()
20582 || thumb_far_jump_used_p ()
20583 || df_regs_ever_live_p (LR_REGNUM
));
20586 /* We do not know if r3 will be available because
20587 we do have an indirect tailcall happening in this
20588 particular case. */
20590 is_indirect_tailcall_p (rtx call
)
20592 rtx pat
= PATTERN (call
);
20594 /* Indirect tail call. */
20595 pat
= XVECEXP (pat
, 0, 0);
20596 if (GET_CODE (pat
) == SET
)
20597 pat
= SET_SRC (pat
);
20599 pat
= XEXP (XEXP (pat
, 0), 0);
20600 return REG_P (pat
);
20603 /* Return true if r3 is used by any of the tail call insns in the
20604 current function. */
20606 any_sibcall_could_use_r3 (void)
20611 if (!crtl
->tail_call_emit
)
20613 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20614 if (e
->flags
& EDGE_SIBCALL
)
20616 rtx call
= BB_END (e
->src
);
20617 if (!CALL_P (call
))
20618 call
= prev_nonnote_nondebug_insn (call
);
20619 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20620 if (find_regno_fusage (call
, USE
, 3)
20621 || is_indirect_tailcall_p (call
))
20628 /* Compute the distance from register FROM to register TO.
20629 These can be the arg pointer (26), the soft frame pointer (25),
20630 the stack pointer (13) or the hard frame pointer (11).
20631 In thumb mode r7 is used as the soft frame pointer, if needed.
20632 Typical stack layout looks like this:
20634 old stack pointer -> | |
20637 | | saved arguments for
20638 | | vararg functions
20641 hard FP & arg pointer -> | | \
20649 soft frame pointer -> | | /
20654 locals base pointer -> | | /
20659 current stack pointer -> | | /
20662 For a given function some or all of these stack components
20663 may not be needed, giving rise to the possibility of
20664 eliminating some of the registers.
20666 The values returned by this function must reflect the behavior
20667 of arm_expand_prologue() and arm_compute_save_reg_mask().
20669 The sign of the number returned reflects the direction of stack
20670 growth, so the values are positive for all eliminations except
20671 from the soft frame pointer to the hard frame pointer.
20673 SFP may point just inside the local variables block to ensure correct
20677 /* Calculate stack offsets. These are used to calculate register elimination
20678 offsets and in prologue/epilogue code. Also calculates which registers
20679 should be saved. */
20681 static arm_stack_offsets
*
20682 arm_get_frame_offsets (void)
20684 struct arm_stack_offsets
*offsets
;
20685 unsigned long func_type
;
20689 HOST_WIDE_INT frame_size
;
20692 offsets
= &cfun
->machine
->stack_offsets
;
20694 /* We need to know if we are a leaf function. Unfortunately, it
20695 is possible to be called after start_sequence has been called,
20696 which causes get_insns to return the insns for the sequence,
20697 not the function, which will cause leaf_function_p to return
20698 the incorrect result.
20700 to know about leaf functions once reload has completed, and the
20701 frame size cannot be changed after that time, so we can safely
20702 use the cached value. */
20704 if (reload_completed
)
20707 /* Initially this is the size of the local variables. It will translated
20708 into an offset once we have determined the size of preceding data. */
20709 frame_size
= ROUND_UP_WORD (get_frame_size ());
20711 leaf
= leaf_function_p ();
20713 /* Space for variadic functions. */
20714 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20716 /* In Thumb mode this is incorrect, but never used. */
20718 = (offsets
->saved_args
20719 + arm_compute_static_chain_stack_bytes ()
20720 + (frame_pointer_needed
? 4 : 0));
20724 unsigned int regno
;
20726 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20727 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20728 saved
= core_saved
;
20730 /* We know that SP will be doubleword aligned on entry, and we must
20731 preserve that condition at any subroutine call. We also require the
20732 soft frame pointer to be doubleword aligned. */
20734 if (TARGET_REALLY_IWMMXT
)
20736 /* Check for the call-saved iWMMXt registers. */
20737 for (regno
= FIRST_IWMMXT_REGNUM
;
20738 regno
<= LAST_IWMMXT_REGNUM
;
20740 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20744 func_type
= arm_current_func_type ();
20745 /* Space for saved VFP registers. */
20746 if (! IS_VOLATILE (func_type
)
20747 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
20748 saved
+= arm_get_vfp_saved_size ();
20750 else /* TARGET_THUMB1 */
20752 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20753 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20754 saved
= core_saved
;
20755 if (TARGET_BACKTRACE
)
20759 /* Saved registers include the stack frame. */
20760 offsets
->saved_regs
20761 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20762 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20764 /* A leaf function does not need any stack alignment if it has nothing
20766 if (leaf
&& frame_size
== 0
20767 /* However if it calls alloca(), we have a dynamically allocated
20768 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20769 && ! cfun
->calls_alloca
)
20771 offsets
->outgoing_args
= offsets
->soft_frame
;
20772 offsets
->locals_base
= offsets
->soft_frame
;
20776 /* Ensure SFP has the correct alignment. */
20777 if (ARM_DOUBLEWORD_ALIGN
20778 && (offsets
->soft_frame
& 7))
20780 offsets
->soft_frame
+= 4;
20781 /* Try to align stack by pushing an extra reg. Don't bother doing this
20782 when there is a stack frame as the alignment will be rolled into
20783 the normal stack adjustment. */
20784 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20788 /* Register r3 is caller-saved. Normally it does not need to be
20789 saved on entry by the prologue. However if we choose to save
20790 it for padding then we may confuse the compiler into thinking
20791 a prologue sequence is required when in fact it is not. This
20792 will occur when shrink-wrapping if r3 is used as a scratch
20793 register and there are no other callee-saved writes.
20795 This situation can be avoided when other callee-saved registers
20796 are available and r3 is not mandatory if we choose a callee-saved
20797 register for padding. */
20798 bool prefer_callee_reg_p
= false;
20800 /* If it is safe to use r3, then do so. This sometimes
20801 generates better code on Thumb-2 by avoiding the need to
20802 use 32-bit push/pop instructions. */
20803 if (! any_sibcall_could_use_r3 ()
20804 && arm_size_return_regs () <= 12
20805 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20807 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20810 if (!TARGET_THUMB2
)
20811 prefer_callee_reg_p
= true;
20814 || prefer_callee_reg_p
)
20816 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20818 /* Avoid fixed registers; they may be changed at
20819 arbitrary times so it's unsafe to restore them
20820 during the epilogue. */
20822 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20832 offsets
->saved_regs
+= 4;
20833 offsets
->saved_regs_mask
|= (1 << reg
);
20838 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20839 offsets
->outgoing_args
= (offsets
->locals_base
20840 + crtl
->outgoing_args_size
);
20842 if (ARM_DOUBLEWORD_ALIGN
)
20844 /* Ensure SP remains doubleword aligned. */
20845 if (offsets
->outgoing_args
& 7)
20846 offsets
->outgoing_args
+= 4;
20847 gcc_assert (!(offsets
->outgoing_args
& 7));
20854 /* Calculate the relative offsets for the different stack pointers. Positive
20855 offsets are in the direction of stack growth. */
20858 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20860 arm_stack_offsets
*offsets
;
20862 offsets
= arm_get_frame_offsets ();
20864 /* OK, now we have enough information to compute the distances.
20865 There must be an entry in these switch tables for each pair
20866 of registers in ELIMINABLE_REGS, even if some of the entries
20867 seem to be redundant or useless. */
20870 case ARG_POINTER_REGNUM
:
20873 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20876 case FRAME_POINTER_REGNUM
:
20877 /* This is the reverse of the soft frame pointer
20878 to hard frame pointer elimination below. */
20879 return offsets
->soft_frame
- offsets
->saved_args
;
20881 case ARM_HARD_FRAME_POINTER_REGNUM
:
20882 /* This is only non-zero in the case where the static chain register
20883 is stored above the frame. */
20884 return offsets
->frame
- offsets
->saved_args
- 4;
20886 case STACK_POINTER_REGNUM
:
20887 /* If nothing has been pushed on the stack at all
20888 then this will return -4. This *is* correct! */
20889 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
20892 gcc_unreachable ();
20894 gcc_unreachable ();
20896 case FRAME_POINTER_REGNUM
:
20899 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20902 case ARM_HARD_FRAME_POINTER_REGNUM
:
20903 /* The hard frame pointer points to the top entry in the
20904 stack frame. The soft frame pointer to the bottom entry
20905 in the stack frame. If there is no stack frame at all,
20906 then they are identical. */
20908 return offsets
->frame
- offsets
->soft_frame
;
20910 case STACK_POINTER_REGNUM
:
20911 return offsets
->outgoing_args
- offsets
->soft_frame
;
20914 gcc_unreachable ();
20916 gcc_unreachable ();
20919 /* You cannot eliminate from the stack pointer.
20920 In theory you could eliminate from the hard frame
20921 pointer to the stack pointer, but this will never
20922 happen, since if a stack frame is not needed the
20923 hard frame pointer will never be used. */
20924 gcc_unreachable ();
20928 /* Given FROM and TO register numbers, say whether this elimination is
20929 allowed. Frame pointer elimination is automatically handled.
20931 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20932 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20933 pointer, we must eliminate FRAME_POINTER_REGNUM into
20934 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20935 ARG_POINTER_REGNUM. */
20938 arm_can_eliminate (const int from
, const int to
)
20940 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
20941 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
20942 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
20943 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
20947 /* Emit RTL to save coprocessor registers on function entry. Returns the
20948 number of bytes pushed. */
20951 arm_save_coproc_regs(void)
20953 int saved_size
= 0;
20955 unsigned start_reg
;
20958 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
20959 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
20961 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20962 insn
= gen_rtx_MEM (V2SImode
, insn
);
20963 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
20964 RTX_FRAME_RELATED_P (insn
) = 1;
20968 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
20970 start_reg
= FIRST_VFP_REGNUM
;
20972 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
20974 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
20975 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
20977 if (start_reg
!= reg
)
20978 saved_size
+= vfp_emit_fstmd (start_reg
,
20979 (reg
- start_reg
) / 2);
20980 start_reg
= reg
+ 2;
20983 if (start_reg
!= reg
)
20984 saved_size
+= vfp_emit_fstmd (start_reg
,
20985 (reg
- start_reg
) / 2);
20991 /* Set the Thumb frame pointer from the stack pointer. */
20994 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
20996 HOST_WIDE_INT amount
;
20999 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21001 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21002 stack_pointer_rtx
, GEN_INT (amount
)));
21005 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21006 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21007 expects the first two operands to be the same. */
21010 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21012 hard_frame_pointer_rtx
));
21016 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21017 hard_frame_pointer_rtx
,
21018 stack_pointer_rtx
));
21020 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
21021 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21022 RTX_FRAME_RELATED_P (dwarf
) = 1;
21023 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21026 RTX_FRAME_RELATED_P (insn
) = 1;
21029 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21032 arm_expand_prologue (void)
21037 unsigned long live_regs_mask
;
21038 unsigned long func_type
;
21040 int saved_pretend_args
= 0;
21041 int saved_regs
= 0;
21042 unsigned HOST_WIDE_INT args_to_push
;
21043 arm_stack_offsets
*offsets
;
21045 func_type
= arm_current_func_type ();
21047 /* Naked functions don't have prologues. */
21048 if (IS_NAKED (func_type
))
21051 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21052 args_to_push
= crtl
->args
.pretend_args_size
;
21054 /* Compute which register we will have to save onto the stack. */
21055 offsets
= arm_get_frame_offsets ();
21056 live_regs_mask
= offsets
->saved_regs_mask
;
21058 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21060 if (IS_STACKALIGN (func_type
))
21064 /* Handle a word-aligned stack pointer. We generate the following:
21069 <save and restore r0 in normal prologue/epilogue>
21073 The unwinder doesn't need to know about the stack realignment.
21074 Just tell it we saved SP in r0. */
21075 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21077 r0
= gen_rtx_REG (SImode
, 0);
21078 r1
= gen_rtx_REG (SImode
, 1);
21080 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21081 RTX_FRAME_RELATED_P (insn
) = 1;
21082 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21084 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21086 /* ??? The CFA changes here, which may cause GDB to conclude that it
21087 has entered a different function. That said, the unwind info is
21088 correct, individually, before and after this instruction because
21089 we've described the save of SP, which will override the default
21090 handling of SP as restoring from the CFA. */
21091 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21094 /* For APCS frames, if IP register is clobbered
21095 when creating frame, save that register in a special
21097 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21099 if (IS_INTERRUPT (func_type
))
21101 /* Interrupt functions must not corrupt any registers.
21102 Creating a frame pointer however, corrupts the IP
21103 register, so we must push it first. */
21104 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21106 /* Do not set RTX_FRAME_RELATED_P on this insn.
21107 The dwarf stack unwinding code only wants to see one
21108 stack decrement per function, and this is not it. If
21109 this instruction is labeled as being part of the frame
21110 creation sequence then dwarf2out_frame_debug_expr will
21111 die when it encounters the assignment of IP to FP
21112 later on, since the use of SP here establishes SP as
21113 the CFA register and not IP.
21115 Anyway this instruction is not really part of the stack
21116 frame creation although it is part of the prologue. */
21118 else if (IS_NESTED (func_type
))
21120 /* The static chain register is the same as the IP register
21121 used as a scratch register during stack frame creation.
21122 To get around this need to find somewhere to store IP
21123 whilst the frame is being created. We try the following
21126 1. The last argument register r3 if it is available.
21127 2. A slot on the stack above the frame if there are no
21128 arguments to push onto the stack.
21129 3. Register r3 again, after pushing the argument registers
21130 onto the stack, if this is a varargs function.
21131 4. The last slot on the stack created for the arguments to
21132 push, if this isn't a varargs function.
21134 Note - we only need to tell the dwarf2 backend about the SP
21135 adjustment in the second variant; the static chain register
21136 doesn't need to be unwound, as it doesn't contain a value
21137 inherited from the caller. */
21139 if (!arm_r3_live_at_start_p ())
21140 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21141 else if (args_to_push
== 0)
21145 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21148 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21149 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21152 /* Just tell the dwarf backend that we adjusted SP. */
21153 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21154 plus_constant (Pmode
, stack_pointer_rtx
,
21156 RTX_FRAME_RELATED_P (insn
) = 1;
21157 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21161 /* Store the args on the stack. */
21162 if (cfun
->machine
->uses_anonymous_args
)
21165 = emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21166 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21167 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21168 saved_pretend_args
= 1;
21174 if (args_to_push
== 4)
21175 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21178 = gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21179 plus_constant (Pmode
,
21183 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21185 /* Just tell the dwarf backend that we adjusted SP. */
21187 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21188 plus_constant (Pmode
, stack_pointer_rtx
,
21190 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21193 RTX_FRAME_RELATED_P (insn
) = 1;
21194 fp_offset
= args_to_push
;
21199 insn
= emit_set_insn (ip_rtx
,
21200 plus_constant (Pmode
, stack_pointer_rtx
,
21202 RTX_FRAME_RELATED_P (insn
) = 1;
21207 /* Push the argument registers, or reserve space for them. */
21208 if (cfun
->machine
->uses_anonymous_args
)
21209 insn
= emit_multi_reg_push
21210 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21211 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21214 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21215 GEN_INT (- args_to_push
)));
21216 RTX_FRAME_RELATED_P (insn
) = 1;
21219 /* If this is an interrupt service routine, and the link register
21220 is going to be pushed, and we're not generating extra
21221 push of IP (needed when frame is needed and frame layout if apcs),
21222 subtracting four from LR now will mean that the function return
21223 can be done with a single instruction. */
21224 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21225 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21226 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21229 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21231 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21234 if (live_regs_mask
)
21236 unsigned long dwarf_regs_mask
= live_regs_mask
;
21238 saved_regs
+= bit_count (live_regs_mask
) * 4;
21239 if (optimize_size
&& !frame_pointer_needed
21240 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21242 /* If no coprocessor registers are being pushed and we don't have
21243 to worry about a frame pointer then push extra registers to
21244 create the stack frame. This is done is a way that does not
21245 alter the frame layout, so is independent of the epilogue. */
21249 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21251 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21252 if (frame
&& n
* 4 >= frame
)
21255 live_regs_mask
|= (1 << n
) - 1;
21256 saved_regs
+= frame
;
21261 && current_tune
->prefer_ldrd_strd
21262 && !optimize_function_for_size_p (cfun
))
21264 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21266 thumb2_emit_strd_push (live_regs_mask
);
21267 else if (TARGET_ARM
21268 && !TARGET_APCS_FRAME
21269 && !IS_INTERRUPT (func_type
))
21270 arm_emit_strd_push (live_regs_mask
);
21273 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21274 RTX_FRAME_RELATED_P (insn
) = 1;
21279 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21280 RTX_FRAME_RELATED_P (insn
) = 1;
21284 if (! IS_VOLATILE (func_type
))
21285 saved_regs
+= arm_save_coproc_regs ();
21287 if (frame_pointer_needed
&& TARGET_ARM
)
21289 /* Create the new frame pointer. */
21290 if (TARGET_APCS_FRAME
)
21292 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21293 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21294 RTX_FRAME_RELATED_P (insn
) = 1;
21296 if (IS_NESTED (func_type
))
21298 /* Recover the static chain register. */
21299 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21300 insn
= gen_rtx_REG (SImode
, 3);
21303 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21304 insn
= gen_frame_mem (SImode
, insn
);
21306 emit_set_insn (ip_rtx
, insn
);
21307 /* Add a USE to stop propagate_one_insn() from barfing. */
21308 emit_insn (gen_force_register_use (ip_rtx
));
21313 insn
= GEN_INT (saved_regs
- 4);
21314 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21315 stack_pointer_rtx
, insn
));
21316 RTX_FRAME_RELATED_P (insn
) = 1;
21320 if (flag_stack_usage_info
)
21321 current_function_static_stack_size
21322 = offsets
->outgoing_args
- offsets
->saved_args
;
21324 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21326 /* This add can produce multiple insns for a large constant, so we
21327 need to get tricky. */
21328 rtx last
= get_last_insn ();
21330 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21331 - offsets
->outgoing_args
);
21333 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21337 last
= last
? NEXT_INSN (last
) : get_insns ();
21338 RTX_FRAME_RELATED_P (last
) = 1;
21340 while (last
!= insn
);
21342 /* If the frame pointer is needed, emit a special barrier that
21343 will prevent the scheduler from moving stores to the frame
21344 before the stack adjustment. */
21345 if (frame_pointer_needed
)
21346 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
21347 hard_frame_pointer_rtx
));
21351 if (frame_pointer_needed
&& TARGET_THUMB2
)
21352 thumb_set_frame_pointer (offsets
);
21354 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21356 unsigned long mask
;
21358 mask
= live_regs_mask
;
21359 mask
&= THUMB2_WORK_REGS
;
21360 if (!IS_NESTED (func_type
))
21361 mask
|= (1 << IP_REGNUM
);
21362 arm_load_pic_register (mask
);
21365 /* If we are profiling, make sure no instructions are scheduled before
21366 the call to mcount. Similarly if the user has requested no
21367 scheduling in the prolog. Similarly if we want non-call exceptions
21368 using the EABI unwinder, to prevent faulting instructions from being
21369 swapped with a stack adjustment. */
21370 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21371 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21372 && cfun
->can_throw_non_call_exceptions
))
21373 emit_insn (gen_blockage ());
21375 /* If the link register is being kept alive, with the return address in it,
21376 then make sure that it does not get reused by the ce2 pass. */
21377 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21378 cfun
->machine
->lr_save_eliminated
= 1;
21381 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21383 arm_print_condition (FILE *stream
)
21385 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21387 /* Branch conversion is not implemented for Thumb-2. */
21390 output_operand_lossage ("predicated Thumb instruction");
21393 if (current_insn_predicate
!= NULL
)
21395 output_operand_lossage
21396 ("predicated instruction in conditional sequence");
21400 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21402 else if (current_insn_predicate
)
21404 enum arm_cond_code code
;
21408 output_operand_lossage ("predicated Thumb instruction");
21412 code
= get_arm_condition_code (current_insn_predicate
);
21413 fputs (arm_condition_codes
[code
], stream
);
21418 /* Globally reserved letters: acln
21419 Puncutation letters currently used: @_|?().!#
21420 Lower case letters currently used: bcdefhimpqtvwxyz
21421 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21422 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21424 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21426 If CODE is 'd', then the X is a condition operand and the instruction
21427 should only be executed if the condition is true.
21428 if CODE is 'D', then the X is a condition operand and the instruction
21429 should only be executed if the condition is false: however, if the mode
21430 of the comparison is CCFPEmode, then always execute the instruction -- we
21431 do this because in these circumstances !GE does not necessarily imply LT;
21432 in these cases the instruction pattern will take care to make sure that
21433 an instruction containing %d will follow, thereby undoing the effects of
21434 doing this instruction unconditionally.
21435 If CODE is 'N' then X is a floating point operand that must be negated
21437 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21438 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21440 arm_print_operand (FILE *stream
, rtx x
, int code
)
21445 fputs (ASM_COMMENT_START
, stream
);
21449 fputs (user_label_prefix
, stream
);
21453 fputs (REGISTER_PREFIX
, stream
);
21457 arm_print_condition (stream
);
21461 /* Nothing in unified syntax, otherwise the current condition code. */
21462 if (!TARGET_UNIFIED_ASM
)
21463 arm_print_condition (stream
);
21467 /* The current condition code in unified syntax, otherwise nothing. */
21468 if (TARGET_UNIFIED_ASM
)
21469 arm_print_condition (stream
);
21473 /* The current condition code for a condition code setting instruction.
21474 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21475 if (TARGET_UNIFIED_ASM
)
21477 fputc('s', stream
);
21478 arm_print_condition (stream
);
21482 arm_print_condition (stream
);
21483 fputc('s', stream
);
21488 /* If the instruction is conditionally executed then print
21489 the current condition code, otherwise print 's'. */
21490 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
21491 if (current_insn_predicate
)
21492 arm_print_condition (stream
);
21494 fputc('s', stream
);
21497 /* %# is a "break" sequence. It doesn't output anything, but is used to
21498 separate e.g. operand numbers from following text, if that text consists
21499 of further digits which we don't want to be part of the operand
21507 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
21508 r
= real_value_negate (&r
);
21509 fprintf (stream
, "%s", fp_const_from_val (&r
));
21513 /* An integer or symbol address without a preceding # sign. */
21515 switch (GET_CODE (x
))
21518 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21522 output_addr_const (stream
, x
);
21526 if (GET_CODE (XEXP (x
, 0)) == PLUS
21527 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21529 output_addr_const (stream
, x
);
21532 /* Fall through. */
21535 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21539 /* An integer that we want to print in HEX. */
21541 switch (GET_CODE (x
))
21544 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21548 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21553 if (CONST_INT_P (x
))
21556 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21557 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21561 putc ('~', stream
);
21562 output_addr_const (stream
, x
);
21567 /* Print the log2 of a CONST_INT. */
21571 if (!CONST_INT_P (x
)
21572 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
21573 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21575 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21580 /* The low 16 bits of an immediate constant. */
21581 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21585 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21589 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21597 shift
= shift_op (x
, &val
);
21601 fprintf (stream
, ", %s ", shift
);
21603 arm_print_operand (stream
, XEXP (x
, 1), 0);
21605 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21610 /* An explanation of the 'Q', 'R' and 'H' register operands:
21612 In a pair of registers containing a DI or DF value the 'Q'
21613 operand returns the register number of the register containing
21614 the least significant part of the value. The 'R' operand returns
21615 the register number of the register containing the most
21616 significant part of the value.
21618 The 'H' operand returns the higher of the two register numbers.
21619 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21620 same as the 'Q' operand, since the most significant part of the
21621 value is held in the lower number register. The reverse is true
21622 on systems where WORDS_BIG_ENDIAN is false.
21624 The purpose of these operands is to distinguish between cases
21625 where the endian-ness of the values is important (for example
21626 when they are added together), and cases where the endian-ness
21627 is irrelevant, but the order of register operations is important.
21628 For example when loading a value from memory into a register
21629 pair, the endian-ness does not matter. Provided that the value
21630 from the lower memory address is put into the lower numbered
21631 register, and the value from the higher address is put into the
21632 higher numbered register, the load will work regardless of whether
21633 the value being loaded is big-wordian or little-wordian. The
21634 order of the two register loads can matter however, if the address
21635 of the memory location is actually held in one of the registers
21636 being overwritten by the load.
21638 The 'Q' and 'R' constraints are also available for 64-bit
21641 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21643 rtx part
= gen_lowpart (SImode
, x
);
21644 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21648 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21650 output_operand_lossage ("invalid operand for code '%c'", code
);
21654 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
21658 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21660 enum machine_mode mode
= GET_MODE (x
);
21663 if (mode
== VOIDmode
)
21665 part
= gen_highpart_mode (SImode
, mode
, x
);
21666 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21670 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21672 output_operand_lossage ("invalid operand for code '%c'", code
);
21676 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
21680 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21682 output_operand_lossage ("invalid operand for code '%c'", code
);
21686 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
21690 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21692 output_operand_lossage ("invalid operand for code '%c'", code
);
21696 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
21700 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21702 output_operand_lossage ("invalid operand for code '%c'", code
);
21706 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
21710 asm_fprintf (stream
, "%r",
21711 REG_P (XEXP (x
, 0))
21712 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
21716 asm_fprintf (stream
, "{%r-%r}",
21718 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
21721 /* Like 'M', but writing doubleword vector registers, for use by Neon
21725 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
21726 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
21728 asm_fprintf (stream
, "{d%d}", regno
);
21730 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
21735 /* CONST_TRUE_RTX means always -- that's the default. */
21736 if (x
== const_true_rtx
)
21739 if (!COMPARISON_P (x
))
21741 output_operand_lossage ("invalid operand for code '%c'", code
);
21745 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
21750 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21751 want to do that. */
21752 if (x
== const_true_rtx
)
21754 output_operand_lossage ("instruction never executed");
21757 if (!COMPARISON_P (x
))
21759 output_operand_lossage ("invalid operand for code '%c'", code
);
21763 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
21764 (get_arm_condition_code (x
))],
21774 /* Former Maverick support, removed after GCC-4.7. */
21775 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
21780 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
21781 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
21782 /* Bad value for wCG register number. */
21784 output_operand_lossage ("invalid operand for code '%c'", code
);
21789 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
21792 /* Print an iWMMXt control register name. */
21794 if (!CONST_INT_P (x
)
21796 || INTVAL (x
) >= 16)
21797 /* Bad value for wC register number. */
21799 output_operand_lossage ("invalid operand for code '%c'", code
);
21805 static const char * wc_reg_names
[16] =
21807 "wCID", "wCon", "wCSSF", "wCASF",
21808 "wC4", "wC5", "wC6", "wC7",
21809 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21810 "wC12", "wC13", "wC14", "wC15"
21813 fputs (wc_reg_names
[INTVAL (x
)], stream
);
21817 /* Print the high single-precision register of a VFP double-precision
21821 enum machine_mode mode
= GET_MODE (x
);
21824 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
21826 output_operand_lossage ("invalid operand for code '%c'", code
);
21831 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
21833 output_operand_lossage ("invalid operand for code '%c'", code
);
21837 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
21841 /* Print a VFP/Neon double precision or quad precision register name. */
21845 enum machine_mode mode
= GET_MODE (x
);
21846 int is_quad
= (code
== 'q');
21849 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
21851 output_operand_lossage ("invalid operand for code '%c'", code
);
21856 || !IS_VFP_REGNUM (REGNO (x
)))
21858 output_operand_lossage ("invalid operand for code '%c'", code
);
21863 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
21864 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
21866 output_operand_lossage ("invalid operand for code '%c'", code
);
21870 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
21871 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
21875 /* These two codes print the low/high doubleword register of a Neon quad
21876 register, respectively. For pair-structure types, can also print
21877 low/high quadword registers. */
21881 enum machine_mode mode
= GET_MODE (x
);
21884 if ((GET_MODE_SIZE (mode
) != 16
21885 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
21887 output_operand_lossage ("invalid operand for code '%c'", code
);
21892 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
21894 output_operand_lossage ("invalid operand for code '%c'", code
);
21898 if (GET_MODE_SIZE (mode
) == 16)
21899 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
21900 + (code
== 'f' ? 1 : 0));
21902 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
21903 + (code
== 'f' ? 1 : 0));
21907 /* Print a VFPv3 floating-point constant, represented as an integer
21911 int index
= vfp3_const_double_index (x
);
21912 gcc_assert (index
!= -1);
21913 fprintf (stream
, "%d", index
);
21917 /* Print bits representing opcode features for Neon.
21919 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21920 and polynomials as unsigned.
21922 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21924 Bit 2 is 1 for rounding functions, 0 otherwise. */
21926 /* Identify the type as 's', 'u', 'p' or 'f'. */
21929 HOST_WIDE_INT bits
= INTVAL (x
);
21930 fputc ("uspf"[bits
& 3], stream
);
21934 /* Likewise, but signed and unsigned integers are both 'i'. */
21937 HOST_WIDE_INT bits
= INTVAL (x
);
21938 fputc ("iipf"[bits
& 3], stream
);
21942 /* As for 'T', but emit 'u' instead of 'p'. */
21945 HOST_WIDE_INT bits
= INTVAL (x
);
21946 fputc ("usuf"[bits
& 3], stream
);
21950 /* Bit 2: rounding (vs none). */
21953 HOST_WIDE_INT bits
= INTVAL (x
);
21954 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
21958 /* Memory operand for vld1/vst1 instruction. */
21962 bool postinc
= FALSE
;
21963 rtx postinc_reg
= NULL
;
21964 unsigned align
, memsize
, align_bits
;
21966 gcc_assert (MEM_P (x
));
21967 addr
= XEXP (x
, 0);
21968 if (GET_CODE (addr
) == POST_INC
)
21971 addr
= XEXP (addr
, 0);
21973 if (GET_CODE (addr
) == POST_MODIFY
)
21975 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
21976 addr
= XEXP (addr
, 0);
21978 asm_fprintf (stream
, "[%r", REGNO (addr
));
21980 /* We know the alignment of this access, so we can emit a hint in the
21981 instruction (for some alignments) as an aid to the memory subsystem
21983 align
= MEM_ALIGN (x
) >> 3;
21984 memsize
= MEM_SIZE (x
);
21986 /* Only certain alignment specifiers are supported by the hardware. */
21987 if (memsize
== 32 && (align
% 32) == 0)
21989 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
21991 else if (memsize
>= 8 && (align
% 8) == 0)
21996 if (align_bits
!= 0)
21997 asm_fprintf (stream
, ":%d", align_bits
);
21999 asm_fprintf (stream
, "]");
22002 fputs("!", stream
);
22004 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
22012 gcc_assert (MEM_P (x
));
22013 addr
= XEXP (x
, 0);
22014 gcc_assert (REG_P (addr
));
22015 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22019 /* Translate an S register number into a D register number and element index. */
22022 enum machine_mode mode
= GET_MODE (x
);
22025 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22027 output_operand_lossage ("invalid operand for code '%c'", code
);
22032 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22034 output_operand_lossage ("invalid operand for code '%c'", code
);
22038 regno
= regno
- FIRST_VFP_REGNUM
;
22039 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22044 gcc_assert (CONST_DOUBLE_P (x
));
22046 result
= vfp3_const_double_for_fract_bits (x
);
22048 result
= vfp3_const_double_for_bits (x
);
22049 fprintf (stream
, "#%d", result
);
22052 /* Register specifier for vld1.16/vst1.16. Translate the S register
22053 number into a D register number and element index. */
22056 enum machine_mode mode
= GET_MODE (x
);
22059 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22061 output_operand_lossage ("invalid operand for code '%c'", code
);
22066 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22068 output_operand_lossage ("invalid operand for code '%c'", code
);
22072 regno
= regno
- FIRST_VFP_REGNUM
;
22073 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
22080 output_operand_lossage ("missing operand");
22084 switch (GET_CODE (x
))
22087 asm_fprintf (stream
, "%r", REGNO (x
));
22091 output_memory_reference_mode
= GET_MODE (x
);
22092 output_address (XEXP (x
, 0));
22099 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22100 sizeof (fpstr
), 0, 1);
22101 fprintf (stream
, "#%s", fpstr
);
22104 fprintf (stream
, "#%s", fp_immediate_constant (x
));
22108 gcc_assert (GET_CODE (x
) != NEG
);
22109 fputc ('#', stream
);
22110 if (GET_CODE (x
) == HIGH
)
22112 fputs (":lower16:", stream
);
22116 output_addr_const (stream
, x
);
22122 /* Target hook for printing a memory address. */
22124 arm_print_operand_address (FILE *stream
, rtx x
)
22128 int is_minus
= GET_CODE (x
) == MINUS
;
22131 asm_fprintf (stream
, "[%r]", REGNO (x
));
22132 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22134 rtx base
= XEXP (x
, 0);
22135 rtx index
= XEXP (x
, 1);
22136 HOST_WIDE_INT offset
= 0;
22138 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
22140 /* Ensure that BASE is a register. */
22141 /* (one of them must be). */
22142 /* Also ensure the SP is not used as in index register. */
22147 switch (GET_CODE (index
))
22150 offset
= INTVAL (index
);
22153 asm_fprintf (stream
, "[%r, #%wd]",
22154 REGNO (base
), offset
);
22158 asm_fprintf (stream
, "[%r, %s%r]",
22159 REGNO (base
), is_minus
? "-" : "",
22169 asm_fprintf (stream
, "[%r, %s%r",
22170 REGNO (base
), is_minus
? "-" : "",
22171 REGNO (XEXP (index
, 0)));
22172 arm_print_operand (stream
, index
, 'S');
22173 fputs ("]", stream
);
22178 gcc_unreachable ();
22181 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22182 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22184 extern enum machine_mode output_memory_reference_mode
;
22186 gcc_assert (REG_P (XEXP (x
, 0)));
22188 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22189 asm_fprintf (stream
, "[%r, #%s%d]!",
22190 REGNO (XEXP (x
, 0)),
22191 GET_CODE (x
) == PRE_DEC
? "-" : "",
22192 GET_MODE_SIZE (output_memory_reference_mode
));
22194 asm_fprintf (stream
, "[%r], #%s%d",
22195 REGNO (XEXP (x
, 0)),
22196 GET_CODE (x
) == POST_DEC
? "-" : "",
22197 GET_MODE_SIZE (output_memory_reference_mode
));
22199 else if (GET_CODE (x
) == PRE_MODIFY
)
22201 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22202 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22203 asm_fprintf (stream
, "#%wd]!",
22204 INTVAL (XEXP (XEXP (x
, 1), 1)));
22206 asm_fprintf (stream
, "%r]!",
22207 REGNO (XEXP (XEXP (x
, 1), 1)));
22209 else if (GET_CODE (x
) == POST_MODIFY
)
22211 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22212 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22213 asm_fprintf (stream
, "#%wd",
22214 INTVAL (XEXP (XEXP (x
, 1), 1)));
22216 asm_fprintf (stream
, "%r",
22217 REGNO (XEXP (XEXP (x
, 1), 1)));
22219 else output_addr_const (stream
, x
);
22224 asm_fprintf (stream
, "[%r]", REGNO (x
));
22225 else if (GET_CODE (x
) == POST_INC
)
22226 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22227 else if (GET_CODE (x
) == PLUS
)
22229 gcc_assert (REG_P (XEXP (x
, 0)));
22230 if (CONST_INT_P (XEXP (x
, 1)))
22231 asm_fprintf (stream
, "[%r, #%wd]",
22232 REGNO (XEXP (x
, 0)),
22233 INTVAL (XEXP (x
, 1)));
22235 asm_fprintf (stream
, "[%r, %r]",
22236 REGNO (XEXP (x
, 0)),
22237 REGNO (XEXP (x
, 1)));
22240 output_addr_const (stream
, x
);
22244 /* Target hook for indicating whether a punctuation character for
22245 TARGET_PRINT_OPERAND is valid. */
22247 arm_print_operand_punct_valid_p (unsigned char code
)
22249 return (code
== '@' || code
== '|' || code
== '.'
22250 || code
== '(' || code
== ')' || code
== '#'
22251 || (TARGET_32BIT
&& (code
== '?'))
22252 || (TARGET_THUMB2
&& (code
== '!'))
22253 || (TARGET_THUMB
&& (code
== '_')));
22256 /* Target hook for assembling integer objects. The ARM version needs to
22257 handle word-sized values specially. */
22259 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22261 enum machine_mode mode
;
22263 if (size
== UNITS_PER_WORD
&& aligned_p
)
22265 fputs ("\t.word\t", asm_out_file
);
22266 output_addr_const (asm_out_file
, x
);
22268 /* Mark symbols as position independent. We only do this in the
22269 .text segment, not in the .data segment. */
22270 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22271 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22273 /* See legitimize_pic_address for an explanation of the
22274 TARGET_VXWORKS_RTP check. */
22275 if (!arm_pic_data_is_text_relative
22276 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
22277 fputs ("(GOT)", asm_out_file
);
22279 fputs ("(GOTOFF)", asm_out_file
);
22281 fputc ('\n', asm_out_file
);
22285 mode
= GET_MODE (x
);
22287 if (arm_vector_mode_supported_p (mode
))
22291 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22293 units
= CONST_VECTOR_NUNITS (x
);
22294 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
22296 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22297 for (i
= 0; i
< units
; i
++)
22299 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22301 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22304 for (i
= 0; i
< units
; i
++)
22306 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22307 REAL_VALUE_TYPE rval
;
22309 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
22312 (rval
, GET_MODE_INNER (mode
),
22313 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22319 return default_assemble_integer (x
, size
, aligned_p
);
22323 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22327 if (!TARGET_AAPCS_BASED
)
22330 default_named_section_asm_out_constructor
22331 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22335 /* Put these in the .init_array section, using a special relocation. */
22336 if (priority
!= DEFAULT_INIT_PRIORITY
)
22339 sprintf (buf
, "%s.%.5u",
22340 is_ctor
? ".init_array" : ".fini_array",
22342 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
22349 switch_to_section (s
);
22350 assemble_align (POINTER_SIZE
);
22351 fputs ("\t.word\t", asm_out_file
);
22352 output_addr_const (asm_out_file
, symbol
);
22353 fputs ("(target1)\n", asm_out_file
);
22356 /* Add a function to the list of static constructors. */
22359 arm_elf_asm_constructor (rtx symbol
, int priority
)
22361 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22364 /* Add a function to the list of static destructors. */
22367 arm_elf_asm_destructor (rtx symbol
, int priority
)
22369 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22372 /* A finite state machine takes care of noticing whether or not instructions
22373 can be conditionally executed, and thus decrease execution time and code
22374 size by deleting branch instructions. The fsm is controlled by
22375 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22377 /* The state of the fsm controlling condition codes are:
22378 0: normal, do nothing special
22379 1: make ASM_OUTPUT_OPCODE not output this instruction
22380 2: make ASM_OUTPUT_OPCODE not output this instruction
22381 3: make instructions conditional
22382 4: make instructions conditional
22384 State transitions (state->state by whom under condition):
22385 0 -> 1 final_prescan_insn if the `target' is a label
22386 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22387 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22388 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22389 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22390 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22391 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22392 (the target insn is arm_target_insn).
22394 If the jump clobbers the conditions then we use states 2 and 4.
22396 A similar thing can be done with conditional return insns.
22398 XXX In case the `target' is an unconditional branch, this conditionalising
22399 of the instructions always reduces code size, but not always execution
22400 time. But then, I want to reduce the code size to somewhere near what
22401 /bin/cc produces. */
22403 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22404 instructions. When a COND_EXEC instruction is seen the subsequent
22405 instructions are scanned so that multiple conditional instructions can be
22406 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22407 specify the length and true/false mask for the IT block. These will be
22408 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22410 /* Returns the index of the ARM condition code string in
22411 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22412 COMPARISON should be an rtx like `(eq (...) (...))'. */
22415 maybe_get_arm_condition_code (rtx comparison
)
22417 enum machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22418 enum arm_cond_code code
;
22419 enum rtx_code comp_code
= GET_CODE (comparison
);
22421 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22422 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22423 XEXP (comparison
, 1));
22427 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22428 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22429 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22430 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22431 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22432 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22433 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22434 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22435 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22436 case CC_DLTUmode
: code
= ARM_CC
;
22439 if (comp_code
== EQ
)
22440 return ARM_INVERSE_CONDITION_CODE (code
);
22441 if (comp_code
== NE
)
22448 case NE
: return ARM_NE
;
22449 case EQ
: return ARM_EQ
;
22450 case GE
: return ARM_PL
;
22451 case LT
: return ARM_MI
;
22452 default: return ARM_NV
;
22458 case NE
: return ARM_NE
;
22459 case EQ
: return ARM_EQ
;
22460 default: return ARM_NV
;
22466 case NE
: return ARM_MI
;
22467 case EQ
: return ARM_PL
;
22468 default: return ARM_NV
;
22473 /* We can handle all cases except UNEQ and LTGT. */
22476 case GE
: return ARM_GE
;
22477 case GT
: return ARM_GT
;
22478 case LE
: return ARM_LS
;
22479 case LT
: return ARM_MI
;
22480 case NE
: return ARM_NE
;
22481 case EQ
: return ARM_EQ
;
22482 case ORDERED
: return ARM_VC
;
22483 case UNORDERED
: return ARM_VS
;
22484 case UNLT
: return ARM_LT
;
22485 case UNLE
: return ARM_LE
;
22486 case UNGT
: return ARM_HI
;
22487 case UNGE
: return ARM_PL
;
22488 /* UNEQ and LTGT do not have a representation. */
22489 case UNEQ
: /* Fall through. */
22490 case LTGT
: /* Fall through. */
22491 default: return ARM_NV
;
22497 case NE
: return ARM_NE
;
22498 case EQ
: return ARM_EQ
;
22499 case GE
: return ARM_LE
;
22500 case GT
: return ARM_LT
;
22501 case LE
: return ARM_GE
;
22502 case LT
: return ARM_GT
;
22503 case GEU
: return ARM_LS
;
22504 case GTU
: return ARM_CC
;
22505 case LEU
: return ARM_CS
;
22506 case LTU
: return ARM_HI
;
22507 default: return ARM_NV
;
22513 case LTU
: return ARM_CS
;
22514 case GEU
: return ARM_CC
;
22515 default: return ARM_NV
;
22521 case NE
: return ARM_NE
;
22522 case EQ
: return ARM_EQ
;
22523 case GEU
: return ARM_CS
;
22524 case GTU
: return ARM_HI
;
22525 case LEU
: return ARM_LS
;
22526 case LTU
: return ARM_CC
;
22527 default: return ARM_NV
;
22533 case GE
: return ARM_GE
;
22534 case LT
: return ARM_LT
;
22535 case GEU
: return ARM_CS
;
22536 case LTU
: return ARM_CC
;
22537 default: return ARM_NV
;
22543 case NE
: return ARM_NE
;
22544 case EQ
: return ARM_EQ
;
22545 case GE
: return ARM_GE
;
22546 case GT
: return ARM_GT
;
22547 case LE
: return ARM_LE
;
22548 case LT
: return ARM_LT
;
22549 case GEU
: return ARM_CS
;
22550 case GTU
: return ARM_HI
;
22551 case LEU
: return ARM_LS
;
22552 case LTU
: return ARM_CC
;
22553 default: return ARM_NV
;
22556 default: gcc_unreachable ();
22560 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22561 static enum arm_cond_code
22562 get_arm_condition_code (rtx comparison
)
22564 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22565 gcc_assert (code
!= ARM_NV
);
22569 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22572 thumb2_final_prescan_insn (rtx insn
)
22574 rtx first_insn
= insn
;
22575 rtx body
= PATTERN (insn
);
22577 enum arm_cond_code code
;
22582 /* max_insns_skipped in the tune was already taken into account in the
22583 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22584 just emit the IT blocks as we can. It does not make sense to split
22586 max
= MAX_INSN_PER_IT_BLOCK
;
22588 /* Remove the previous insn from the count of insns to be output. */
22589 if (arm_condexec_count
)
22590 arm_condexec_count
--;
22592 /* Nothing to do if we are already inside a conditional block. */
22593 if (arm_condexec_count
)
22596 if (GET_CODE (body
) != COND_EXEC
)
22599 /* Conditional jumps are implemented directly. */
22603 predicate
= COND_EXEC_TEST (body
);
22604 arm_current_cc
= get_arm_condition_code (predicate
);
22606 n
= get_attr_ce_count (insn
);
22607 arm_condexec_count
= 1;
22608 arm_condexec_mask
= (1 << n
) - 1;
22609 arm_condexec_masklen
= n
;
22610 /* See if subsequent instructions can be combined into the same block. */
22613 insn
= next_nonnote_insn (insn
);
22615 /* Jumping into the middle of an IT block is illegal, so a label or
22616 barrier terminates the block. */
22617 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
22620 body
= PATTERN (insn
);
22621 /* USE and CLOBBER aren't really insns, so just skip them. */
22622 if (GET_CODE (body
) == USE
22623 || GET_CODE (body
) == CLOBBER
)
22626 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22627 if (GET_CODE (body
) != COND_EXEC
)
22629 /* Maximum number of conditionally executed instructions in a block. */
22630 n
= get_attr_ce_count (insn
);
22631 if (arm_condexec_masklen
+ n
> max
)
22634 predicate
= COND_EXEC_TEST (body
);
22635 code
= get_arm_condition_code (predicate
);
22636 mask
= (1 << n
) - 1;
22637 if (arm_current_cc
== code
)
22638 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
22639 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
22642 arm_condexec_count
++;
22643 arm_condexec_masklen
+= n
;
22645 /* A jump must be the last instruction in a conditional block. */
22649 /* Restore recog_data (getting the attributes of other insns can
22650 destroy this array, but final.c assumes that it remains intact
22651 across this call). */
22652 extract_constrain_insn_cached (first_insn
);
22656 arm_final_prescan_insn (rtx insn
)
22658 /* BODY will hold the body of INSN. */
22659 rtx body
= PATTERN (insn
);
22661 /* This will be 1 if trying to repeat the trick, and things need to be
22662 reversed if it appears to fail. */
22665 /* If we start with a return insn, we only succeed if we find another one. */
22666 int seeking_return
= 0;
22667 enum rtx_code return_code
= UNKNOWN
;
22669 /* START_INSN will hold the insn from where we start looking. This is the
22670 first insn after the following code_label if REVERSE is true. */
22671 rtx start_insn
= insn
;
22673 /* If in state 4, check if the target branch is reached, in order to
22674 change back to state 0. */
22675 if (arm_ccfsm_state
== 4)
22677 if (insn
== arm_target_insn
)
22679 arm_target_insn
= NULL
;
22680 arm_ccfsm_state
= 0;
22685 /* If in state 3, it is possible to repeat the trick, if this insn is an
22686 unconditional branch to a label, and immediately following this branch
22687 is the previous target label which is only used once, and the label this
22688 branch jumps to is not too far off. */
22689 if (arm_ccfsm_state
== 3)
22691 if (simplejump_p (insn
))
22693 start_insn
= next_nonnote_insn (start_insn
);
22694 if (BARRIER_P (start_insn
))
22696 /* XXX Isn't this always a barrier? */
22697 start_insn
= next_nonnote_insn (start_insn
);
22699 if (LABEL_P (start_insn
)
22700 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22701 && LABEL_NUSES (start_insn
) == 1)
22706 else if (ANY_RETURN_P (body
))
22708 start_insn
= next_nonnote_insn (start_insn
);
22709 if (BARRIER_P (start_insn
))
22710 start_insn
= next_nonnote_insn (start_insn
);
22711 if (LABEL_P (start_insn
)
22712 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22713 && LABEL_NUSES (start_insn
) == 1)
22716 seeking_return
= 1;
22717 return_code
= GET_CODE (body
);
22726 gcc_assert (!arm_ccfsm_state
|| reverse
);
22727 if (!JUMP_P (insn
))
22730 /* This jump might be paralleled with a clobber of the condition codes
22731 the jump should always come first */
22732 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
22733 body
= XVECEXP (body
, 0, 0);
22736 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
22737 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
22740 int fail
= FALSE
, succeed
= FALSE
;
22741 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22742 int then_not_else
= TRUE
;
22743 rtx this_insn
= start_insn
, label
= 0;
22745 /* Register the insn jumped to. */
22748 if (!seeking_return
)
22749 label
= XEXP (SET_SRC (body
), 0);
22751 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
22752 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
22753 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
22755 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
22756 then_not_else
= FALSE
;
22758 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
22760 seeking_return
= 1;
22761 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
22763 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
22765 seeking_return
= 1;
22766 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
22767 then_not_else
= FALSE
;
22770 gcc_unreachable ();
22772 /* See how many insns this branch skips, and what kind of insns. If all
22773 insns are okay, and the label or unconditional branch to the same
22774 label is not too far away, succeed. */
22775 for (insns_skipped
= 0;
22776 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
22780 this_insn
= next_nonnote_insn (this_insn
);
22784 switch (GET_CODE (this_insn
))
22787 /* Succeed if it is the target label, otherwise fail since
22788 control falls in from somewhere else. */
22789 if (this_insn
== label
)
22791 arm_ccfsm_state
= 1;
22799 /* Succeed if the following insn is the target label.
22801 If return insns are used then the last insn in a function
22802 will be a barrier. */
22803 this_insn
= next_nonnote_insn (this_insn
);
22804 if (this_insn
&& this_insn
== label
)
22806 arm_ccfsm_state
= 1;
22814 /* The AAPCS says that conditional calls should not be
22815 used since they make interworking inefficient (the
22816 linker can't transform BL<cond> into BLX). That's
22817 only a problem if the machine has BLX. */
22824 /* Succeed if the following insn is the target label, or
22825 if the following two insns are a barrier and the
22827 this_insn
= next_nonnote_insn (this_insn
);
22828 if (this_insn
&& BARRIER_P (this_insn
))
22829 this_insn
= next_nonnote_insn (this_insn
);
22831 if (this_insn
&& this_insn
== label
22832 && insns_skipped
< max_insns_skipped
)
22834 arm_ccfsm_state
= 1;
22842 /* If this is an unconditional branch to the same label, succeed.
22843 If it is to another label, do nothing. If it is conditional,
22845 /* XXX Probably, the tests for SET and the PC are
22848 scanbody
= PATTERN (this_insn
);
22849 if (GET_CODE (scanbody
) == SET
22850 && GET_CODE (SET_DEST (scanbody
)) == PC
)
22852 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
22853 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
22855 arm_ccfsm_state
= 2;
22858 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
22861 /* Fail if a conditional return is undesirable (e.g. on a
22862 StrongARM), but still allow this if optimizing for size. */
22863 else if (GET_CODE (scanbody
) == return_code
22864 && !use_return_insn (TRUE
, NULL
)
22867 else if (GET_CODE (scanbody
) == return_code
)
22869 arm_ccfsm_state
= 2;
22872 else if (GET_CODE (scanbody
) == PARALLEL
)
22874 switch (get_attr_conds (this_insn
))
22884 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
22889 /* Instructions using or affecting the condition codes make it
22891 scanbody
= PATTERN (this_insn
);
22892 if (!(GET_CODE (scanbody
) == SET
22893 || GET_CODE (scanbody
) == PARALLEL
)
22894 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
22904 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
22905 arm_target_label
= CODE_LABEL_NUMBER (label
);
22908 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
22910 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
22912 this_insn
= next_nonnote_insn (this_insn
);
22913 gcc_assert (!this_insn
22914 || (!BARRIER_P (this_insn
)
22915 && !LABEL_P (this_insn
)));
22919 /* Oh, dear! we ran off the end.. give up. */
22920 extract_constrain_insn_cached (insn
);
22921 arm_ccfsm_state
= 0;
22922 arm_target_insn
= NULL
;
22925 arm_target_insn
= this_insn
;
22928 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22931 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
22933 if (reverse
|| then_not_else
)
22934 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
22937 /* Restore recog_data (getting the attributes of other insns can
22938 destroy this array, but final.c assumes that it remains intact
22939 across this call. */
22940 extract_constrain_insn_cached (insn
);
22944 /* Output IT instructions. */
22946 thumb2_asm_output_opcode (FILE * stream
)
22951 if (arm_condexec_mask
)
22953 for (n
= 0; n
< arm_condexec_masklen
; n
++)
22954 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
22956 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
22957 arm_condition_codes
[arm_current_cc
]);
22958 arm_condexec_mask
= 0;
22962 /* Returns true if REGNO is a valid register
22963 for holding a quantity of type MODE. */
22965 arm_hard_regno_mode_ok (unsigned int regno
, enum machine_mode mode
)
22967 if (GET_MODE_CLASS (mode
) == MODE_CC
)
22968 return (regno
== CC_REGNUM
22969 || (TARGET_HARD_FLOAT
&& TARGET_VFP
22970 && regno
== VFPCC_REGNUM
));
22973 /* For the Thumb we only allow values bigger than SImode in
22974 registers 0 - 6, so that there is always a second low
22975 register available to hold the upper part of the value.
22976 We probably we ought to ensure that the register is the
22977 start of an even numbered register pair. */
22978 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
22980 if (TARGET_HARD_FLOAT
&& TARGET_VFP
22981 && IS_VFP_REGNUM (regno
))
22983 if (mode
== SFmode
|| mode
== SImode
)
22984 return VFP_REGNO_OK_FOR_SINGLE (regno
);
22986 if (mode
== DFmode
)
22987 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
22989 /* VFP registers can hold HFmode values, but there is no point in
22990 putting them there unless we have hardware conversion insns. */
22991 if (mode
== HFmode
)
22992 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
22995 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
22996 || (VALID_NEON_QREG_MODE (mode
)
22997 && NEON_REGNO_OK_FOR_QUAD (regno
))
22998 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
22999 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23000 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23001 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23002 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23007 if (TARGET_REALLY_IWMMXT
)
23009 if (IS_IWMMXT_GR_REGNUM (regno
))
23010 return mode
== SImode
;
23012 if (IS_IWMMXT_REGNUM (regno
))
23013 return VALID_IWMMXT_REG_MODE (mode
);
23016 /* We allow almost any value to be stored in the general registers.
23017 Restrict doubleword quantities to even register pairs in ARM state
23018 so that we can use ldrd. Do not allow very large Neon structure
23019 opaque modes in general registers; they would use too many. */
23020 if (regno
<= LAST_ARM_REGNUM
)
23022 if (ARM_NUM_REGS (mode
) > 4)
23028 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23031 if (regno
== FRAME_POINTER_REGNUM
23032 || regno
== ARG_POINTER_REGNUM
)
23033 /* We only allow integers in the fake hard registers. */
23034 return GET_MODE_CLASS (mode
) == MODE_INT
;
23039 /* Implement MODES_TIEABLE_P. */
23042 arm_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
23044 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23047 /* We specifically want to allow elements of "structure" modes to
23048 be tieable to the structure. This more general condition allows
23049 other rarer situations too. */
23051 && (VALID_NEON_DREG_MODE (mode1
)
23052 || VALID_NEON_QREG_MODE (mode1
)
23053 || VALID_NEON_STRUCT_MODE (mode1
))
23054 && (VALID_NEON_DREG_MODE (mode2
)
23055 || VALID_NEON_QREG_MODE (mode2
)
23056 || VALID_NEON_STRUCT_MODE (mode2
)))
23062 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23063 not used in arm mode. */
23066 arm_regno_class (int regno
)
23070 if (regno
== STACK_POINTER_REGNUM
)
23072 if (regno
== CC_REGNUM
)
23079 if (TARGET_THUMB2
&& regno
< 8)
23082 if ( regno
<= LAST_ARM_REGNUM
23083 || regno
== FRAME_POINTER_REGNUM
23084 || regno
== ARG_POINTER_REGNUM
)
23085 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23087 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23088 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23090 if (IS_VFP_REGNUM (regno
))
23092 if (regno
<= D7_VFP_REGNUM
)
23093 return VFP_D0_D7_REGS
;
23094 else if (regno
<= LAST_LO_VFP_REGNUM
)
23095 return VFP_LO_REGS
;
23097 return VFP_HI_REGS
;
23100 if (IS_IWMMXT_REGNUM (regno
))
23101 return IWMMXT_REGS
;
23103 if (IS_IWMMXT_GR_REGNUM (regno
))
23104 return IWMMXT_GR_REGS
;
23109 /* Handle a special case when computing the offset
23110 of an argument from the frame pointer. */
23112 arm_debugger_arg_offset (int value
, rtx addr
)
23116 /* We are only interested if dbxout_parms() failed to compute the offset. */
23120 /* We can only cope with the case where the address is held in a register. */
23124 /* If we are using the frame pointer to point at the argument, then
23125 an offset of 0 is correct. */
23126 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23129 /* If we are using the stack pointer to point at the
23130 argument, then an offset of 0 is correct. */
23131 /* ??? Check this is consistent with thumb2 frame layout. */
23132 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23133 && REGNO (addr
) == SP_REGNUM
)
23136 /* Oh dear. The argument is pointed to by a register rather
23137 than being held in a register, or being stored at a known
23138 offset from the frame pointer. Since GDB only understands
23139 those two kinds of argument we must translate the address
23140 held in the register into an offset from the frame pointer.
23141 We do this by searching through the insns for the function
23142 looking to see where this register gets its value. If the
23143 register is initialized from the frame pointer plus an offset
23144 then we are in luck and we can continue, otherwise we give up.
23146 This code is exercised by producing debugging information
23147 for a function with arguments like this:
23149 double func (double a, double b, int c, double d) {return d;}
23151 Without this code the stab for parameter 'd' will be set to
23152 an offset of 0 from the frame pointer, rather than 8. */
23154 /* The if() statement says:
23156 If the insn is a normal instruction
23157 and if the insn is setting the value in a register
23158 and if the register being set is the register holding the address of the argument
23159 and if the address is computing by an addition
23160 that involves adding to a register
23161 which is the frame pointer
23166 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23168 if ( NONJUMP_INSN_P (insn
)
23169 && GET_CODE (PATTERN (insn
)) == SET
23170 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23171 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23172 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23173 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23174 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23177 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23186 warning (0, "unable to compute real location of stacked parameter");
23187 value
= 8; /* XXX magic hack */
23208 T_MAX
/* Size of enum. Keep last. */
23209 } neon_builtin_type_mode
;
23211 #define TYPE_MODE_BIT(X) (1 << (X))
23213 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23214 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23215 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23216 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23217 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23218 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23220 #define v8qi_UP T_V8QI
23221 #define v4hi_UP T_V4HI
23222 #define v4hf_UP T_V4HF
23223 #define v2si_UP T_V2SI
23224 #define v2sf_UP T_V2SF
23226 #define v16qi_UP T_V16QI
23227 #define v8hi_UP T_V8HI
23228 #define v4si_UP T_V4SI
23229 #define v4sf_UP T_V4SF
23230 #define v2di_UP T_V2DI
23235 #define UP(X) X##_UP
23271 NEON_LOADSTRUCTLANE
,
23273 NEON_STORESTRUCTLANE
,
23282 const neon_itype itype
;
23283 const neon_builtin_type_mode mode
;
23284 const enum insn_code code
;
23285 unsigned int fcode
;
23286 } neon_builtin_datum
;
23288 #define CF(N,X) CODE_FOR_neon_##N##X
23290 #define VAR1(T, N, A) \
23291 {#N, NEON_##T, UP (A), CF (N, A), 0}
23292 #define VAR2(T, N, A, B) \
23294 {#N, NEON_##T, UP (B), CF (N, B), 0}
23295 #define VAR3(T, N, A, B, C) \
23296 VAR2 (T, N, A, B), \
23297 {#N, NEON_##T, UP (C), CF (N, C), 0}
23298 #define VAR4(T, N, A, B, C, D) \
23299 VAR3 (T, N, A, B, C), \
23300 {#N, NEON_##T, UP (D), CF (N, D), 0}
23301 #define VAR5(T, N, A, B, C, D, E) \
23302 VAR4 (T, N, A, B, C, D), \
23303 {#N, NEON_##T, UP (E), CF (N, E), 0}
23304 #define VAR6(T, N, A, B, C, D, E, F) \
23305 VAR5 (T, N, A, B, C, D, E), \
23306 {#N, NEON_##T, UP (F), CF (N, F), 0}
23307 #define VAR7(T, N, A, B, C, D, E, F, G) \
23308 VAR6 (T, N, A, B, C, D, E, F), \
23309 {#N, NEON_##T, UP (G), CF (N, G), 0}
23310 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23311 VAR7 (T, N, A, B, C, D, E, F, G), \
23312 {#N, NEON_##T, UP (H), CF (N, H), 0}
23313 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23314 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23315 {#N, NEON_##T, UP (I), CF (N, I), 0}
23316 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23317 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23318 {#N, NEON_##T, UP (J), CF (N, J), 0}
23320 /* The NEON builtin data can be found in arm_neon_builtins.def.
23321 The mode entries in the following table correspond to the "key" type of the
23322 instruction variant, i.e. equivalent to that which would be specified after
23323 the assembler mnemonic, which usually refers to the last vector operand.
23324 (Signed/unsigned/polynomial types are not differentiated between though, and
23325 are all mapped onto the same mode for a given element size.) The modes
23326 listed per instruction should be the same as those defined for that
23327 instruction's pattern in neon.md. */
23329 static neon_builtin_datum neon_builtin_data
[] =
23331 #include "arm_neon_builtins.def"
23346 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23347 #define VAR1(T, N, A) \
23349 #define VAR2(T, N, A, B) \
23352 #define VAR3(T, N, A, B, C) \
23353 VAR2 (T, N, A, B), \
23355 #define VAR4(T, N, A, B, C, D) \
23356 VAR3 (T, N, A, B, C), \
23358 #define VAR5(T, N, A, B, C, D, E) \
23359 VAR4 (T, N, A, B, C, D), \
23361 #define VAR6(T, N, A, B, C, D, E, F) \
23362 VAR5 (T, N, A, B, C, D, E), \
23364 #define VAR7(T, N, A, B, C, D, E, F, G) \
23365 VAR6 (T, N, A, B, C, D, E, F), \
23367 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23368 VAR7 (T, N, A, B, C, D, E, F, G), \
23370 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23371 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23373 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23374 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23378 ARM_BUILTIN_GETWCGR0
,
23379 ARM_BUILTIN_GETWCGR1
,
23380 ARM_BUILTIN_GETWCGR2
,
23381 ARM_BUILTIN_GETWCGR3
,
23383 ARM_BUILTIN_SETWCGR0
,
23384 ARM_BUILTIN_SETWCGR1
,
23385 ARM_BUILTIN_SETWCGR2
,
23386 ARM_BUILTIN_SETWCGR3
,
23390 ARM_BUILTIN_WAVG2BR
,
23391 ARM_BUILTIN_WAVG2HR
,
23392 ARM_BUILTIN_WAVG2B
,
23393 ARM_BUILTIN_WAVG2H
,
23400 ARM_BUILTIN_WMACSZ
,
23402 ARM_BUILTIN_WMACUZ
,
23405 ARM_BUILTIN_WSADBZ
,
23407 ARM_BUILTIN_WSADHZ
,
23409 ARM_BUILTIN_WALIGNI
,
23410 ARM_BUILTIN_WALIGNR0
,
23411 ARM_BUILTIN_WALIGNR1
,
23412 ARM_BUILTIN_WALIGNR2
,
23413 ARM_BUILTIN_WALIGNR3
,
23416 ARM_BUILTIN_TMIAPH
,
23417 ARM_BUILTIN_TMIABB
,
23418 ARM_BUILTIN_TMIABT
,
23419 ARM_BUILTIN_TMIATB
,
23420 ARM_BUILTIN_TMIATT
,
23422 ARM_BUILTIN_TMOVMSKB
,
23423 ARM_BUILTIN_TMOVMSKH
,
23424 ARM_BUILTIN_TMOVMSKW
,
23426 ARM_BUILTIN_TBCSTB
,
23427 ARM_BUILTIN_TBCSTH
,
23428 ARM_BUILTIN_TBCSTW
,
23430 ARM_BUILTIN_WMADDS
,
23431 ARM_BUILTIN_WMADDU
,
23433 ARM_BUILTIN_WPACKHSS
,
23434 ARM_BUILTIN_WPACKWSS
,
23435 ARM_BUILTIN_WPACKDSS
,
23436 ARM_BUILTIN_WPACKHUS
,
23437 ARM_BUILTIN_WPACKWUS
,
23438 ARM_BUILTIN_WPACKDUS
,
23443 ARM_BUILTIN_WADDSSB
,
23444 ARM_BUILTIN_WADDSSH
,
23445 ARM_BUILTIN_WADDSSW
,
23446 ARM_BUILTIN_WADDUSB
,
23447 ARM_BUILTIN_WADDUSH
,
23448 ARM_BUILTIN_WADDUSW
,
23452 ARM_BUILTIN_WSUBSSB
,
23453 ARM_BUILTIN_WSUBSSH
,
23454 ARM_BUILTIN_WSUBSSW
,
23455 ARM_BUILTIN_WSUBUSB
,
23456 ARM_BUILTIN_WSUBUSH
,
23457 ARM_BUILTIN_WSUBUSW
,
23464 ARM_BUILTIN_WCMPEQB
,
23465 ARM_BUILTIN_WCMPEQH
,
23466 ARM_BUILTIN_WCMPEQW
,
23467 ARM_BUILTIN_WCMPGTUB
,
23468 ARM_BUILTIN_WCMPGTUH
,
23469 ARM_BUILTIN_WCMPGTUW
,
23470 ARM_BUILTIN_WCMPGTSB
,
23471 ARM_BUILTIN_WCMPGTSH
,
23472 ARM_BUILTIN_WCMPGTSW
,
23474 ARM_BUILTIN_TEXTRMSB
,
23475 ARM_BUILTIN_TEXTRMSH
,
23476 ARM_BUILTIN_TEXTRMSW
,
23477 ARM_BUILTIN_TEXTRMUB
,
23478 ARM_BUILTIN_TEXTRMUH
,
23479 ARM_BUILTIN_TEXTRMUW
,
23480 ARM_BUILTIN_TINSRB
,
23481 ARM_BUILTIN_TINSRH
,
23482 ARM_BUILTIN_TINSRW
,
23484 ARM_BUILTIN_WMAXSW
,
23485 ARM_BUILTIN_WMAXSH
,
23486 ARM_BUILTIN_WMAXSB
,
23487 ARM_BUILTIN_WMAXUW
,
23488 ARM_BUILTIN_WMAXUH
,
23489 ARM_BUILTIN_WMAXUB
,
23490 ARM_BUILTIN_WMINSW
,
23491 ARM_BUILTIN_WMINSH
,
23492 ARM_BUILTIN_WMINSB
,
23493 ARM_BUILTIN_WMINUW
,
23494 ARM_BUILTIN_WMINUH
,
23495 ARM_BUILTIN_WMINUB
,
23497 ARM_BUILTIN_WMULUM
,
23498 ARM_BUILTIN_WMULSM
,
23499 ARM_BUILTIN_WMULUL
,
23501 ARM_BUILTIN_PSADBH
,
23502 ARM_BUILTIN_WSHUFH
,
23516 ARM_BUILTIN_WSLLHI
,
23517 ARM_BUILTIN_WSLLWI
,
23518 ARM_BUILTIN_WSLLDI
,
23519 ARM_BUILTIN_WSRAHI
,
23520 ARM_BUILTIN_WSRAWI
,
23521 ARM_BUILTIN_WSRADI
,
23522 ARM_BUILTIN_WSRLHI
,
23523 ARM_BUILTIN_WSRLWI
,
23524 ARM_BUILTIN_WSRLDI
,
23525 ARM_BUILTIN_WRORHI
,
23526 ARM_BUILTIN_WRORWI
,
23527 ARM_BUILTIN_WRORDI
,
23529 ARM_BUILTIN_WUNPCKIHB
,
23530 ARM_BUILTIN_WUNPCKIHH
,
23531 ARM_BUILTIN_WUNPCKIHW
,
23532 ARM_BUILTIN_WUNPCKILB
,
23533 ARM_BUILTIN_WUNPCKILH
,
23534 ARM_BUILTIN_WUNPCKILW
,
23536 ARM_BUILTIN_WUNPCKEHSB
,
23537 ARM_BUILTIN_WUNPCKEHSH
,
23538 ARM_BUILTIN_WUNPCKEHSW
,
23539 ARM_BUILTIN_WUNPCKEHUB
,
23540 ARM_BUILTIN_WUNPCKEHUH
,
23541 ARM_BUILTIN_WUNPCKEHUW
,
23542 ARM_BUILTIN_WUNPCKELSB
,
23543 ARM_BUILTIN_WUNPCKELSH
,
23544 ARM_BUILTIN_WUNPCKELSW
,
23545 ARM_BUILTIN_WUNPCKELUB
,
23546 ARM_BUILTIN_WUNPCKELUH
,
23547 ARM_BUILTIN_WUNPCKELUW
,
23553 ARM_BUILTIN_WADDSUBHX
,
23554 ARM_BUILTIN_WSUBADDHX
,
23556 ARM_BUILTIN_WABSDIFFB
,
23557 ARM_BUILTIN_WABSDIFFH
,
23558 ARM_BUILTIN_WABSDIFFW
,
23560 ARM_BUILTIN_WADDCH
,
23561 ARM_BUILTIN_WADDCW
,
23564 ARM_BUILTIN_WAVG4R
,
23566 ARM_BUILTIN_WMADDSX
,
23567 ARM_BUILTIN_WMADDUX
,
23569 ARM_BUILTIN_WMADDSN
,
23570 ARM_BUILTIN_WMADDUN
,
23572 ARM_BUILTIN_WMULWSM
,
23573 ARM_BUILTIN_WMULWUM
,
23575 ARM_BUILTIN_WMULWSMR
,
23576 ARM_BUILTIN_WMULWUMR
,
23578 ARM_BUILTIN_WMULWL
,
23580 ARM_BUILTIN_WMULSMR
,
23581 ARM_BUILTIN_WMULUMR
,
23583 ARM_BUILTIN_WQMULM
,
23584 ARM_BUILTIN_WQMULMR
,
23586 ARM_BUILTIN_WQMULWM
,
23587 ARM_BUILTIN_WQMULWMR
,
23589 ARM_BUILTIN_WADDBHUSM
,
23590 ARM_BUILTIN_WADDBHUSL
,
23592 ARM_BUILTIN_WQMIABB
,
23593 ARM_BUILTIN_WQMIABT
,
23594 ARM_BUILTIN_WQMIATB
,
23595 ARM_BUILTIN_WQMIATT
,
23597 ARM_BUILTIN_WQMIABBN
,
23598 ARM_BUILTIN_WQMIABTN
,
23599 ARM_BUILTIN_WQMIATBN
,
23600 ARM_BUILTIN_WQMIATTN
,
23602 ARM_BUILTIN_WMIABB
,
23603 ARM_BUILTIN_WMIABT
,
23604 ARM_BUILTIN_WMIATB
,
23605 ARM_BUILTIN_WMIATT
,
23607 ARM_BUILTIN_WMIABBN
,
23608 ARM_BUILTIN_WMIABTN
,
23609 ARM_BUILTIN_WMIATBN
,
23610 ARM_BUILTIN_WMIATTN
,
23612 ARM_BUILTIN_WMIAWBB
,
23613 ARM_BUILTIN_WMIAWBT
,
23614 ARM_BUILTIN_WMIAWTB
,
23615 ARM_BUILTIN_WMIAWTT
,
23617 ARM_BUILTIN_WMIAWBBN
,
23618 ARM_BUILTIN_WMIAWBTN
,
23619 ARM_BUILTIN_WMIAWTBN
,
23620 ARM_BUILTIN_WMIAWTTN
,
23622 ARM_BUILTIN_WMERGE
,
23624 ARM_BUILTIN_CRC32B
,
23625 ARM_BUILTIN_CRC32H
,
23626 ARM_BUILTIN_CRC32W
,
23627 ARM_BUILTIN_CRC32CB
,
23628 ARM_BUILTIN_CRC32CH
,
23629 ARM_BUILTIN_CRC32CW
,
23631 ARM_BUILTIN_GET_FPSCR
,
23632 ARM_BUILTIN_SET_FPSCR
,
23638 #define CRYPTO1(L, U, M1, M2) \
23639 ARM_BUILTIN_CRYPTO_##U,
23640 #define CRYPTO2(L, U, M1, M2, M3) \
23641 ARM_BUILTIN_CRYPTO_##U,
23642 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23643 ARM_BUILTIN_CRYPTO_##U,
23645 #include "crypto.def"
23651 #include "arm_neon_builtins.def"
23656 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23670 static GTY(()) tree arm_builtin_decls
[ARM_BUILTIN_MAX
];
23672 #define NUM_DREG_TYPES 5
23673 #define NUM_QREG_TYPES 6
23676 arm_init_neon_builtins (void)
23678 unsigned int i
, fcode
;
23681 tree neon_intQI_type_node
;
23682 tree neon_intHI_type_node
;
23683 tree neon_floatHF_type_node
;
23684 tree neon_polyQI_type_node
;
23685 tree neon_polyHI_type_node
;
23686 tree neon_intSI_type_node
;
23687 tree neon_intDI_type_node
;
23688 tree neon_intUTI_type_node
;
23689 tree neon_float_type_node
;
23691 tree intQI_pointer_node
;
23692 tree intHI_pointer_node
;
23693 tree intSI_pointer_node
;
23694 tree intDI_pointer_node
;
23695 tree float_pointer_node
;
23697 tree const_intQI_node
;
23698 tree const_intHI_node
;
23699 tree const_intSI_node
;
23700 tree const_intDI_node
;
23701 tree const_float_node
;
23703 tree const_intQI_pointer_node
;
23704 tree const_intHI_pointer_node
;
23705 tree const_intSI_pointer_node
;
23706 tree const_intDI_pointer_node
;
23707 tree const_float_pointer_node
;
23709 tree V8QI_type_node
;
23710 tree V4HI_type_node
;
23711 tree V4UHI_type_node
;
23712 tree V4HF_type_node
;
23713 tree V2SI_type_node
;
23714 tree V2USI_type_node
;
23715 tree V2SF_type_node
;
23716 tree V16QI_type_node
;
23717 tree V8HI_type_node
;
23718 tree V8UHI_type_node
;
23719 tree V4SI_type_node
;
23720 tree V4USI_type_node
;
23721 tree V4SF_type_node
;
23722 tree V2DI_type_node
;
23723 tree V2UDI_type_node
;
23725 tree intUQI_type_node
;
23726 tree intUHI_type_node
;
23727 tree intUSI_type_node
;
23728 tree intUDI_type_node
;
23730 tree intEI_type_node
;
23731 tree intOI_type_node
;
23732 tree intCI_type_node
;
23733 tree intXI_type_node
;
23735 tree reinterp_ftype_dreg
[NUM_DREG_TYPES
][NUM_DREG_TYPES
];
23736 tree reinterp_ftype_qreg
[NUM_QREG_TYPES
][NUM_QREG_TYPES
];
23737 tree dreg_types
[NUM_DREG_TYPES
], qreg_types
[NUM_QREG_TYPES
];
23739 /* Create distinguished type nodes for NEON vector element types,
23740 and pointers to values of such types, so we can detect them later. */
23741 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23742 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23743 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23744 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23745 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
23746 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
23747 neon_float_type_node
= make_node (REAL_TYPE
);
23748 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
23749 layout_type (neon_float_type_node
);
23750 neon_floatHF_type_node
= make_node (REAL_TYPE
);
23751 TYPE_PRECISION (neon_floatHF_type_node
) = GET_MODE_PRECISION (HFmode
);
23752 layout_type (neon_floatHF_type_node
);
23754 /* Define typedefs which exactly correspond to the modes we are basing vector
23755 types on. If you change these names you'll need to change
23756 the table used by arm_mangle_type too. */
23757 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
23758 "__builtin_neon_qi");
23759 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
23760 "__builtin_neon_hi");
23761 (*lang_hooks
.types
.register_builtin_type
) (neon_floatHF_type_node
,
23762 "__builtin_neon_hf");
23763 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
23764 "__builtin_neon_si");
23765 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
23766 "__builtin_neon_sf");
23767 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
23768 "__builtin_neon_di");
23769 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
23770 "__builtin_neon_poly8");
23771 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
23772 "__builtin_neon_poly16");
23774 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
23775 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
23776 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
23777 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
23778 float_pointer_node
= build_pointer_type (neon_float_type_node
);
23780 /* Next create constant-qualified versions of the above types. */
23781 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
23783 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
23785 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
23787 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
23789 const_float_node
= build_qualified_type (neon_float_type_node
,
23792 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
23793 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
23794 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
23795 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
23796 const_float_pointer_node
= build_pointer_type (const_float_node
);
23798 /* Unsigned integer types for various mode sizes. */
23799 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
23800 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
23801 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
23802 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
23803 neon_intUTI_type_node
= make_unsigned_type (GET_MODE_PRECISION (TImode
));
23804 /* Now create vector types based on our NEON element types. */
23805 /* 64-bit vectors. */
23807 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
23809 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
23811 build_vector_type_for_mode (intUHI_type_node
, V4HImode
);
23813 build_vector_type_for_mode (neon_floatHF_type_node
, V4HFmode
);
23815 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
23817 build_vector_type_for_mode (intUSI_type_node
, V2SImode
);
23819 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
23820 /* 128-bit vectors. */
23822 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
23824 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
23826 build_vector_type_for_mode (intUHI_type_node
, V8HImode
);
23828 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
23830 build_vector_type_for_mode (intUSI_type_node
, V4SImode
);
23832 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
23834 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
23836 build_vector_type_for_mode (intUDI_type_node
, V2DImode
);
23839 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
23840 "__builtin_neon_uqi");
23841 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
23842 "__builtin_neon_uhi");
23843 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
23844 "__builtin_neon_usi");
23845 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23846 "__builtin_neon_udi");
23847 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23848 "__builtin_neon_poly64");
23849 (*lang_hooks
.types
.register_builtin_type
) (neon_intUTI_type_node
,
23850 "__builtin_neon_poly128");
23852 /* Opaque integer types for structures of vectors. */
23853 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
23854 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
23855 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
23856 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
23858 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
23859 "__builtin_neon_ti");
23860 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
23861 "__builtin_neon_ei");
23862 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
23863 "__builtin_neon_oi");
23864 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
23865 "__builtin_neon_ci");
23866 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
23867 "__builtin_neon_xi");
23869 if (TARGET_CRYPTO
&& TARGET_HARD_FLOAT
)
23872 tree V16UQI_type_node
=
23873 build_vector_type_for_mode (intUQI_type_node
, V16QImode
);
23875 tree v16uqi_ftype_v16uqi
23876 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
, NULL_TREE
);
23878 tree v16uqi_ftype_v16uqi_v16uqi
23879 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
,
23880 V16UQI_type_node
, NULL_TREE
);
23882 tree v4usi_ftype_v4usi
23883 = build_function_type_list (V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23885 tree v4usi_ftype_v4usi_v4usi
23886 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23887 V4USI_type_node
, NULL_TREE
);
23889 tree v4usi_ftype_v4usi_v4usi_v4usi
23890 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23891 V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23893 tree uti_ftype_udi_udi
23894 = build_function_type_list (neon_intUTI_type_node
, intUDI_type_node
,
23895 intUDI_type_node
, NULL_TREE
);
23908 ARM_BUILTIN_CRYPTO_##U
23910 "__builtin_arm_crypto_"#L
23911 #define FT1(R, A) \
23913 #define FT2(R, A1, A2) \
23914 R##_ftype_##A1##_##A2
23915 #define FT3(R, A1, A2, A3) \
23916 R##_ftype_##A1##_##A2##_##A3
23917 #define CRYPTO1(L, U, R, A) \
23918 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23919 C (U), BUILT_IN_MD, \
23921 #define CRYPTO2(L, U, R, A1, A2) \
23922 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23923 C (U), BUILT_IN_MD, \
23926 #define CRYPTO3(L, U, R, A1, A2, A3) \
23927 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23928 C (U), BUILT_IN_MD, \
23930 #include "crypto.def"
23941 dreg_types
[0] = V8QI_type_node
;
23942 dreg_types
[1] = V4HI_type_node
;
23943 dreg_types
[2] = V2SI_type_node
;
23944 dreg_types
[3] = V2SF_type_node
;
23945 dreg_types
[4] = neon_intDI_type_node
;
23947 qreg_types
[0] = V16QI_type_node
;
23948 qreg_types
[1] = V8HI_type_node
;
23949 qreg_types
[2] = V4SI_type_node
;
23950 qreg_types
[3] = V4SF_type_node
;
23951 qreg_types
[4] = V2DI_type_node
;
23952 qreg_types
[5] = neon_intUTI_type_node
;
23954 for (i
= 0; i
< NUM_QREG_TYPES
; i
++)
23957 for (j
= 0; j
< NUM_QREG_TYPES
; j
++)
23959 if (i
< NUM_DREG_TYPES
&& j
< NUM_DREG_TYPES
)
23960 reinterp_ftype_dreg
[i
][j
]
23961 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
23963 reinterp_ftype_qreg
[i
][j
]
23964 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
23968 for (i
= 0, fcode
= ARM_BUILTIN_NEON_BASE
;
23969 i
< ARRAY_SIZE (neon_builtin_data
);
23972 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
23974 const char* const modenames
[] = {
23975 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23976 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23981 int is_load
= 0, is_store
= 0;
23983 gcc_assert (ARRAY_SIZE (modenames
) == T_MAX
);
23990 case NEON_LOAD1LANE
:
23991 case NEON_LOADSTRUCT
:
23992 case NEON_LOADSTRUCTLANE
:
23994 /* Fall through. */
23996 case NEON_STORE1LANE
:
23997 case NEON_STORESTRUCT
:
23998 case NEON_STORESTRUCTLANE
:
24001 /* Fall through. */
24005 case NEON_LOGICBINOP
:
24006 case NEON_SHIFTINSERT
:
24013 case NEON_SHIFTIMM
:
24014 case NEON_SHIFTACC
:
24020 case NEON_LANEMULL
:
24021 case NEON_LANEMULH
:
24023 case NEON_SCALARMUL
:
24024 case NEON_SCALARMULL
:
24025 case NEON_SCALARMULH
:
24026 case NEON_SCALARMAC
:
24032 tree return_type
= void_type_node
, args
= void_list_node
;
24034 /* Build a function type directly from the insn_data for
24035 this builtin. The build_function_type() function takes
24036 care of removing duplicates for us. */
24037 for (k
= insn_data
[d
->code
].n_generator_args
- 1; k
>= 0; k
--)
24041 if (is_load
&& k
== 1)
24043 /* Neon load patterns always have the memory
24044 operand in the operand 1 position. */
24045 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
24046 == neon_struct_operand
);
24052 eltype
= const_intQI_pointer_node
;
24057 eltype
= const_intHI_pointer_node
;
24062 eltype
= const_intSI_pointer_node
;
24067 eltype
= const_float_pointer_node
;
24072 eltype
= const_intDI_pointer_node
;
24075 default: gcc_unreachable ();
24078 else if (is_store
&& k
== 0)
24080 /* Similarly, Neon store patterns use operand 0 as
24081 the memory location to store to. */
24082 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
24083 == neon_struct_operand
);
24089 eltype
= intQI_pointer_node
;
24094 eltype
= intHI_pointer_node
;
24099 eltype
= intSI_pointer_node
;
24104 eltype
= float_pointer_node
;
24109 eltype
= intDI_pointer_node
;
24112 default: gcc_unreachable ();
24117 switch (insn_data
[d
->code
].operand
[k
].mode
)
24119 case VOIDmode
: eltype
= void_type_node
; break;
24121 case QImode
: eltype
= neon_intQI_type_node
; break;
24122 case HImode
: eltype
= neon_intHI_type_node
; break;
24123 case SImode
: eltype
= neon_intSI_type_node
; break;
24124 case SFmode
: eltype
= neon_float_type_node
; break;
24125 case DImode
: eltype
= neon_intDI_type_node
; break;
24126 case TImode
: eltype
= intTI_type_node
; break;
24127 case EImode
: eltype
= intEI_type_node
; break;
24128 case OImode
: eltype
= intOI_type_node
; break;
24129 case CImode
: eltype
= intCI_type_node
; break;
24130 case XImode
: eltype
= intXI_type_node
; break;
24131 /* 64-bit vectors. */
24132 case V8QImode
: eltype
= V8QI_type_node
; break;
24133 case V4HImode
: eltype
= V4HI_type_node
; break;
24134 case V2SImode
: eltype
= V2SI_type_node
; break;
24135 case V2SFmode
: eltype
= V2SF_type_node
; break;
24136 /* 128-bit vectors. */
24137 case V16QImode
: eltype
= V16QI_type_node
; break;
24138 case V8HImode
: eltype
= V8HI_type_node
; break;
24139 case V4SImode
: eltype
= V4SI_type_node
; break;
24140 case V4SFmode
: eltype
= V4SF_type_node
; break;
24141 case V2DImode
: eltype
= V2DI_type_node
; break;
24142 default: gcc_unreachable ();
24146 if (k
== 0 && !is_store
)
24147 return_type
= eltype
;
24149 args
= tree_cons (NULL_TREE
, eltype
, args
);
24152 ftype
= build_function_type (return_type
, args
);
24156 case NEON_REINTERP
:
24158 /* We iterate over NUM_DREG_TYPES doubleword types,
24159 then NUM_QREG_TYPES quadword types.
24160 V4HF is not a type used in reinterpret, so we translate
24161 d->mode to the correct index in reinterp_ftype_dreg. */
24163 = GET_MODE_SIZE (insn_data
[d
->code
].operand
[0].mode
) > 8;
24164 int rhs
= (d
->mode
- ((!qreg_p
&& (d
->mode
> T_V4HF
)) ? 1 : 0))
24166 switch (insn_data
[d
->code
].operand
[0].mode
)
24168 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
24169 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
24170 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
24171 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
24172 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
24173 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
24174 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
24175 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
24176 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
24177 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
24178 case TImode
: ftype
= reinterp_ftype_qreg
[5][rhs
]; break;
24179 default: gcc_unreachable ();
24183 case NEON_FLOAT_WIDEN
:
24185 tree eltype
= NULL_TREE
;
24186 tree return_type
= NULL_TREE
;
24188 switch (insn_data
[d
->code
].operand
[1].mode
)
24191 eltype
= V4HF_type_node
;
24192 return_type
= V4SF_type_node
;
24194 default: gcc_unreachable ();
24196 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
24199 case NEON_FLOAT_NARROW
:
24201 tree eltype
= NULL_TREE
;
24202 tree return_type
= NULL_TREE
;
24204 switch (insn_data
[d
->code
].operand
[1].mode
)
24207 eltype
= V4SF_type_node
;
24208 return_type
= V4HF_type_node
;
24210 default: gcc_unreachable ();
24212 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
24217 tree eltype
= NULL_TREE
;
24218 switch (insn_data
[d
->code
].operand
[1].mode
)
24221 eltype
= V4UHI_type_node
;
24224 eltype
= V8UHI_type_node
;
24227 eltype
= V2USI_type_node
;
24230 eltype
= V4USI_type_node
;
24233 eltype
= V2UDI_type_node
;
24235 default: gcc_unreachable ();
24237 ftype
= build_function_type_list (eltype
, eltype
, NULL
);
24241 gcc_unreachable ();
24244 gcc_assert (ftype
!= NULL
);
24246 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[d
->mode
]);
24248 decl
= add_builtin_function (namebuf
, ftype
, fcode
, BUILT_IN_MD
, NULL
,
24250 arm_builtin_decls
[fcode
] = decl
;
24254 #undef NUM_DREG_TYPES
24255 #undef NUM_QREG_TYPES
24257 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24260 if ((MASK) & insn_flags) \
24263 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24264 BUILT_IN_MD, NULL, NULL_TREE); \
24265 arm_builtin_decls[CODE] = bdecl; \
24270 struct builtin_description
24272 const unsigned int mask
;
24273 const enum insn_code icode
;
24274 const char * const name
;
24275 const enum arm_builtins code
;
24276 const enum rtx_code comparison
;
24277 const unsigned int flag
;
24280 static const struct builtin_description bdesc_2arg
[] =
24282 #define IWMMXT_BUILTIN(code, string, builtin) \
24283 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24284 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24286 #define IWMMXT2_BUILTIN(code, string, builtin) \
24287 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24288 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24290 IWMMXT_BUILTIN (addv8qi3
, "waddb", WADDB
)
24291 IWMMXT_BUILTIN (addv4hi3
, "waddh", WADDH
)
24292 IWMMXT_BUILTIN (addv2si3
, "waddw", WADDW
)
24293 IWMMXT_BUILTIN (subv8qi3
, "wsubb", WSUBB
)
24294 IWMMXT_BUILTIN (subv4hi3
, "wsubh", WSUBH
)
24295 IWMMXT_BUILTIN (subv2si3
, "wsubw", WSUBW
)
24296 IWMMXT_BUILTIN (ssaddv8qi3
, "waddbss", WADDSSB
)
24297 IWMMXT_BUILTIN (ssaddv4hi3
, "waddhss", WADDSSH
)
24298 IWMMXT_BUILTIN (ssaddv2si3
, "waddwss", WADDSSW
)
24299 IWMMXT_BUILTIN (sssubv8qi3
, "wsubbss", WSUBSSB
)
24300 IWMMXT_BUILTIN (sssubv4hi3
, "wsubhss", WSUBSSH
)
24301 IWMMXT_BUILTIN (sssubv2si3
, "wsubwss", WSUBSSW
)
24302 IWMMXT_BUILTIN (usaddv8qi3
, "waddbus", WADDUSB
)
24303 IWMMXT_BUILTIN (usaddv4hi3
, "waddhus", WADDUSH
)
24304 IWMMXT_BUILTIN (usaddv2si3
, "waddwus", WADDUSW
)
24305 IWMMXT_BUILTIN (ussubv8qi3
, "wsubbus", WSUBUSB
)
24306 IWMMXT_BUILTIN (ussubv4hi3
, "wsubhus", WSUBUSH
)
24307 IWMMXT_BUILTIN (ussubv2si3
, "wsubwus", WSUBUSW
)
24308 IWMMXT_BUILTIN (mulv4hi3
, "wmulul", WMULUL
)
24309 IWMMXT_BUILTIN (smulv4hi3_highpart
, "wmulsm", WMULSM
)
24310 IWMMXT_BUILTIN (umulv4hi3_highpart
, "wmulum", WMULUM
)
24311 IWMMXT_BUILTIN (eqv8qi3
, "wcmpeqb", WCMPEQB
)
24312 IWMMXT_BUILTIN (eqv4hi3
, "wcmpeqh", WCMPEQH
)
24313 IWMMXT_BUILTIN (eqv2si3
, "wcmpeqw", WCMPEQW
)
24314 IWMMXT_BUILTIN (gtuv8qi3
, "wcmpgtub", WCMPGTUB
)
24315 IWMMXT_BUILTIN (gtuv4hi3
, "wcmpgtuh", WCMPGTUH
)
24316 IWMMXT_BUILTIN (gtuv2si3
, "wcmpgtuw", WCMPGTUW
)
24317 IWMMXT_BUILTIN (gtv8qi3
, "wcmpgtsb", WCMPGTSB
)
24318 IWMMXT_BUILTIN (gtv4hi3
, "wcmpgtsh", WCMPGTSH
)
24319 IWMMXT_BUILTIN (gtv2si3
, "wcmpgtsw", WCMPGTSW
)
24320 IWMMXT_BUILTIN (umaxv8qi3
, "wmaxub", WMAXUB
)
24321 IWMMXT_BUILTIN (smaxv8qi3
, "wmaxsb", WMAXSB
)
24322 IWMMXT_BUILTIN (umaxv4hi3
, "wmaxuh", WMAXUH
)
24323 IWMMXT_BUILTIN (smaxv4hi3
, "wmaxsh", WMAXSH
)
24324 IWMMXT_BUILTIN (umaxv2si3
, "wmaxuw", WMAXUW
)
24325 IWMMXT_BUILTIN (smaxv2si3
, "wmaxsw", WMAXSW
)
24326 IWMMXT_BUILTIN (uminv8qi3
, "wminub", WMINUB
)
24327 IWMMXT_BUILTIN (sminv8qi3
, "wminsb", WMINSB
)
24328 IWMMXT_BUILTIN (uminv4hi3
, "wminuh", WMINUH
)
24329 IWMMXT_BUILTIN (sminv4hi3
, "wminsh", WMINSH
)
24330 IWMMXT_BUILTIN (uminv2si3
, "wminuw", WMINUW
)
24331 IWMMXT_BUILTIN (sminv2si3
, "wminsw", WMINSW
)
24332 IWMMXT_BUILTIN (iwmmxt_anddi3
, "wand", WAND
)
24333 IWMMXT_BUILTIN (iwmmxt_nanddi3
, "wandn", WANDN
)
24334 IWMMXT_BUILTIN (iwmmxt_iordi3
, "wor", WOR
)
24335 IWMMXT_BUILTIN (iwmmxt_xordi3
, "wxor", WXOR
)
24336 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3
, "wavg2b", WAVG2B
)
24337 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3
, "wavg2h", WAVG2H
)
24338 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3
, "wavg2br", WAVG2BR
)
24339 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3
, "wavg2hr", WAVG2HR
)
24340 IWMMXT_BUILTIN (iwmmxt_wunpckilb
, "wunpckilb", WUNPCKILB
)
24341 IWMMXT_BUILTIN (iwmmxt_wunpckilh
, "wunpckilh", WUNPCKILH
)
24342 IWMMXT_BUILTIN (iwmmxt_wunpckilw
, "wunpckilw", WUNPCKILW
)
24343 IWMMXT_BUILTIN (iwmmxt_wunpckihb
, "wunpckihb", WUNPCKIHB
)
24344 IWMMXT_BUILTIN (iwmmxt_wunpckihh
, "wunpckihh", WUNPCKIHH
)
24345 IWMMXT_BUILTIN (iwmmxt_wunpckihw
, "wunpckihw", WUNPCKIHW
)
24346 IWMMXT2_BUILTIN (iwmmxt_waddsubhx
, "waddsubhx", WADDSUBHX
)
24347 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx
, "wsubaddhx", WSUBADDHX
)
24348 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb
, "wabsdiffb", WABSDIFFB
)
24349 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh
, "wabsdiffh", WABSDIFFH
)
24350 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw
, "wabsdiffw", WABSDIFFW
)
24351 IWMMXT2_BUILTIN (iwmmxt_avg4
, "wavg4", WAVG4
)
24352 IWMMXT2_BUILTIN (iwmmxt_avg4r
, "wavg4r", WAVG4R
)
24353 IWMMXT2_BUILTIN (iwmmxt_wmulwsm
, "wmulwsm", WMULWSM
)
24354 IWMMXT2_BUILTIN (iwmmxt_wmulwum
, "wmulwum", WMULWUM
)
24355 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr
, "wmulwsmr", WMULWSMR
)
24356 IWMMXT2_BUILTIN (iwmmxt_wmulwumr
, "wmulwumr", WMULWUMR
)
24357 IWMMXT2_BUILTIN (iwmmxt_wmulwl
, "wmulwl", WMULWL
)
24358 IWMMXT2_BUILTIN (iwmmxt_wmulsmr
, "wmulsmr", WMULSMR
)
24359 IWMMXT2_BUILTIN (iwmmxt_wmulumr
, "wmulumr", WMULUMR
)
24360 IWMMXT2_BUILTIN (iwmmxt_wqmulm
, "wqmulm", WQMULM
)
24361 IWMMXT2_BUILTIN (iwmmxt_wqmulmr
, "wqmulmr", WQMULMR
)
24362 IWMMXT2_BUILTIN (iwmmxt_wqmulwm
, "wqmulwm", WQMULWM
)
24363 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr
, "wqmulwmr", WQMULWMR
)
24364 IWMMXT_BUILTIN (iwmmxt_walignr0
, "walignr0", WALIGNR0
)
24365 IWMMXT_BUILTIN (iwmmxt_walignr1
, "walignr1", WALIGNR1
)
24366 IWMMXT_BUILTIN (iwmmxt_walignr2
, "walignr2", WALIGNR2
)
24367 IWMMXT_BUILTIN (iwmmxt_walignr3
, "walignr3", WALIGNR3
)
24369 #define IWMMXT_BUILTIN2(code, builtin) \
24370 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24372 #define IWMMXT2_BUILTIN2(code, builtin) \
24373 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24375 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm
, WADDBHUSM
)
24376 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl
, WADDBHUSL
)
24377 IWMMXT_BUILTIN2 (iwmmxt_wpackhss
, WPACKHSS
)
24378 IWMMXT_BUILTIN2 (iwmmxt_wpackwss
, WPACKWSS
)
24379 IWMMXT_BUILTIN2 (iwmmxt_wpackdss
, WPACKDSS
)
24380 IWMMXT_BUILTIN2 (iwmmxt_wpackhus
, WPACKHUS
)
24381 IWMMXT_BUILTIN2 (iwmmxt_wpackwus
, WPACKWUS
)
24382 IWMMXT_BUILTIN2 (iwmmxt_wpackdus
, WPACKDUS
)
24383 IWMMXT_BUILTIN2 (iwmmxt_wmacuz
, WMACUZ
)
24384 IWMMXT_BUILTIN2 (iwmmxt_wmacsz
, WMACSZ
)
24387 #define FP_BUILTIN(L, U) \
24388 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24391 FP_BUILTIN (get_fpscr
, GET_FPSCR
)
24392 FP_BUILTIN (set_fpscr
, SET_FPSCR
)
24395 #define CRC32_BUILTIN(L, U) \
24396 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24398 CRC32_BUILTIN (crc32b
, CRC32B
)
24399 CRC32_BUILTIN (crc32h
, CRC32H
)
24400 CRC32_BUILTIN (crc32w
, CRC32W
)
24401 CRC32_BUILTIN (crc32cb
, CRC32CB
)
24402 CRC32_BUILTIN (crc32ch
, CRC32CH
)
24403 CRC32_BUILTIN (crc32cw
, CRC32CW
)
24404 #undef CRC32_BUILTIN
24407 #define CRYPTO_BUILTIN(L, U) \
24408 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24413 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24414 #define CRYPTO1(L, U, R, A)
24415 #define CRYPTO3(L, U, R, A1, A2, A3)
24416 #include "crypto.def"
24423 static const struct builtin_description bdesc_1arg
[] =
24425 IWMMXT_BUILTIN (iwmmxt_tmovmskb
, "tmovmskb", TMOVMSKB
)
24426 IWMMXT_BUILTIN (iwmmxt_tmovmskh
, "tmovmskh", TMOVMSKH
)
24427 IWMMXT_BUILTIN (iwmmxt_tmovmskw
, "tmovmskw", TMOVMSKW
)
24428 IWMMXT_BUILTIN (iwmmxt_waccb
, "waccb", WACCB
)
24429 IWMMXT_BUILTIN (iwmmxt_wacch
, "wacch", WACCH
)
24430 IWMMXT_BUILTIN (iwmmxt_waccw
, "waccw", WACCW
)
24431 IWMMXT_BUILTIN (iwmmxt_wunpckehub
, "wunpckehub", WUNPCKEHUB
)
24432 IWMMXT_BUILTIN (iwmmxt_wunpckehuh
, "wunpckehuh", WUNPCKEHUH
)
24433 IWMMXT_BUILTIN (iwmmxt_wunpckehuw
, "wunpckehuw", WUNPCKEHUW
)
24434 IWMMXT_BUILTIN (iwmmxt_wunpckehsb
, "wunpckehsb", WUNPCKEHSB
)
24435 IWMMXT_BUILTIN (iwmmxt_wunpckehsh
, "wunpckehsh", WUNPCKEHSH
)
24436 IWMMXT_BUILTIN (iwmmxt_wunpckehsw
, "wunpckehsw", WUNPCKEHSW
)
24437 IWMMXT_BUILTIN (iwmmxt_wunpckelub
, "wunpckelub", WUNPCKELUB
)
24438 IWMMXT_BUILTIN (iwmmxt_wunpckeluh
, "wunpckeluh", WUNPCKELUH
)
24439 IWMMXT_BUILTIN (iwmmxt_wunpckeluw
, "wunpckeluw", WUNPCKELUW
)
24440 IWMMXT_BUILTIN (iwmmxt_wunpckelsb
, "wunpckelsb", WUNPCKELSB
)
24441 IWMMXT_BUILTIN (iwmmxt_wunpckelsh
, "wunpckelsh", WUNPCKELSH
)
24442 IWMMXT_BUILTIN (iwmmxt_wunpckelsw
, "wunpckelsw", WUNPCKELSW
)
24443 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3
, "wabsb", WABSB
)
24444 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3
, "wabsh", WABSH
)
24445 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3
, "wabsw", WABSW
)
24446 IWMMXT_BUILTIN (tbcstv8qi
, "tbcstb", TBCSTB
)
24447 IWMMXT_BUILTIN (tbcstv4hi
, "tbcsth", TBCSTH
)
24448 IWMMXT_BUILTIN (tbcstv2si
, "tbcstw", TBCSTW
)
24450 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24451 #define CRYPTO2(L, U, R, A1, A2)
24452 #define CRYPTO3(L, U, R, A1, A2, A3)
24453 #include "crypto.def"
24459 static const struct builtin_description bdesc_3arg
[] =
24461 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24462 #define CRYPTO1(L, U, R, A)
24463 #define CRYPTO2(L, U, R, A1, A2)
24464 #include "crypto.def"
24469 #undef CRYPTO_BUILTIN
24471 /* Set up all the iWMMXt builtins. This is not called if
24472 TARGET_IWMMXT is zero. */
24475 arm_init_iwmmxt_builtins (void)
24477 const struct builtin_description
* d
;
24480 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
24481 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
24482 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
24484 tree v8qi_ftype_v8qi_v8qi_int
24485 = build_function_type_list (V8QI_type_node
,
24486 V8QI_type_node
, V8QI_type_node
,
24487 integer_type_node
, NULL_TREE
);
24488 tree v4hi_ftype_v4hi_int
24489 = build_function_type_list (V4HI_type_node
,
24490 V4HI_type_node
, integer_type_node
, NULL_TREE
);
24491 tree v2si_ftype_v2si_int
24492 = build_function_type_list (V2SI_type_node
,
24493 V2SI_type_node
, integer_type_node
, NULL_TREE
);
24494 tree v2si_ftype_di_di
24495 = build_function_type_list (V2SI_type_node
,
24496 long_long_integer_type_node
,
24497 long_long_integer_type_node
,
24499 tree di_ftype_di_int
24500 = build_function_type_list (long_long_integer_type_node
,
24501 long_long_integer_type_node
,
24502 integer_type_node
, NULL_TREE
);
24503 tree di_ftype_di_int_int
24504 = build_function_type_list (long_long_integer_type_node
,
24505 long_long_integer_type_node
,
24507 integer_type_node
, NULL_TREE
);
24508 tree int_ftype_v8qi
24509 = build_function_type_list (integer_type_node
,
24510 V8QI_type_node
, NULL_TREE
);
24511 tree int_ftype_v4hi
24512 = build_function_type_list (integer_type_node
,
24513 V4HI_type_node
, NULL_TREE
);
24514 tree int_ftype_v2si
24515 = build_function_type_list (integer_type_node
,
24516 V2SI_type_node
, NULL_TREE
);
24517 tree int_ftype_v8qi_int
24518 = build_function_type_list (integer_type_node
,
24519 V8QI_type_node
, integer_type_node
, NULL_TREE
);
24520 tree int_ftype_v4hi_int
24521 = build_function_type_list (integer_type_node
,
24522 V4HI_type_node
, integer_type_node
, NULL_TREE
);
24523 tree int_ftype_v2si_int
24524 = build_function_type_list (integer_type_node
,
24525 V2SI_type_node
, integer_type_node
, NULL_TREE
);
24526 tree v8qi_ftype_v8qi_int_int
24527 = build_function_type_list (V8QI_type_node
,
24528 V8QI_type_node
, integer_type_node
,
24529 integer_type_node
, NULL_TREE
);
24530 tree v4hi_ftype_v4hi_int_int
24531 = build_function_type_list (V4HI_type_node
,
24532 V4HI_type_node
, integer_type_node
,
24533 integer_type_node
, NULL_TREE
);
24534 tree v2si_ftype_v2si_int_int
24535 = build_function_type_list (V2SI_type_node
,
24536 V2SI_type_node
, integer_type_node
,
24537 integer_type_node
, NULL_TREE
);
24538 /* Miscellaneous. */
24539 tree v8qi_ftype_v4hi_v4hi
24540 = build_function_type_list (V8QI_type_node
,
24541 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24542 tree v4hi_ftype_v2si_v2si
24543 = build_function_type_list (V4HI_type_node
,
24544 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24545 tree v8qi_ftype_v4hi_v8qi
24546 = build_function_type_list (V8QI_type_node
,
24547 V4HI_type_node
, V8QI_type_node
, NULL_TREE
);
24548 tree v2si_ftype_v4hi_v4hi
24549 = build_function_type_list (V2SI_type_node
,
24550 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24551 tree v2si_ftype_v8qi_v8qi
24552 = build_function_type_list (V2SI_type_node
,
24553 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24554 tree v4hi_ftype_v4hi_di
24555 = build_function_type_list (V4HI_type_node
,
24556 V4HI_type_node
, long_long_integer_type_node
,
24558 tree v2si_ftype_v2si_di
24559 = build_function_type_list (V2SI_type_node
,
24560 V2SI_type_node
, long_long_integer_type_node
,
24563 = build_function_type_list (long_long_unsigned_type_node
, NULL_TREE
);
24564 tree int_ftype_void
24565 = build_function_type_list (integer_type_node
, NULL_TREE
);
24567 = build_function_type_list (long_long_integer_type_node
,
24568 V8QI_type_node
, NULL_TREE
);
24570 = build_function_type_list (long_long_integer_type_node
,
24571 V4HI_type_node
, NULL_TREE
);
24573 = build_function_type_list (long_long_integer_type_node
,
24574 V2SI_type_node
, NULL_TREE
);
24575 tree v2si_ftype_v4hi
24576 = build_function_type_list (V2SI_type_node
,
24577 V4HI_type_node
, NULL_TREE
);
24578 tree v4hi_ftype_v8qi
24579 = build_function_type_list (V4HI_type_node
,
24580 V8QI_type_node
, NULL_TREE
);
24581 tree v8qi_ftype_v8qi
24582 = build_function_type_list (V8QI_type_node
,
24583 V8QI_type_node
, NULL_TREE
);
24584 tree v4hi_ftype_v4hi
24585 = build_function_type_list (V4HI_type_node
,
24586 V4HI_type_node
, NULL_TREE
);
24587 tree v2si_ftype_v2si
24588 = build_function_type_list (V2SI_type_node
,
24589 V2SI_type_node
, NULL_TREE
);
24591 tree di_ftype_di_v4hi_v4hi
24592 = build_function_type_list (long_long_unsigned_type_node
,
24593 long_long_unsigned_type_node
,
24594 V4HI_type_node
, V4HI_type_node
,
24597 tree di_ftype_v4hi_v4hi
24598 = build_function_type_list (long_long_unsigned_type_node
,
24599 V4HI_type_node
,V4HI_type_node
,
24602 tree v2si_ftype_v2si_v4hi_v4hi
24603 = build_function_type_list (V2SI_type_node
,
24604 V2SI_type_node
, V4HI_type_node
,
24605 V4HI_type_node
, NULL_TREE
);
24607 tree v2si_ftype_v2si_v8qi_v8qi
24608 = build_function_type_list (V2SI_type_node
,
24609 V2SI_type_node
, V8QI_type_node
,
24610 V8QI_type_node
, NULL_TREE
);
24612 tree di_ftype_di_v2si_v2si
24613 = build_function_type_list (long_long_unsigned_type_node
,
24614 long_long_unsigned_type_node
,
24615 V2SI_type_node
, V2SI_type_node
,
24618 tree di_ftype_di_di_int
24619 = build_function_type_list (long_long_unsigned_type_node
,
24620 long_long_unsigned_type_node
,
24621 long_long_unsigned_type_node
,
24622 integer_type_node
, NULL_TREE
);
24624 tree void_ftype_int
24625 = build_function_type_list (void_type_node
,
24626 integer_type_node
, NULL_TREE
);
24628 tree v8qi_ftype_char
24629 = build_function_type_list (V8QI_type_node
,
24630 signed_char_type_node
, NULL_TREE
);
24632 tree v4hi_ftype_short
24633 = build_function_type_list (V4HI_type_node
,
24634 short_integer_type_node
, NULL_TREE
);
24636 tree v2si_ftype_int
24637 = build_function_type_list (V2SI_type_node
,
24638 integer_type_node
, NULL_TREE
);
24640 /* Normal vector binops. */
24641 tree v8qi_ftype_v8qi_v8qi
24642 = build_function_type_list (V8QI_type_node
,
24643 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24644 tree v4hi_ftype_v4hi_v4hi
24645 = build_function_type_list (V4HI_type_node
,
24646 V4HI_type_node
,V4HI_type_node
, NULL_TREE
);
24647 tree v2si_ftype_v2si_v2si
24648 = build_function_type_list (V2SI_type_node
,
24649 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24650 tree di_ftype_di_di
24651 = build_function_type_list (long_long_unsigned_type_node
,
24652 long_long_unsigned_type_node
,
24653 long_long_unsigned_type_node
,
24656 /* Add all builtins that are more or less simple operations on two
24658 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
24660 /* Use one of the operands; the target can have a different mode for
24661 mask-generating compares. */
24662 enum machine_mode mode
;
24665 if (d
->name
== 0 || !(d
->mask
== FL_IWMMXT
|| d
->mask
== FL_IWMMXT2
))
24668 mode
= insn_data
[d
->icode
].operand
[1].mode
;
24673 type
= v8qi_ftype_v8qi_v8qi
;
24676 type
= v4hi_ftype_v4hi_v4hi
;
24679 type
= v2si_ftype_v2si_v2si
;
24682 type
= di_ftype_di_di
;
24686 gcc_unreachable ();
24689 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
24692 /* Add the remaining MMX insns with somewhat more complicated types. */
24693 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24694 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24695 ARM_BUILTIN_ ## CODE)
24697 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24698 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24699 ARM_BUILTIN_ ## CODE)
24701 iwmmx_mbuiltin ("wzero", di_ftype_void
, WZERO
);
24702 iwmmx_mbuiltin ("setwcgr0", void_ftype_int
, SETWCGR0
);
24703 iwmmx_mbuiltin ("setwcgr1", void_ftype_int
, SETWCGR1
);
24704 iwmmx_mbuiltin ("setwcgr2", void_ftype_int
, SETWCGR2
);
24705 iwmmx_mbuiltin ("setwcgr3", void_ftype_int
, SETWCGR3
);
24706 iwmmx_mbuiltin ("getwcgr0", int_ftype_void
, GETWCGR0
);
24707 iwmmx_mbuiltin ("getwcgr1", int_ftype_void
, GETWCGR1
);
24708 iwmmx_mbuiltin ("getwcgr2", int_ftype_void
, GETWCGR2
);
24709 iwmmx_mbuiltin ("getwcgr3", int_ftype_void
, GETWCGR3
);
24711 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di
, WSLLH
);
24712 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di
, WSLLW
);
24713 iwmmx_mbuiltin ("wslld", di_ftype_di_di
, WSLLD
);
24714 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int
, WSLLHI
);
24715 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int
, WSLLWI
);
24716 iwmmx_mbuiltin ("wslldi", di_ftype_di_int
, WSLLDI
);
24718 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di
, WSRLH
);
24719 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di
, WSRLW
);
24720 iwmmx_mbuiltin ("wsrld", di_ftype_di_di
, WSRLD
);
24721 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int
, WSRLHI
);
24722 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int
, WSRLWI
);
24723 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int
, WSRLDI
);
24725 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di
, WSRAH
);
24726 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di
, WSRAW
);
24727 iwmmx_mbuiltin ("wsrad", di_ftype_di_di
, WSRAD
);
24728 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int
, WSRAHI
);
24729 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int
, WSRAWI
);
24730 iwmmx_mbuiltin ("wsradi", di_ftype_di_int
, WSRADI
);
24732 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di
, WRORH
);
24733 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di
, WRORW
);
24734 iwmmx_mbuiltin ("wrord", di_ftype_di_di
, WRORD
);
24735 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int
, WRORHI
);
24736 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int
, WRORWI
);
24737 iwmmx_mbuiltin ("wrordi", di_ftype_di_int
, WRORDI
);
24739 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int
, WSHUFH
);
24741 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi
, WSADB
);
24742 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi
, WSADH
);
24743 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi
, WMADDS
);
24744 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi
, WMADDSX
);
24745 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi
, WMADDSN
);
24746 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi
, WMADDU
);
24747 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi
, WMADDUX
);
24748 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi
, WMADDUN
);
24749 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi
, WSADBZ
);
24750 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi
, WSADHZ
);
24752 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int
, TEXTRMSB
);
24753 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int
, TEXTRMSH
);
24754 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int
, TEXTRMSW
);
24755 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int
, TEXTRMUB
);
24756 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int
, TEXTRMUH
);
24757 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int
, TEXTRMUW
);
24758 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int
, TINSRB
);
24759 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int
, TINSRH
);
24760 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int
, TINSRW
);
24762 iwmmx_mbuiltin ("waccb", di_ftype_v8qi
, WACCB
);
24763 iwmmx_mbuiltin ("wacch", di_ftype_v4hi
, WACCH
);
24764 iwmmx_mbuiltin ("waccw", di_ftype_v2si
, WACCW
);
24766 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi
, TMOVMSKB
);
24767 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi
, TMOVMSKH
);
24768 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si
, TMOVMSKW
);
24770 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi
, WADDBHUSM
);
24771 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi
, WADDBHUSL
);
24773 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi
, WPACKHSS
);
24774 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi
, WPACKHUS
);
24775 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si
, WPACKWUS
);
24776 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si
, WPACKWSS
);
24777 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di
, WPACKDUS
);
24778 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di
, WPACKDSS
);
24780 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi
, WUNPCKEHUB
);
24781 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi
, WUNPCKEHUH
);
24782 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si
, WUNPCKEHUW
);
24783 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi
, WUNPCKEHSB
);
24784 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi
, WUNPCKEHSH
);
24785 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si
, WUNPCKEHSW
);
24786 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi
, WUNPCKELUB
);
24787 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi
, WUNPCKELUH
);
24788 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si
, WUNPCKELUW
);
24789 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi
, WUNPCKELSB
);
24790 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi
, WUNPCKELSH
);
24791 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si
, WUNPCKELSW
);
24793 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi
, WMACS
);
24794 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi
, WMACSZ
);
24795 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi
, WMACU
);
24796 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi
, WMACUZ
);
24798 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int
, WALIGNI
);
24799 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int
, TMIA
);
24800 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int
, TMIAPH
);
24801 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int
, TMIABB
);
24802 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int
, TMIABT
);
24803 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int
, TMIATB
);
24804 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int
, TMIATT
);
24806 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi
, WABSB
);
24807 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi
, WABSH
);
24808 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si
, WABSW
);
24810 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi
, WQMIABB
);
24811 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi
, WQMIABT
);
24812 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi
, WQMIATB
);
24813 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi
, WQMIATT
);
24815 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABBN
);
24816 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABTN
);
24817 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATBN
);
24818 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATTN
);
24820 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi
, WMIABB
);
24821 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi
, WMIABT
);
24822 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi
, WMIATB
);
24823 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi
, WMIATT
);
24825 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi
, WMIABBN
);
24826 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi
, WMIABTN
);
24827 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi
, WMIATBN
);
24828 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi
, WMIATTN
);
24830 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si
, WMIAWBB
);
24831 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si
, WMIAWBT
);
24832 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si
, WMIAWTB
);
24833 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si
, WMIAWTT
);
24835 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si
, WMIAWBBN
);
24836 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si
, WMIAWBTN
);
24837 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si
, WMIAWTBN
);
24838 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si
, WMIAWTTN
);
24840 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int
, WMERGE
);
24842 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char
, TBCSTB
);
24843 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short
, TBCSTH
);
24844 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int
, TBCSTW
);
24846 #undef iwmmx_mbuiltin
24847 #undef iwmmx2_mbuiltin
24851 arm_init_fp16_builtins (void)
24853 tree fp16_type
= make_node (REAL_TYPE
);
24854 TYPE_PRECISION (fp16_type
) = 16;
24855 layout_type (fp16_type
);
24856 (*lang_hooks
.types
.register_builtin_type
) (fp16_type
, "__fp16");
24860 arm_init_crc32_builtins ()
24862 tree si_ftype_si_qi
24863 = build_function_type_list (unsigned_intSI_type_node
,
24864 unsigned_intSI_type_node
,
24865 unsigned_intQI_type_node
, NULL_TREE
);
24866 tree si_ftype_si_hi
24867 = build_function_type_list (unsigned_intSI_type_node
,
24868 unsigned_intSI_type_node
,
24869 unsigned_intHI_type_node
, NULL_TREE
);
24870 tree si_ftype_si_si
24871 = build_function_type_list (unsigned_intSI_type_node
,
24872 unsigned_intSI_type_node
,
24873 unsigned_intSI_type_node
, NULL_TREE
);
24875 arm_builtin_decls
[ARM_BUILTIN_CRC32B
]
24876 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi
,
24877 ARM_BUILTIN_CRC32B
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24878 arm_builtin_decls
[ARM_BUILTIN_CRC32H
]
24879 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi
,
24880 ARM_BUILTIN_CRC32H
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24881 arm_builtin_decls
[ARM_BUILTIN_CRC32W
]
24882 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si
,
24883 ARM_BUILTIN_CRC32W
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24884 arm_builtin_decls
[ARM_BUILTIN_CRC32CB
]
24885 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi
,
24886 ARM_BUILTIN_CRC32CB
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24887 arm_builtin_decls
[ARM_BUILTIN_CRC32CH
]
24888 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi
,
24889 ARM_BUILTIN_CRC32CH
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24890 arm_builtin_decls
[ARM_BUILTIN_CRC32CW
]
24891 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si
,
24892 ARM_BUILTIN_CRC32CW
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24896 arm_init_builtins (void)
24898 if (TARGET_REALLY_IWMMXT
)
24899 arm_init_iwmmxt_builtins ();
24902 arm_init_neon_builtins ();
24904 if (arm_fp16_format
)
24905 arm_init_fp16_builtins ();
24908 arm_init_crc32_builtins ();
24910 if (TARGET_VFP
&& TARGET_HARD_FLOAT
)
24912 tree ftype_set_fpscr
24913 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL
);
24914 tree ftype_get_fpscr
24915 = build_function_type_list (unsigned_type_node
, NULL
);
24917 arm_builtin_decls
[ARM_BUILTIN_GET_FPSCR
]
24918 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr
,
24919 ARM_BUILTIN_GET_FPSCR
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24920 arm_builtin_decls
[ARM_BUILTIN_SET_FPSCR
]
24921 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr
,
24922 ARM_BUILTIN_SET_FPSCR
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24926 /* Return the ARM builtin for CODE. */
24929 arm_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
24931 if (code
>= ARM_BUILTIN_MAX
)
24932 return error_mark_node
;
24934 return arm_builtin_decls
[code
];
24937 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24939 static const char *
24940 arm_invalid_parameter_type (const_tree t
)
24942 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24943 return N_("function parameters cannot have __fp16 type");
24947 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24949 static const char *
24950 arm_invalid_return_type (const_tree t
)
24952 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24953 return N_("functions cannot return __fp16 type");
24957 /* Implement TARGET_PROMOTED_TYPE. */
24960 arm_promoted_type (const_tree t
)
24962 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24963 return float_type_node
;
24967 /* Implement TARGET_CONVERT_TO_TYPE.
24968 Specifically, this hook implements the peculiarity of the ARM
24969 half-precision floating-point C semantics that requires conversions between
24970 __fp16 to or from double to do an intermediate conversion to float. */
24973 arm_convert_to_type (tree type
, tree expr
)
24975 tree fromtype
= TREE_TYPE (expr
);
24976 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
24978 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
24979 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
24980 return convert (type
, convert (float_type_node
, expr
));
24984 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24985 This simply adds HFmode as a supported mode; even though we don't
24986 implement arithmetic on this type directly, it's supported by
24987 optabs conversions, much the way the double-word arithmetic is
24988 special-cased in the default hook. */
24991 arm_scalar_mode_supported_p (enum machine_mode mode
)
24993 if (mode
== HFmode
)
24994 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
24995 else if (ALL_FIXED_POINT_MODE_P (mode
))
24998 return default_scalar_mode_supported_p (mode
);
25001 /* Errors in the source file can cause expand_expr to return const0_rtx
25002 where we expect a vector. To avoid crashing, use one of the vector
25003 clear instructions. */
25006 safe_vector_operand (rtx x
, enum machine_mode mode
)
25008 if (x
!= const0_rtx
)
25010 x
= gen_reg_rtx (mode
);
25012 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
25013 : gen_rtx_SUBREG (DImode
, x
, 0)));
25017 /* Function to expand ternary builtins. */
25019 arm_expand_ternop_builtin (enum insn_code icode
,
25020 tree exp
, rtx target
)
25023 tree arg0
= CALL_EXPR_ARG (exp
, 0);
25024 tree arg1
= CALL_EXPR_ARG (exp
, 1);
25025 tree arg2
= CALL_EXPR_ARG (exp
, 2);
25027 rtx op0
= expand_normal (arg0
);
25028 rtx op1
= expand_normal (arg1
);
25029 rtx op2
= expand_normal (arg2
);
25030 rtx op3
= NULL_RTX
;
25032 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
25033 lane operand depending on endianness. */
25034 bool builtin_sha1cpm_p
= false;
25036 if (insn_data
[icode
].n_operands
== 5)
25038 gcc_assert (icode
== CODE_FOR_crypto_sha1c
25039 || icode
== CODE_FOR_crypto_sha1p
25040 || icode
== CODE_FOR_crypto_sha1m
);
25041 builtin_sha1cpm_p
= true;
25043 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
25044 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
25045 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
25046 enum machine_mode mode2
= insn_data
[icode
].operand
[3].mode
;
25049 if (VECTOR_MODE_P (mode0
))
25050 op0
= safe_vector_operand (op0
, mode0
);
25051 if (VECTOR_MODE_P (mode1
))
25052 op1
= safe_vector_operand (op1
, mode1
);
25053 if (VECTOR_MODE_P (mode2
))
25054 op2
= safe_vector_operand (op2
, mode2
);
25057 || GET_MODE (target
) != tmode
25058 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25059 target
= gen_reg_rtx (tmode
);
25061 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
25062 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
)
25063 && (GET_MODE (op2
) == mode2
|| GET_MODE (op2
) == VOIDmode
));
25065 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25066 op0
= copy_to_mode_reg (mode0
, op0
);
25067 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25068 op1
= copy_to_mode_reg (mode1
, op1
);
25069 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25070 op2
= copy_to_mode_reg (mode2
, op2
);
25071 if (builtin_sha1cpm_p
)
25072 op3
= GEN_INT (TARGET_BIG_END
? 1 : 0);
25074 if (builtin_sha1cpm_p
)
25075 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
25077 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25084 /* Subroutine of arm_expand_builtin to take care of binop insns. */
25087 arm_expand_binop_builtin (enum insn_code icode
,
25088 tree exp
, rtx target
)
25091 tree arg0
= CALL_EXPR_ARG (exp
, 0);
25092 tree arg1
= CALL_EXPR_ARG (exp
, 1);
25093 rtx op0
= expand_normal (arg0
);
25094 rtx op1
= expand_normal (arg1
);
25095 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
25096 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
25097 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
25099 if (VECTOR_MODE_P (mode0
))
25100 op0
= safe_vector_operand (op0
, mode0
);
25101 if (VECTOR_MODE_P (mode1
))
25102 op1
= safe_vector_operand (op1
, mode1
);
25105 || GET_MODE (target
) != tmode
25106 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25107 target
= gen_reg_rtx (tmode
);
25109 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
25110 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
25112 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25113 op0
= copy_to_mode_reg (mode0
, op0
);
25114 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25115 op1
= copy_to_mode_reg (mode1
, op1
);
25117 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25124 /* Subroutine of arm_expand_builtin to take care of unop insns. */
25127 arm_expand_unop_builtin (enum insn_code icode
,
25128 tree exp
, rtx target
, int do_load
)
25131 tree arg0
= CALL_EXPR_ARG (exp
, 0);
25132 rtx op0
= expand_normal (arg0
);
25133 rtx op1
= NULL_RTX
;
25134 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
25135 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
25136 bool builtin_sha1h_p
= false;
25138 if (insn_data
[icode
].n_operands
== 3)
25140 gcc_assert (icode
== CODE_FOR_crypto_sha1h
);
25141 builtin_sha1h_p
= true;
25145 || GET_MODE (target
) != tmode
25146 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25147 target
= gen_reg_rtx (tmode
);
25149 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
25152 if (VECTOR_MODE_P (mode0
))
25153 op0
= safe_vector_operand (op0
, mode0
);
25155 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25156 op0
= copy_to_mode_reg (mode0
, op0
);
25158 if (builtin_sha1h_p
)
25159 op1
= GEN_INT (TARGET_BIG_END
? 1 : 0);
25161 if (builtin_sha1h_p
)
25162 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25164 pat
= GEN_FCN (icode
) (target
, op0
);
25172 NEON_ARG_COPY_TO_REG
,
25178 #define NEON_MAX_BUILTIN_ARGS 5
25180 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25181 and return an expression for the accessed memory.
25183 The intrinsic function operates on a block of registers that has
25184 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25185 function references the memory at EXP of type TYPE and in mode
25186 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25190 neon_dereference_pointer (tree exp
, tree type
, enum machine_mode mem_mode
,
25191 enum machine_mode reg_mode
,
25192 neon_builtin_type_mode type_mode
)
25194 HOST_WIDE_INT reg_size
, vector_size
, nvectors
, nelems
;
25195 tree elem_type
, upper_bound
, array_type
;
25197 /* Work out the size of the register block in bytes. */
25198 reg_size
= GET_MODE_SIZE (reg_mode
);
25200 /* Work out the size of each vector in bytes. */
25201 gcc_assert (TYPE_MODE_BIT (type_mode
) & (TB_DREG
| TB_QREG
));
25202 vector_size
= (TYPE_MODE_BIT (type_mode
) & TB_QREG
? 16 : 8);
25204 /* Work out how many vectors there are. */
25205 gcc_assert (reg_size
% vector_size
== 0);
25206 nvectors
= reg_size
/ vector_size
;
25208 /* Work out the type of each element. */
25209 gcc_assert (POINTER_TYPE_P (type
));
25210 elem_type
= TREE_TYPE (type
);
25212 /* Work out how many elements are being loaded or stored.
25213 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25214 and memory elements; anything else implies a lane load or store. */
25215 if (mem_mode
== reg_mode
)
25216 nelems
= vector_size
* nvectors
/ int_size_in_bytes (elem_type
);
25220 /* Create a type that describes the full access. */
25221 upper_bound
= build_int_cst (size_type_node
, nelems
- 1);
25222 array_type
= build_array_type (elem_type
, build_index_type (upper_bound
));
25224 /* Dereference EXP using that type. */
25225 return fold_build2 (MEM_REF
, array_type
, exp
,
25226 build_int_cst (build_pointer_type (array_type
), 0));
25229 /* Expand a Neon builtin. */
25231 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
25232 neon_builtin_type_mode type_mode
,
25233 tree exp
, int fcode
, ...)
25237 tree arg
[NEON_MAX_BUILTIN_ARGS
];
25238 rtx op
[NEON_MAX_BUILTIN_ARGS
];
25241 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
25242 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
25243 enum machine_mode other_mode
;
25249 || GET_MODE (target
) != tmode
25250 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
25251 target
= gen_reg_rtx (tmode
);
25253 va_start (ap
, fcode
);
25255 formals
= TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls
[fcode
]));
25259 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
25261 if (thisarg
== NEON_ARG_STOP
)
25265 opno
= argc
+ have_retval
;
25266 mode
[argc
] = insn_data
[icode
].operand
[opno
].mode
;
25267 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
25268 arg_type
= TREE_VALUE (formals
);
25269 if (thisarg
== NEON_ARG_MEMORY
)
25271 other_mode
= insn_data
[icode
].operand
[1 - opno
].mode
;
25272 arg
[argc
] = neon_dereference_pointer (arg
[argc
], arg_type
,
25273 mode
[argc
], other_mode
,
25277 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
25279 op
[argc
] = expand_expr (arg
[argc
], NULL_RTX
, VOIDmode
,
25280 (thisarg
== NEON_ARG_MEMORY
25281 ? EXPAND_MEMORY
: EXPAND_NORMAL
));
25285 case NEON_ARG_COPY_TO_REG
:
25286 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25287 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
25288 (op
[argc
], mode
[argc
]))
25289 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
25292 case NEON_ARG_CONSTANT
:
25293 /* FIXME: This error message is somewhat unhelpful. */
25294 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
25295 (op
[argc
], mode
[argc
]))
25296 error ("argument must be a constant");
25299 case NEON_ARG_MEMORY
:
25300 /* Check if expand failed. */
25301 if (op
[argc
] == const0_rtx
)
25303 gcc_assert (MEM_P (op
[argc
]));
25304 PUT_MODE (op
[argc
], mode
[argc
]);
25305 /* ??? arm_neon.h uses the same built-in functions for signed
25306 and unsigned accesses, casting where necessary. This isn't
25308 set_mem_alias_set (op
[argc
], 0);
25309 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
25310 (op
[argc
], mode
[argc
]))
25311 op
[argc
] = (replace_equiv_address
25312 (op
[argc
], force_reg (Pmode
, XEXP (op
[argc
], 0))));
25315 case NEON_ARG_STOP
:
25316 gcc_unreachable ();
25320 formals
= TREE_CHAIN (formals
);
25330 pat
= GEN_FCN (icode
) (target
, op
[0]);
25334 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
25338 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
25342 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
25346 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
25350 gcc_unreachable ();
25356 pat
= GEN_FCN (icode
) (op
[0]);
25360 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
25364 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
25368 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
25372 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
25376 gcc_unreachable ();
25387 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25388 constants defined per-instruction or per instruction-variant. Instead, the
25389 required info is looked up in the table neon_builtin_data. */
25391 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
25393 neon_builtin_datum
*d
= &neon_builtin_data
[fcode
- ARM_BUILTIN_NEON_BASE
];
25394 neon_itype itype
= d
->itype
;
25395 enum insn_code icode
= d
->code
;
25396 neon_builtin_type_mode type_mode
= d
->mode
;
25403 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25404 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25408 case NEON_SCALARMUL
:
25409 case NEON_SCALARMULL
:
25410 case NEON_SCALARMULH
:
25411 case NEON_SHIFTINSERT
:
25412 case NEON_LOGICBINOP
:
25413 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25414 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25418 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25419 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25420 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25424 case NEON_SHIFTIMM
:
25425 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25426 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
25430 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25431 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25436 case NEON_FLOAT_WIDEN
:
25437 case NEON_FLOAT_NARROW
:
25439 case NEON_REINTERP
:
25440 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25441 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25445 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25446 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25449 case NEON_LANEMULL
:
25450 case NEON_LANEMULH
:
25451 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25452 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25453 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25456 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25457 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25458 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25460 case NEON_SHIFTACC
:
25461 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25462 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25463 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25465 case NEON_SCALARMAC
:
25466 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25467 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25468 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25472 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25473 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25477 case NEON_LOADSTRUCT
:
25478 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25479 NEON_ARG_MEMORY
, NEON_ARG_STOP
);
25481 case NEON_LOAD1LANE
:
25482 case NEON_LOADSTRUCTLANE
:
25483 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25484 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25488 case NEON_STORESTRUCT
:
25489 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25490 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25492 case NEON_STORE1LANE
:
25493 case NEON_STORESTRUCTLANE
:
25494 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25495 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25499 gcc_unreachable ();
25502 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25504 neon_reinterpret (rtx dest
, rtx src
)
25506 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
25509 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25510 not to early-clobber SRC registers in the process.
25512 We assume that the operands described by SRC and DEST represent a
25513 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25514 number of components into which the copy has been decomposed. */
25516 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
25520 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
25521 || REGNO (operands
[0]) < REGNO (operands
[1]))
25523 for (i
= 0; i
< count
; i
++)
25525 operands
[2 * i
] = dest
[i
];
25526 operands
[2 * i
+ 1] = src
[i
];
25531 for (i
= 0; i
< count
; i
++)
25533 operands
[2 * i
] = dest
[count
- i
- 1];
25534 operands
[2 * i
+ 1] = src
[count
- i
- 1];
25539 /* Split operands into moves from op[1] + op[2] into op[0]. */
25542 neon_split_vcombine (rtx operands
[3])
25544 unsigned int dest
= REGNO (operands
[0]);
25545 unsigned int src1
= REGNO (operands
[1]);
25546 unsigned int src2
= REGNO (operands
[2]);
25547 enum machine_mode halfmode
= GET_MODE (operands
[1]);
25548 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
25549 rtx destlo
, desthi
;
25551 if (src1
== dest
&& src2
== dest
+ halfregs
)
25553 /* No-op move. Can't split to nothing; emit something. */
25554 emit_note (NOTE_INSN_DELETED
);
25558 /* Preserve register attributes for variable tracking. */
25559 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
25560 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
25561 GET_MODE_SIZE (halfmode
));
25563 /* Special case of reversed high/low parts. Use VSWP. */
25564 if (src2
== dest
&& src1
== dest
+ halfregs
)
25566 rtx x
= gen_rtx_SET (VOIDmode
, destlo
, operands
[1]);
25567 rtx y
= gen_rtx_SET (VOIDmode
, desthi
, operands
[2]);
25568 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
25572 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
25574 /* Try to avoid unnecessary moves if part of the result
25575 is in the right place already. */
25577 emit_move_insn (destlo
, operands
[1]);
25578 if (src2
!= dest
+ halfregs
)
25579 emit_move_insn (desthi
, operands
[2]);
25583 if (src2
!= dest
+ halfregs
)
25584 emit_move_insn (desthi
, operands
[2]);
25586 emit_move_insn (destlo
, operands
[1]);
25590 /* Expand an expression EXP that calls a built-in function,
25591 with result going to TARGET if that's convenient
25592 (and in mode MODE if that's convenient).
25593 SUBTARGET may be used as the target for computing one of EXP's operands.
25594 IGNORE is nonzero if the value is to be ignored. */
25597 arm_expand_builtin (tree exp
,
25599 rtx subtarget ATTRIBUTE_UNUSED
,
25600 enum machine_mode mode ATTRIBUTE_UNUSED
,
25601 int ignore ATTRIBUTE_UNUSED
)
25603 const struct builtin_description
* d
;
25604 enum insn_code icode
;
25605 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
25613 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
25615 enum machine_mode tmode
;
25616 enum machine_mode mode0
;
25617 enum machine_mode mode1
;
25618 enum machine_mode mode2
;
25624 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
25625 return arm_expand_neon_builtin (fcode
, exp
, target
);
25629 case ARM_BUILTIN_GET_FPSCR
:
25630 case ARM_BUILTIN_SET_FPSCR
:
25631 if (fcode
== ARM_BUILTIN_GET_FPSCR
)
25633 icode
= CODE_FOR_get_fpscr
;
25634 target
= gen_reg_rtx (SImode
);
25635 pat
= GEN_FCN (icode
) (target
);
25640 icode
= CODE_FOR_set_fpscr
;
25641 arg0
= CALL_EXPR_ARG (exp
, 0);
25642 op0
= expand_normal (arg0
);
25643 pat
= GEN_FCN (icode
) (op0
);
25648 case ARM_BUILTIN_TEXTRMSB
:
25649 case ARM_BUILTIN_TEXTRMUB
:
25650 case ARM_BUILTIN_TEXTRMSH
:
25651 case ARM_BUILTIN_TEXTRMUH
:
25652 case ARM_BUILTIN_TEXTRMSW
:
25653 case ARM_BUILTIN_TEXTRMUW
:
25654 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
25655 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
25656 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
25657 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
25658 : CODE_FOR_iwmmxt_textrmw
);
25660 arg0
= CALL_EXPR_ARG (exp
, 0);
25661 arg1
= CALL_EXPR_ARG (exp
, 1);
25662 op0
= expand_normal (arg0
);
25663 op1
= expand_normal (arg1
);
25664 tmode
= insn_data
[icode
].operand
[0].mode
;
25665 mode0
= insn_data
[icode
].operand
[1].mode
;
25666 mode1
= insn_data
[icode
].operand
[2].mode
;
25668 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25669 op0
= copy_to_mode_reg (mode0
, op0
);
25670 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25672 /* @@@ better error message */
25673 error ("selector must be an immediate");
25674 return gen_reg_rtx (tmode
);
25677 opint
= INTVAL (op1
);
25678 if (fcode
== ARM_BUILTIN_TEXTRMSB
|| fcode
== ARM_BUILTIN_TEXTRMUB
)
25680 if (opint
> 7 || opint
< 0)
25681 error ("the range of selector should be in 0 to 7");
25683 else if (fcode
== ARM_BUILTIN_TEXTRMSH
|| fcode
== ARM_BUILTIN_TEXTRMUH
)
25685 if (opint
> 3 || opint
< 0)
25686 error ("the range of selector should be in 0 to 3");
25688 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25690 if (opint
> 1 || opint
< 0)
25691 error ("the range of selector should be in 0 to 1");
25695 || GET_MODE (target
) != tmode
25696 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25697 target
= gen_reg_rtx (tmode
);
25698 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25704 case ARM_BUILTIN_WALIGNI
:
25705 /* If op2 is immediate, call walighi, else call walighr. */
25706 arg0
= CALL_EXPR_ARG (exp
, 0);
25707 arg1
= CALL_EXPR_ARG (exp
, 1);
25708 arg2
= CALL_EXPR_ARG (exp
, 2);
25709 op0
= expand_normal (arg0
);
25710 op1
= expand_normal (arg1
);
25711 op2
= expand_normal (arg2
);
25712 if (CONST_INT_P (op2
))
25714 icode
= CODE_FOR_iwmmxt_waligni
;
25715 tmode
= insn_data
[icode
].operand
[0].mode
;
25716 mode0
= insn_data
[icode
].operand
[1].mode
;
25717 mode1
= insn_data
[icode
].operand
[2].mode
;
25718 mode2
= insn_data
[icode
].operand
[3].mode
;
25719 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25720 op0
= copy_to_mode_reg (mode0
, op0
);
25721 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25722 op1
= copy_to_mode_reg (mode1
, op1
);
25723 gcc_assert ((*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
));
25724 selector
= INTVAL (op2
);
25725 if (selector
> 7 || selector
< 0)
25726 error ("the range of selector should be in 0 to 7");
25730 icode
= CODE_FOR_iwmmxt_walignr
;
25731 tmode
= insn_data
[icode
].operand
[0].mode
;
25732 mode0
= insn_data
[icode
].operand
[1].mode
;
25733 mode1
= insn_data
[icode
].operand
[2].mode
;
25734 mode2
= insn_data
[icode
].operand
[3].mode
;
25735 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25736 op0
= copy_to_mode_reg (mode0
, op0
);
25737 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25738 op1
= copy_to_mode_reg (mode1
, op1
);
25739 if (!(*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25740 op2
= copy_to_mode_reg (mode2
, op2
);
25743 || GET_MODE (target
) != tmode
25744 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25745 target
= gen_reg_rtx (tmode
);
25746 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25752 case ARM_BUILTIN_TINSRB
:
25753 case ARM_BUILTIN_TINSRH
:
25754 case ARM_BUILTIN_TINSRW
:
25755 case ARM_BUILTIN_WMERGE
:
25756 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
25757 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
25758 : fcode
== ARM_BUILTIN_WMERGE
? CODE_FOR_iwmmxt_wmerge
25759 : CODE_FOR_iwmmxt_tinsrw
);
25760 arg0
= CALL_EXPR_ARG (exp
, 0);
25761 arg1
= CALL_EXPR_ARG (exp
, 1);
25762 arg2
= CALL_EXPR_ARG (exp
, 2);
25763 op0
= expand_normal (arg0
);
25764 op1
= expand_normal (arg1
);
25765 op2
= expand_normal (arg2
);
25766 tmode
= insn_data
[icode
].operand
[0].mode
;
25767 mode0
= insn_data
[icode
].operand
[1].mode
;
25768 mode1
= insn_data
[icode
].operand
[2].mode
;
25769 mode2
= insn_data
[icode
].operand
[3].mode
;
25771 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25772 op0
= copy_to_mode_reg (mode0
, op0
);
25773 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25774 op1
= copy_to_mode_reg (mode1
, op1
);
25775 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25777 error ("selector must be an immediate");
25780 if (icode
== CODE_FOR_iwmmxt_wmerge
)
25782 selector
= INTVAL (op2
);
25783 if (selector
> 7 || selector
< 0)
25784 error ("the range of selector should be in 0 to 7");
25786 if ((icode
== CODE_FOR_iwmmxt_tinsrb
)
25787 || (icode
== CODE_FOR_iwmmxt_tinsrh
)
25788 || (icode
== CODE_FOR_iwmmxt_tinsrw
))
25791 selector
= INTVAL (op2
);
25792 if (icode
== CODE_FOR_iwmmxt_tinsrb
&& (selector
< 0 || selector
> 7))
25793 error ("the range of selector should be in 0 to 7");
25794 else if (icode
== CODE_FOR_iwmmxt_tinsrh
&& (selector
< 0 ||selector
> 3))
25795 error ("the range of selector should be in 0 to 3");
25796 else if (icode
== CODE_FOR_iwmmxt_tinsrw
&& (selector
< 0 ||selector
> 1))
25797 error ("the range of selector should be in 0 to 1");
25799 op2
= GEN_INT (mask
);
25802 || GET_MODE (target
) != tmode
25803 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25804 target
= gen_reg_rtx (tmode
);
25805 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25811 case ARM_BUILTIN_SETWCGR0
:
25812 case ARM_BUILTIN_SETWCGR1
:
25813 case ARM_BUILTIN_SETWCGR2
:
25814 case ARM_BUILTIN_SETWCGR3
:
25815 icode
= (fcode
== ARM_BUILTIN_SETWCGR0
? CODE_FOR_iwmmxt_setwcgr0
25816 : fcode
== ARM_BUILTIN_SETWCGR1
? CODE_FOR_iwmmxt_setwcgr1
25817 : fcode
== ARM_BUILTIN_SETWCGR2
? CODE_FOR_iwmmxt_setwcgr2
25818 : CODE_FOR_iwmmxt_setwcgr3
);
25819 arg0
= CALL_EXPR_ARG (exp
, 0);
25820 op0
= expand_normal (arg0
);
25821 mode0
= insn_data
[icode
].operand
[0].mode
;
25822 if (!(*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
25823 op0
= copy_to_mode_reg (mode0
, op0
);
25824 pat
= GEN_FCN (icode
) (op0
);
25830 case ARM_BUILTIN_GETWCGR0
:
25831 case ARM_BUILTIN_GETWCGR1
:
25832 case ARM_BUILTIN_GETWCGR2
:
25833 case ARM_BUILTIN_GETWCGR3
:
25834 icode
= (fcode
== ARM_BUILTIN_GETWCGR0
? CODE_FOR_iwmmxt_getwcgr0
25835 : fcode
== ARM_BUILTIN_GETWCGR1
? CODE_FOR_iwmmxt_getwcgr1
25836 : fcode
== ARM_BUILTIN_GETWCGR2
? CODE_FOR_iwmmxt_getwcgr2
25837 : CODE_FOR_iwmmxt_getwcgr3
);
25838 tmode
= insn_data
[icode
].operand
[0].mode
;
25840 || GET_MODE (target
) != tmode
25841 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25842 target
= gen_reg_rtx (tmode
);
25843 pat
= GEN_FCN (icode
) (target
);
25849 case ARM_BUILTIN_WSHUFH
:
25850 icode
= CODE_FOR_iwmmxt_wshufh
;
25851 arg0
= CALL_EXPR_ARG (exp
, 0);
25852 arg1
= CALL_EXPR_ARG (exp
, 1);
25853 op0
= expand_normal (arg0
);
25854 op1
= expand_normal (arg1
);
25855 tmode
= insn_data
[icode
].operand
[0].mode
;
25856 mode1
= insn_data
[icode
].operand
[1].mode
;
25857 mode2
= insn_data
[icode
].operand
[2].mode
;
25859 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
25860 op0
= copy_to_mode_reg (mode1
, op0
);
25861 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
25863 error ("mask must be an immediate");
25866 selector
= INTVAL (op1
);
25867 if (selector
< 0 || selector
> 255)
25868 error ("the range of mask should be in 0 to 255");
25870 || GET_MODE (target
) != tmode
25871 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25872 target
= gen_reg_rtx (tmode
);
25873 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25879 case ARM_BUILTIN_WMADDS
:
25880 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds
, exp
, target
);
25881 case ARM_BUILTIN_WMADDSX
:
25882 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx
, exp
, target
);
25883 case ARM_BUILTIN_WMADDSN
:
25884 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn
, exp
, target
);
25885 case ARM_BUILTIN_WMADDU
:
25886 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu
, exp
, target
);
25887 case ARM_BUILTIN_WMADDUX
:
25888 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux
, exp
, target
);
25889 case ARM_BUILTIN_WMADDUN
:
25890 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun
, exp
, target
);
25891 case ARM_BUILTIN_WSADBZ
:
25892 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
25893 case ARM_BUILTIN_WSADHZ
:
25894 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
25896 /* Several three-argument builtins. */
25897 case ARM_BUILTIN_WMACS
:
25898 case ARM_BUILTIN_WMACU
:
25899 case ARM_BUILTIN_TMIA
:
25900 case ARM_BUILTIN_TMIAPH
:
25901 case ARM_BUILTIN_TMIATT
:
25902 case ARM_BUILTIN_TMIATB
:
25903 case ARM_BUILTIN_TMIABT
:
25904 case ARM_BUILTIN_TMIABB
:
25905 case ARM_BUILTIN_WQMIABB
:
25906 case ARM_BUILTIN_WQMIABT
:
25907 case ARM_BUILTIN_WQMIATB
:
25908 case ARM_BUILTIN_WQMIATT
:
25909 case ARM_BUILTIN_WQMIABBN
:
25910 case ARM_BUILTIN_WQMIABTN
:
25911 case ARM_BUILTIN_WQMIATBN
:
25912 case ARM_BUILTIN_WQMIATTN
:
25913 case ARM_BUILTIN_WMIABB
:
25914 case ARM_BUILTIN_WMIABT
:
25915 case ARM_BUILTIN_WMIATB
:
25916 case ARM_BUILTIN_WMIATT
:
25917 case ARM_BUILTIN_WMIABBN
:
25918 case ARM_BUILTIN_WMIABTN
:
25919 case ARM_BUILTIN_WMIATBN
:
25920 case ARM_BUILTIN_WMIATTN
:
25921 case ARM_BUILTIN_WMIAWBB
:
25922 case ARM_BUILTIN_WMIAWBT
:
25923 case ARM_BUILTIN_WMIAWTB
:
25924 case ARM_BUILTIN_WMIAWTT
:
25925 case ARM_BUILTIN_WMIAWBBN
:
25926 case ARM_BUILTIN_WMIAWBTN
:
25927 case ARM_BUILTIN_WMIAWTBN
:
25928 case ARM_BUILTIN_WMIAWTTN
:
25929 case ARM_BUILTIN_WSADB
:
25930 case ARM_BUILTIN_WSADH
:
25931 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
25932 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
25933 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
25934 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
25935 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
25936 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
25937 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
25938 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
25939 : fcode
== ARM_BUILTIN_WQMIABB
? CODE_FOR_iwmmxt_wqmiabb
25940 : fcode
== ARM_BUILTIN_WQMIABT
? CODE_FOR_iwmmxt_wqmiabt
25941 : fcode
== ARM_BUILTIN_WQMIATB
? CODE_FOR_iwmmxt_wqmiatb
25942 : fcode
== ARM_BUILTIN_WQMIATT
? CODE_FOR_iwmmxt_wqmiatt
25943 : fcode
== ARM_BUILTIN_WQMIABBN
? CODE_FOR_iwmmxt_wqmiabbn
25944 : fcode
== ARM_BUILTIN_WQMIABTN
? CODE_FOR_iwmmxt_wqmiabtn
25945 : fcode
== ARM_BUILTIN_WQMIATBN
? CODE_FOR_iwmmxt_wqmiatbn
25946 : fcode
== ARM_BUILTIN_WQMIATTN
? CODE_FOR_iwmmxt_wqmiattn
25947 : fcode
== ARM_BUILTIN_WMIABB
? CODE_FOR_iwmmxt_wmiabb
25948 : fcode
== ARM_BUILTIN_WMIABT
? CODE_FOR_iwmmxt_wmiabt
25949 : fcode
== ARM_BUILTIN_WMIATB
? CODE_FOR_iwmmxt_wmiatb
25950 : fcode
== ARM_BUILTIN_WMIATT
? CODE_FOR_iwmmxt_wmiatt
25951 : fcode
== ARM_BUILTIN_WMIABBN
? CODE_FOR_iwmmxt_wmiabbn
25952 : fcode
== ARM_BUILTIN_WMIABTN
? CODE_FOR_iwmmxt_wmiabtn
25953 : fcode
== ARM_BUILTIN_WMIATBN
? CODE_FOR_iwmmxt_wmiatbn
25954 : fcode
== ARM_BUILTIN_WMIATTN
? CODE_FOR_iwmmxt_wmiattn
25955 : fcode
== ARM_BUILTIN_WMIAWBB
? CODE_FOR_iwmmxt_wmiawbb
25956 : fcode
== ARM_BUILTIN_WMIAWBT
? CODE_FOR_iwmmxt_wmiawbt
25957 : fcode
== ARM_BUILTIN_WMIAWTB
? CODE_FOR_iwmmxt_wmiawtb
25958 : fcode
== ARM_BUILTIN_WMIAWTT
? CODE_FOR_iwmmxt_wmiawtt
25959 : fcode
== ARM_BUILTIN_WMIAWBBN
? CODE_FOR_iwmmxt_wmiawbbn
25960 : fcode
== ARM_BUILTIN_WMIAWBTN
? CODE_FOR_iwmmxt_wmiawbtn
25961 : fcode
== ARM_BUILTIN_WMIAWTBN
? CODE_FOR_iwmmxt_wmiawtbn
25962 : fcode
== ARM_BUILTIN_WMIAWTTN
? CODE_FOR_iwmmxt_wmiawttn
25963 : fcode
== ARM_BUILTIN_WSADB
? CODE_FOR_iwmmxt_wsadb
25964 : CODE_FOR_iwmmxt_wsadh
);
25965 arg0
= CALL_EXPR_ARG (exp
, 0);
25966 arg1
= CALL_EXPR_ARG (exp
, 1);
25967 arg2
= CALL_EXPR_ARG (exp
, 2);
25968 op0
= expand_normal (arg0
);
25969 op1
= expand_normal (arg1
);
25970 op2
= expand_normal (arg2
);
25971 tmode
= insn_data
[icode
].operand
[0].mode
;
25972 mode0
= insn_data
[icode
].operand
[1].mode
;
25973 mode1
= insn_data
[icode
].operand
[2].mode
;
25974 mode2
= insn_data
[icode
].operand
[3].mode
;
25976 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25977 op0
= copy_to_mode_reg (mode0
, op0
);
25978 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25979 op1
= copy_to_mode_reg (mode1
, op1
);
25980 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25981 op2
= copy_to_mode_reg (mode2
, op2
);
25983 || GET_MODE (target
) != tmode
25984 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25985 target
= gen_reg_rtx (tmode
);
25986 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25992 case ARM_BUILTIN_WZERO
:
25993 target
= gen_reg_rtx (DImode
);
25994 emit_insn (gen_iwmmxt_clrdi (target
));
25997 case ARM_BUILTIN_WSRLHI
:
25998 case ARM_BUILTIN_WSRLWI
:
25999 case ARM_BUILTIN_WSRLDI
:
26000 case ARM_BUILTIN_WSLLHI
:
26001 case ARM_BUILTIN_WSLLWI
:
26002 case ARM_BUILTIN_WSLLDI
:
26003 case ARM_BUILTIN_WSRAHI
:
26004 case ARM_BUILTIN_WSRAWI
:
26005 case ARM_BUILTIN_WSRADI
:
26006 case ARM_BUILTIN_WRORHI
:
26007 case ARM_BUILTIN_WRORWI
:
26008 case ARM_BUILTIN_WRORDI
:
26009 case ARM_BUILTIN_WSRLH
:
26010 case ARM_BUILTIN_WSRLW
:
26011 case ARM_BUILTIN_WSRLD
:
26012 case ARM_BUILTIN_WSLLH
:
26013 case ARM_BUILTIN_WSLLW
:
26014 case ARM_BUILTIN_WSLLD
:
26015 case ARM_BUILTIN_WSRAH
:
26016 case ARM_BUILTIN_WSRAW
:
26017 case ARM_BUILTIN_WSRAD
:
26018 case ARM_BUILTIN_WRORH
:
26019 case ARM_BUILTIN_WRORW
:
26020 case ARM_BUILTIN_WRORD
:
26021 icode
= (fcode
== ARM_BUILTIN_WSRLHI
? CODE_FOR_lshrv4hi3_iwmmxt
26022 : fcode
== ARM_BUILTIN_WSRLWI
? CODE_FOR_lshrv2si3_iwmmxt
26023 : fcode
== ARM_BUILTIN_WSRLDI
? CODE_FOR_lshrdi3_iwmmxt
26024 : fcode
== ARM_BUILTIN_WSLLHI
? CODE_FOR_ashlv4hi3_iwmmxt
26025 : fcode
== ARM_BUILTIN_WSLLWI
? CODE_FOR_ashlv2si3_iwmmxt
26026 : fcode
== ARM_BUILTIN_WSLLDI
? CODE_FOR_ashldi3_iwmmxt
26027 : fcode
== ARM_BUILTIN_WSRAHI
? CODE_FOR_ashrv4hi3_iwmmxt
26028 : fcode
== ARM_BUILTIN_WSRAWI
? CODE_FOR_ashrv2si3_iwmmxt
26029 : fcode
== ARM_BUILTIN_WSRADI
? CODE_FOR_ashrdi3_iwmmxt
26030 : fcode
== ARM_BUILTIN_WRORHI
? CODE_FOR_rorv4hi3
26031 : fcode
== ARM_BUILTIN_WRORWI
? CODE_FOR_rorv2si3
26032 : fcode
== ARM_BUILTIN_WRORDI
? CODE_FOR_rordi3
26033 : fcode
== ARM_BUILTIN_WSRLH
? CODE_FOR_lshrv4hi3_di
26034 : fcode
== ARM_BUILTIN_WSRLW
? CODE_FOR_lshrv2si3_di
26035 : fcode
== ARM_BUILTIN_WSRLD
? CODE_FOR_lshrdi3_di
26036 : fcode
== ARM_BUILTIN_WSLLH
? CODE_FOR_ashlv4hi3_di
26037 : fcode
== ARM_BUILTIN_WSLLW
? CODE_FOR_ashlv2si3_di
26038 : fcode
== ARM_BUILTIN_WSLLD
? CODE_FOR_ashldi3_di
26039 : fcode
== ARM_BUILTIN_WSRAH
? CODE_FOR_ashrv4hi3_di
26040 : fcode
== ARM_BUILTIN_WSRAW
? CODE_FOR_ashrv2si3_di
26041 : fcode
== ARM_BUILTIN_WSRAD
? CODE_FOR_ashrdi3_di
26042 : fcode
== ARM_BUILTIN_WRORH
? CODE_FOR_rorv4hi3_di
26043 : fcode
== ARM_BUILTIN_WRORW
? CODE_FOR_rorv2si3_di
26044 : fcode
== ARM_BUILTIN_WRORD
? CODE_FOR_rordi3_di
26045 : CODE_FOR_nothing
);
26046 arg1
= CALL_EXPR_ARG (exp
, 1);
26047 op1
= expand_normal (arg1
);
26048 if (GET_MODE (op1
) == VOIDmode
)
26050 imm
= INTVAL (op1
);
26051 if ((fcode
== ARM_BUILTIN_WRORHI
|| fcode
== ARM_BUILTIN_WRORWI
26052 || fcode
== ARM_BUILTIN_WRORH
|| fcode
== ARM_BUILTIN_WRORW
)
26053 && (imm
< 0 || imm
> 32))
26055 if (fcode
== ARM_BUILTIN_WRORHI
)
26056 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
26057 else if (fcode
== ARM_BUILTIN_WRORWI
)
26058 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
26059 else if (fcode
== ARM_BUILTIN_WRORH
)
26060 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
26062 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
26064 else if ((fcode
== ARM_BUILTIN_WRORDI
|| fcode
== ARM_BUILTIN_WRORD
)
26065 && (imm
< 0 || imm
> 64))
26067 if (fcode
== ARM_BUILTIN_WRORDI
)
26068 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
26070 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
26074 if (fcode
== ARM_BUILTIN_WSRLHI
)
26075 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
26076 else if (fcode
== ARM_BUILTIN_WSRLWI
)
26077 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
26078 else if (fcode
== ARM_BUILTIN_WSRLDI
)
26079 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
26080 else if (fcode
== ARM_BUILTIN_WSLLHI
)
26081 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
26082 else if (fcode
== ARM_BUILTIN_WSLLWI
)
26083 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
26084 else if (fcode
== ARM_BUILTIN_WSLLDI
)
26085 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
26086 else if (fcode
== ARM_BUILTIN_WSRAHI
)
26087 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
26088 else if (fcode
== ARM_BUILTIN_WSRAWI
)
26089 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
26090 else if (fcode
== ARM_BUILTIN_WSRADI
)
26091 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
26092 else if (fcode
== ARM_BUILTIN_WSRLH
)
26093 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
26094 else if (fcode
== ARM_BUILTIN_WSRLW
)
26095 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
26096 else if (fcode
== ARM_BUILTIN_WSRLD
)
26097 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
26098 else if (fcode
== ARM_BUILTIN_WSLLH
)
26099 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
26100 else if (fcode
== ARM_BUILTIN_WSLLW
)
26101 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
26102 else if (fcode
== ARM_BUILTIN_WSLLD
)
26103 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
26104 else if (fcode
== ARM_BUILTIN_WSRAH
)
26105 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
26106 else if (fcode
== ARM_BUILTIN_WSRAW
)
26107 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
26109 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
26112 return arm_expand_binop_builtin (icode
, exp
, target
);
26118 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
26119 if (d
->code
== (const enum arm_builtins
) fcode
)
26120 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
26122 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
26123 if (d
->code
== (const enum arm_builtins
) fcode
)
26124 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
26126 for (i
= 0, d
= bdesc_3arg
; i
< ARRAY_SIZE (bdesc_3arg
); i
++, d
++)
26127 if (d
->code
== (const enum arm_builtins
) fcode
)
26128 return arm_expand_ternop_builtin (d
->icode
, exp
, target
);
26130 /* @@@ Should really do something sensible here. */
26134 /* Return the number (counting from 0) of
26135 the least significant set bit in MASK. */
/* Thin wrapper around GCC's ctz_hwi (count trailing zeros).  NOTE(review):
   behavior for MASK == 0 depends on ctz_hwi's contract — callers here appear
   to pass nonzero masks only; confirm against hwint.h before relying on it.  */
26138 number_of_first_bit_set (unsigned mask
)
26140 return ctz_hwi (mask
);
26143 /* Like emit_multi_reg_push, but allowing for a different set of
26144 registers to be described as saved. MASK is the set of registers
26145 to be saved; REAL_REGS is the set of registers to be described as
26146 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26149 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
26151 unsigned long regno
;
26152 rtx par
[10], tmp
, reg
, insn
;
26155 /* Build the parallel of the registers actually being stored. */
/* Iterate over the set bits of MASK, lowest first; "mask &= mask - 1"
   clears the lowest set bit each iteration.  */
26156 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
26158 regno
= ctz_hwi (mask
);
26159 reg
= gen_rtx_REG (SImode
, regno
);
/* The first register is wrapped in an UNSPEC_PUSH_MULT so the pattern is
   recognized as a multi-register push; the rest are plain USEs.  */
26162 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
26164 tmp
= gen_rtx_USE (VOIDmode
, reg
);
/* Address the push as a pre-modify of SP by -4 bytes per register pushed.  */
26169 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
26170 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
26171 tmp
= gen_frame_mem (BLKmode
, tmp
);
26172 tmp
= gen_rtx_SET (VOIDmode
, tmp
, par
[0]);
26175 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
26176 insn
= emit_insn (tmp
);
26178 /* Always build the stack adjustment note for unwind info. */
26179 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
26180 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
);
26183 /* Build the parallel of the registers recorded as saved for unwind. */
/* Same lowest-bit-first walk, this time over REAL_REGS, recording where
   each register lands relative to the new stack pointer (j * 4).  */
26184 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
26186 regno
= ctz_hwi (real_regs
);
26187 reg
= gen_rtx_REG (SImode
, regno
);
26189 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
26190 tmp
= gen_frame_mem (SImode
, tmp
);
26191 tmp
= gen_rtx_SET (VOIDmode
, tmp
, reg
);
26192 RTX_FRAME_RELATED_P (tmp
) = 1;
/* Attach the whole description as a REG_FRAME_RELATED_EXPR note so the
   DWARF unwinder sees the REAL_REGS view rather than the emitted insn.  */
26200 RTX_FRAME_RELATED_P (par
[0]) = 1;
26201 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
26204 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
26209 /* Emit code to push or pop registers to or from the stack. F is the
26210 assembly file. MASK is the registers to pop. */
26212 thumb_pop (FILE *f
, unsigned long mask
)
/* Low registers (r0-r7) are the only ones a Thumb-1 POP can name directly.  */
26215 int lo_mask
= mask
& 0xFF;
26216 int pushed_words
= 0;
26220 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
26222 /* Special case. Do not generate a POP PC statement here, do it in
   thumb_exit instead, which knows how to return correctly.  */
26224 thumb_exit (f
, -1);
26228 fprintf (f
, "\tpop\t{");
26230 /* Look at the low registers first. */
26231 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
26235 asm_fprintf (f
, "%r", regno
);
/* Emit a separator only when more low registers remain in the mask.  */
26237 if ((lo_mask
& ~1) != 0)
26244 if (mask
& (1 << PC_REGNUM
))
26246 /* Catch popping the PC. */
26247 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
26248 || crtl
->calls_eh_return
)
26250 /* The PC is never popped directly, instead
26251 it is popped into r3 and then BX is used. */
26252 fprintf (f
, "}\n");
26254 thumb_exit (f
, -1);
/* Otherwise a plain "pop {..., pc}" is a valid return here.  */
26263 asm_fprintf (f
, "%r", PC_REGNUM
);
26267 fprintf (f
, "}\n");
26270 /* Generate code to return from a thumb function.
26271 If 'reg_containing_return_addr' is -1, then the return address is
26272 actually on the stack, at the stack pointer. */
26274 thumb_exit (FILE *f
, int reg_containing_return_addr
)
26276 unsigned regs_available_for_popping
;
26277 unsigned regs_to_pop
;
26279 unsigned available
;
26281 enum machine_mode mode
;
26283 int restore_a4
= FALSE
;
26285 /* Compute the registers we need to pop. */
/* Return address still on the stack => LR must be popped.  */
26289 if (reg_containing_return_addr
== -1)
26291 regs_to_pop
|= 1 << LR_REGNUM
;
26295 if (TARGET_BACKTRACE
)
26297 /* Restore the (ARM) frame pointer and stack pointer. */
26298 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
26302 /* If there is nothing to pop then just emit the BX instruction and
   return.  */
26304 if (pops_needed
== 0)
26306 if (crtl
->calls_eh_return
)
26307 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26309 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26312 /* Otherwise if we are not supporting interworking and we have not created
26313 a backtrace structure and the function was not entered in ARM mode then
26314 just pop the return address straight into the PC. */
26315 else if (!TARGET_INTERWORK
26316 && !TARGET_BACKTRACE
26317 && !is_called_in_ARM_mode (current_function_decl
)
26318 && !crtl
->calls_eh_return
)
26320 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
26324 /* Find out how many of the (return) argument registers we can corrupt. */
26325 regs_available_for_popping
= 0;
26327 /* If returning via __builtin_eh_return, the bottom three registers
26328 all contain information needed for the return. */
26329 if (crtl
->calls_eh_return
)
26333 /* If we can deduce the registers used from the function's
26334 return value. This is more reliable that examining
26335 df_regs_ever_live_p () because that will be set if the register is
26336 ever used in the function, not just if the register is used
26337 to hold a return value. */
26339 if (crtl
->return_rtx
!= 0)
26340 mode
= GET_MODE (crtl
->return_rtx
);
26342 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
26344 size
= GET_MODE_SIZE (mode
);
26348 /* In a void function we can use any argument register.
26349 In a function that returns a structure on the stack
26350 we can use the second and third argument registers. */
26351 if (mode
== VOIDmode
)
26352 regs_available_for_popping
=
26353 (1 << ARG_REGISTER (1))
26354 | (1 << ARG_REGISTER (2))
26355 | (1 << ARG_REGISTER (3));
26357 regs_available_for_popping
=
26358 (1 << ARG_REGISTER (2))
26359 | (1 << ARG_REGISTER (3));
26361 else if (size
<= 4)
26362 regs_available_for_popping
=
26363 (1 << ARG_REGISTER (2))
26364 | (1 << ARG_REGISTER (3));
26365 else if (size
<= 8)
26366 regs_available_for_popping
=
26367 (1 << ARG_REGISTER (3));
26370 /* Match registers to be popped with registers into which we pop them. */
/* "x & -x" isolates the lowest set bit; each iteration consumes one
   available register and one required register in tandem.  */
26371 for (available
= regs_available_for_popping
,
26372 required
= regs_to_pop
;
26373 required
!= 0 && available
!= 0;
26374 available
&= ~(available
& - available
),
26375 required
&= ~(required
& - required
))
26378 /* If we have any popping registers left over, remove them. */
26380 regs_available_for_popping
&= ~available
;
26382 /* Otherwise if we need another popping register we can use
26383 the fourth argument register. */
26384 else if (pops_needed
)
26386 /* If we have not found any free argument registers and
26387 reg a4 contains the return address, we must move it. */
26388 if (regs_available_for_popping
== 0
26389 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
26391 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26392 reg_containing_return_addr
= LR_REGNUM
;
26394 else if (size
> 12)
26396 /* Register a4 is being used to hold part of the return value,
26397 but we have dire need of a free, low register. */
/* Stash a4 in IP so it can be restored after the pops (see the
   restore_a4 handling near the end of this function).  */
26400 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
26403 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
26405 /* The fourth argument register is available. */
26406 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
26412 /* Pop as many registers as we can. */
26413 thumb_pop (f
, regs_available_for_popping
);
26415 /* Process the registers we popped. */
26416 if (reg_containing_return_addr
== -1)
26418 /* The return address was popped into the lowest numbered register. */
26419 regs_to_pop
&= ~(1 << LR_REGNUM
);
26421 reg_containing_return_addr
=
26422 number_of_first_bit_set (regs_available_for_popping
);
26424 /* Remove this register for the mask of available registers, so that
26425 the return address will not be corrupted by further pops. */
26426 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
26429 /* If we popped other registers then handle them here. */
26430 if (regs_available_for_popping
)
26434 /* Work out which register currently contains the frame pointer. */
26435 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26437 /* Move it into the correct place. */
26438 asm_fprintf (f
, "\tmov\t%r, %r\n",
26439 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
26441 /* (Temporarily) remove it from the mask of popped registers. */
26442 regs_available_for_popping
&= ~(1 << frame_pointer
);
26443 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
26445 if (regs_available_for_popping
)
26449 /* We popped the stack pointer as well,
26450 find the register that contains it. */
26451 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26453 /* Move it into the stack register. */
26454 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
26456 /* At this point we have popped all necessary registers, so
26457 do not worry about restoring regs_available_for_popping
26458 to its correct value:
26460 assert (pops_needed == 0)
26461 assert (regs_available_for_popping == (1 << frame_pointer))
26462 assert (regs_to_pop == (1 << STACK_POINTER)) */
26466 /* Since we have just move the popped value into the frame
26467 pointer, the popping register is available for reuse, and
26468 we know that we still have the stack pointer left to pop. */
26469 regs_available_for_popping
|= (1 << frame_pointer
);
26473 /* If we still have registers left on the stack, but we no longer have
26474 any registers into which we can pop them, then we must move the return
26475 address into the link register and make available the register that
   contained it.  */
26477 if (regs_available_for_popping
== 0 && pops_needed
> 0)
26479 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
26481 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
26482 reg_containing_return_addr
);
26484 reg_containing_return_addr
= LR_REGNUM
;
26487 /* If we have registers left on the stack then pop some more.
26488 We know that at most we will want to pop FP and SP. */
26489 if (pops_needed
> 0)
26494 thumb_pop (f
, regs_available_for_popping
);
26496 /* We have popped either FP or SP.
26497 Move whichever one it is into the correct register. */
26498 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26499 move_to
= number_of_first_bit_set (regs_to_pop
);
26501 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
26503 regs_to_pop
&= ~(1 << move_to
);
26508 /* If we still have not popped everything then we must have only
26509 had one register available to us and we are now popping the SP. */
26510 if (pops_needed
> 0)
26514 thumb_pop (f
, regs_available_for_popping
);
26516 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26518 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
/* The two "assert" lines below are documentation-style invariants from the
   original source, not executable assertions.  */
26520 assert (regs_to_pop == (1 << STACK_POINTER))
26521 assert (pops_needed == 1)
26525 /* If necessary restore the a4 register. */
26528 if (reg_containing_return_addr
!= LR_REGNUM
)
26530 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26531 reg_containing_return_addr
= LR_REGNUM
;
/* Recover a4 from IP, where it was stashed earlier.  */
26534 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
/* Undo the __builtin_eh_return stack adjustment before returning.  */
26537 if (crtl
->calls_eh_return
)
26538 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26540 /* Return to caller. */
26541 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26544 /* Scan INSN just before assembler is output for it.
26545 For Thumb-1, we track the status of the condition codes; this
26546 information is used in the cbranchsi4_insn pattern. */
26548 thumb1_final_prescan_insn (rtx insn
)
/* Optionally annotate the asm output with the insn's address.  */
26550 if (flag_print_asm_name
)
26551 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
26552 INSN_ADDRESSES (INSN_UID (insn
)));
26553 /* Don't overwrite the previous setter when we get to a cbranch. */
26554 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
26556 enum attr_conds conds
;
/* Invalidate the cached CC setter if this insn clobbers either operand
   it was comparing.  */
26558 if (cfun
->machine
->thumb1_cc_insn
)
26560 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
26561 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
26564 conds
= get_attr_conds (insn
);
26565 if (conds
== CONDS_SET
)
/* This insn sets the condition codes: record it as a compare of its
   destination against zero (CC_NOOV comparison).  */
26567 rtx set
= single_set (insn
);
26568 cfun
->machine
->thumb1_cc_insn
= insn
;
26569 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
26570 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
26571 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
26572 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
/* subs rd, rn, #0 sets the full flags, so plain CCmode applies.  */
26574 rtx src1
= XEXP (SET_SRC (set
), 1);
26575 if (src1
== const0_rtx
)
26576 cfun
->machine
->thumb1_cc_mode
= CCmode
;
26578 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
26580 /* Record the src register operand instead of dest because
26581 cprop_hardreg pass propagates src. */
26582 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
/* Any other flag-touching insn invalidates the cached setter; insns
   with CONDS_NOCOND leave the cache alone.  */
26585 else if (conds
!= CONDS_NOCOND
)
26586 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
26589 /* Check if unexpected far jump is used. */
/* A far jump after LR-save elimination would need LR on the stack,
   which the prologue no longer provides — this is a compiler bug.  */
26590 if (cfun
->machine
->lr_save_eliminated
26591 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26592 internal_error("Unexpected thumb1 far jump");
/* Return nonzero if VAL (truncated to 32 bits) can be formed by shifting
   an 8-bit constant left — i.e. it fits in a single contiguous byte window.
   NOTE(review): the returns inside the loop are missing from this extraction;
   presumably a match returns i + 1 (or nonzero) — confirm against arm.c.  */
26596 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
26598 unsigned HOST_WIDE_INT mask
= 0xff;
/* Work on the low 32 bits only.  */
26601 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
26602 if (val
== 0) /* XXX */
/* Slide the 8-bit mask across all 25 possible byte positions in 32 bits.  */
26605 for (i
= 0; i
< 25; i
++)
26606 if ((val
& (mask
<< i
)) == val
)
26612 /* Returns nonzero if the current function contains,
26613 or might contain a far jump. */
26615 thumb_far_jump_used_p (void)
26618 bool far_jump
= false;
26619 unsigned int func_size
= 0;
26621 /* This test is only important for leaf functions. */
26622 /* assert (!leaf_function_p ()); */
26624 /* If we have already decided that far jumps may be used,
26625 do not bother checking again, and always return true even if
26626 it turns out that they are not being used. Once we have made
26627 the decision that far jumps are present (and that hence the link
26628 register will be pushed onto the stack) we cannot go back on it. */
26629 if (cfun
->machine
->far_jump_used
)
26632 /* If this function is not being called from the prologue/epilogue
26633 generation code then it must be being called from the
26634 INITIAL_ELIMINATION_OFFSET macro. */
26635 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
26637 /* In this case we know that we are being asked about the elimination
26638 of the arg pointer register. If that register is not being used,
26639 then there are no arguments on the stack, and we do not have to
26640 worry that a far jump might force the prologue to push the link
26641 register, changing the stack offsets. In this case we can just
26642 return false, since the presence of far jumps in the function will
26643 not affect stack offsets.
26645 If the arg pointer is live (or if it was live, but has now been
26646 eliminated and so set to dead) then we do have to test to see if
26647 the function might contain a far jump. This test can lead to some
26648 false negatives, since before reload is completed, then length of
26649 branch instructions is not known, so gcc defaults to returning their
26650 longest length, which in turn sets the far jump attribute to true.
26652 A false negative will not result in bad code being generated, but it
26653 will result in a needless push and pop of the link register. We
26654 hope that this does not occur too often.
26656 If we need doubleword stack alignment this could affect the other
26657 elimination offsets so we can't risk getting it wrong. */
26658 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
26659 cfun
->machine
->arg_pointer_live
= 1;
26660 else if (!cfun
->machine
->arg_pointer_live
)
26664 /* We should not change far_jump_used during or after reload, as there is
26665 no chance to change stack frame layout. */
26666 if (reload_in_progress
|| reload_completed
)
26669 /* Check to see if the function contains a branch
26670 insn with the far jump attribute set. */
26671 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
26673 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26677 func_size
+= get_attr_length (insn
);
26680 /* Attribute far_jump will always be true for thumb1 before
26681 shorten_branch pass. So checking far_jump attribute before
26682 shorten_branch isn't much useful.
26684 Following heuristic tries to estimate more accurately if a far jump
26685 may finally be used. The heuristic is very conservative as there is
26686 no chance to roll-back the decision of not to use far jump.
26688 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26689 2-byte insn is associated with a 4 byte constant pool. Using
26690 function size 2048/3 as the threshold is conservative enough. */
26693 if ((func_size
* 3) >= 2048)
26695 /* Record the fact that we have decided that
26696 the function does use far jumps. */
26697 cfun
->machine
->far_jump_used
= 1;
26705 /* Return nonzero if FUNC must be entered in ARM mode. */
26707 is_called_in_ARM_mode (tree func
)
26709 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
26711 /* Ignore the problem about functions whose address is taken. */
26712 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
26716 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
26722 /* Given the stack offsets and register mask in OFFSETS, decide how
26723 many additional registers to push instead of subtracting a constant
26724 from SP. For epilogues the principle is the same except we use pop.
26725 FOR_PROLOGUE indicates which we're generating. */
26727 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
26729 HOST_WIDE_INT amount
;
26730 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
26731 /* Extract a mask of the ones we can give to the Thumb's push/pop
26733 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
26734 /* Then count how many other high registers will need to be pushed. */
26735 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26736 int n_free
, reg_base
, size
;
26738 if (!for_prologue
&& frame_pointer_needed
)
26739 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26741 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26743 /* If the stack frame size is 512 exactly, we can save one load
26744 instruction, which should make this a win even when optimizing
26746 if (!optimize_size
&& amount
!= 512)
26749 /* Can't do this if there are high registers to push. */
26750 if (high_regs_pushed
!= 0)
26753 /* Shouldn't do it in the prologue if no registers would normally
26754 be pushed at all. In the epilogue, also allow it if we'll have
26755 a pop insn for the PC. */
26758 || TARGET_BACKTRACE
26759 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
26760 || TARGET_INTERWORK
26761 || crtl
->args
.pretend_args_size
!= 0))
26764 /* Don't do this if thumb_expand_prologue wants to emit instructions
26765 between the push and the stack frame allocation. */
26767 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26768 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
26775 size
= arm_size_return_regs ();
26776 reg_base
= ARM_NUM_INTS (size
);
26777 live_regs_mask
>>= reg_base
;
26780 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
26781 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
26783 live_regs_mask
>>= 1;
26789 gcc_assert (amount
/ 4 * 4 == amount
);
26791 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
26792 return (amount
- 508) / 4;
26793 if (amount
<= n_free
* 4)
26798 /* The bits which aren't usefully expanded as rtl. */
26800 thumb1_unexpanded_epilogue (void)
26802 arm_stack_offsets
*offsets
;
26804 unsigned long live_regs_mask
= 0;
26805 int high_regs_pushed
= 0;
26807 int had_to_push_lr
;
26810 if (cfun
->machine
->return_used_this_function
!= 0)
26813 if (IS_NAKED (arm_current_func_type ()))
26816 offsets
= arm_get_frame_offsets ();
26817 live_regs_mask
= offsets
->saved_regs_mask
;
26818 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26820 /* If we can deduce the registers used from the function's return value.
26821 This is more reliable that examining df_regs_ever_live_p () because that
26822 will be set if the register is ever used in the function, not just if
26823 the register is used to hold a return value. */
26824 size
= arm_size_return_regs ();
26826 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
26829 unsigned long extra_mask
= (1 << extra_pop
) - 1;
26830 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
26833 /* The prolog may have pushed some high registers to use as
26834 work registers. e.g. the testsuite file:
26835 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26836 compiles to produce:
26837 push {r4, r5, r6, r7, lr}
26841 as part of the prolog. We have to undo that pushing here. */
26843 if (high_regs_pushed
)
26845 unsigned long mask
= live_regs_mask
& 0xff;
26848 /* The available low registers depend on the size of the value we are
26856 /* Oh dear! We have no low registers into which we can pop
26859 ("no low registers available for popping high registers");
26861 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
26862 if (live_regs_mask
& (1 << next_hi_reg
))
26865 while (high_regs_pushed
)
26867 /* Find lo register(s) into which the high register(s) can
26869 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
26871 if (mask
& (1 << regno
))
26872 high_regs_pushed
--;
26873 if (high_regs_pushed
== 0)
26877 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
26879 /* Pop the values into the low register(s). */
26880 thumb_pop (asm_out_file
, mask
);
26882 /* Move the value(s) into the high registers. */
26883 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
26885 if (mask
& (1 << regno
))
26887 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
26890 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
26891 if (live_regs_mask
& (1 << next_hi_reg
))
26896 live_regs_mask
&= ~0x0f00;
26899 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
26900 live_regs_mask
&= 0xff;
26902 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
26904 /* Pop the return address into the PC. */
26905 if (had_to_push_lr
)
26906 live_regs_mask
|= 1 << PC_REGNUM
;
26908 /* Either no argument registers were pushed or a backtrace
26909 structure was created which includes an adjusted stack
26910 pointer, so just pop everything. */
26911 if (live_regs_mask
)
26912 thumb_pop (asm_out_file
, live_regs_mask
);
26914 /* We have either just popped the return address into the
26915 PC or it is was kept in LR for the entire function.
26916 Note that thumb_pop has already called thumb_exit if the
26917 PC was in the list. */
26918 if (!had_to_push_lr
)
26919 thumb_exit (asm_out_file
, LR_REGNUM
);
26923 /* Pop everything but the return address. */
26924 if (live_regs_mask
)
26925 thumb_pop (asm_out_file
, live_regs_mask
);
26927 if (had_to_push_lr
)
26931 /* We have no free low regs, so save one. */
26932 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
26936 /* Get the return address into a temporary register. */
26937 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
26941 /* Move the return address to lr. */
26942 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
26944 /* Restore the low register. */
26945 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
26950 regno
= LAST_ARG_REGNUM
;
26955 /* Remove the argument registers that were pushed onto the stack. */
26956 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
26957 SP_REGNUM
, SP_REGNUM
,
26958 crtl
->args
.pretend_args_size
);
26960 thumb_exit (asm_out_file
, regno
);
26966 /* Functions to save and restore machine-specific function data. */
26967 static struct machine_function
*
26968 arm_init_machine_status (void)
26970 struct machine_function
*machine
;
26971 machine
= ggc_cleared_alloc
<machine_function
> ();
26973 #if ARM_FT_UNKNOWN != 0
26974 machine
->func_type
= ARM_FT_UNKNOWN
;
26979 /* Return an RTX indicating where the return address to the
26980 calling function can be found. */
26982 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
26987 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
26990 /* Do anything needed before RTL is emitted for each function. */
26992 arm_init_expanders (void)
26994 /* Arrange to initialize and mark the machine per-function status. */
26995 init_machine_status
= arm_init_machine_status
;
26997 /* This is to stop the combine pass optimizing away the alignment
26998 adjustment of va_arg. */
26999 /* ??? It is claimed that this should not be necessary. */
27001 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
27005 /* Like arm_compute_initial_elimination offset. Simpler because there
27006 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27007 to point at the base of the local variables after static stack
27008 space for a function has been allocated. */
27011 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
27013 arm_stack_offsets
*offsets
;
27015 offsets
= arm_get_frame_offsets ();
27019 case ARG_POINTER_REGNUM
:
27022 case STACK_POINTER_REGNUM
:
27023 return offsets
->outgoing_args
- offsets
->saved_args
;
27025 case FRAME_POINTER_REGNUM
:
27026 return offsets
->soft_frame
- offsets
->saved_args
;
27028 case ARM_HARD_FRAME_POINTER_REGNUM
:
27029 return offsets
->saved_regs
- offsets
->saved_args
;
27031 case THUMB_HARD_FRAME_POINTER_REGNUM
:
27032 return offsets
->locals_base
- offsets
->saved_args
;
27035 gcc_unreachable ();
27039 case FRAME_POINTER_REGNUM
:
27042 case STACK_POINTER_REGNUM
:
27043 return offsets
->outgoing_args
- offsets
->soft_frame
;
27045 case ARM_HARD_FRAME_POINTER_REGNUM
:
27046 return offsets
->saved_regs
- offsets
->soft_frame
;
27048 case THUMB_HARD_FRAME_POINTER_REGNUM
:
27049 return offsets
->locals_base
- offsets
->soft_frame
;
27052 gcc_unreachable ();
27057 gcc_unreachable ();
27061 /* Generate the function's prologue. */
27064 thumb1_expand_prologue (void)
27068 HOST_WIDE_INT amount
;
27069 arm_stack_offsets
*offsets
;
27070 unsigned long func_type
;
27072 unsigned long live_regs_mask
;
27073 unsigned long l_mask
;
27074 unsigned high_regs_pushed
= 0;
27076 func_type
= arm_current_func_type ();
27078 /* Naked functions don't have prologues. */
27079 if (IS_NAKED (func_type
))
27082 if (IS_INTERRUPT (func_type
))
27084 error ("interrupt Service Routines cannot be coded in Thumb mode");
27088 if (is_called_in_ARM_mode (current_function_decl
))
27089 emit_insn (gen_prologue_thumb1_interwork ());
27091 offsets
= arm_get_frame_offsets ();
27092 live_regs_mask
= offsets
->saved_regs_mask
;
27094 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27095 l_mask
= live_regs_mask
& 0x40ff;
27096 /* Then count how many other high registers will need to be pushed. */
27097 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
27099 if (crtl
->args
.pretend_args_size
)
27101 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
27103 if (cfun
->machine
->uses_anonymous_args
)
27105 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
27106 unsigned long mask
;
27108 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
27109 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
27111 insn
= thumb1_emit_multi_reg_push (mask
, 0);
27115 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27116 stack_pointer_rtx
, x
));
27118 RTX_FRAME_RELATED_P (insn
) = 1;
27121 if (TARGET_BACKTRACE
)
27123 HOST_WIDE_INT offset
= 0;
27124 unsigned work_register
;
27125 rtx work_reg
, x
, arm_hfp_rtx
;
27127 /* We have been asked to create a stack backtrace structure.
27128 The code looks like this:
27132 0 sub SP, #16 Reserve space for 4 registers.
27133 2 push {R7} Push low registers.
27134 4 add R7, SP, #20 Get the stack pointer before the push.
27135 6 str R7, [SP, #8] Store the stack pointer
27136 (before reserving the space).
27137 8 mov R7, PC Get hold of the start of this code + 12.
27138 10 str R7, [SP, #16] Store it.
27139 12 mov R7, FP Get hold of the current frame pointer.
27140 14 str R7, [SP, #4] Store it.
27141 16 mov R7, LR Get hold of the current return address.
27142 18 str R7, [SP, #12] Store it.
27143 20 add R7, SP, #16 Point at the start of the
27144 backtrace structure.
27145 22 mov FP, R7 Put this value into the frame pointer. */
27147 work_register
= thumb_find_work_register (live_regs_mask
);
27148 work_reg
= gen_rtx_REG (SImode
, work_register
);
27149 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
27151 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27152 stack_pointer_rtx
, GEN_INT (-16)));
27153 RTX_FRAME_RELATED_P (insn
) = 1;
27157 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
27158 RTX_FRAME_RELATED_P (insn
) = 1;
27160 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
27163 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
27164 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27166 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
27167 x
= gen_frame_mem (SImode
, x
);
27168 emit_move_insn (x
, work_reg
);
27170 /* Make sure that the instruction fetching the PC is in the right place
27171 to calculate "start of backtrace creation code + 12". */
27172 /* ??? The stores using the common WORK_REG ought to be enough to
27173 prevent the scheduler from doing anything weird. Failing that
27174 we could always move all of the following into an UNSPEC_VOLATILE. */
27177 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27178 emit_move_insn (work_reg
, x
);
27180 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27181 x
= gen_frame_mem (SImode
, x
);
27182 emit_move_insn (x
, work_reg
);
27184 emit_move_insn (work_reg
, arm_hfp_rtx
);
27186 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27187 x
= gen_frame_mem (SImode
, x
);
27188 emit_move_insn (x
, work_reg
);
27192 emit_move_insn (work_reg
, arm_hfp_rtx
);
27194 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27195 x
= gen_frame_mem (SImode
, x
);
27196 emit_move_insn (x
, work_reg
);
27198 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27199 emit_move_insn (work_reg
, x
);
27201 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27202 x
= gen_frame_mem (SImode
, x
);
27203 emit_move_insn (x
, work_reg
);
27206 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
27207 emit_move_insn (work_reg
, x
);
27209 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
27210 x
= gen_frame_mem (SImode
, x
);
27211 emit_move_insn (x
, work_reg
);
27213 x
= GEN_INT (offset
+ 12);
27214 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27216 emit_move_insn (arm_hfp_rtx
, work_reg
);
27218 /* Optimization: If we are not pushing any low registers but we are going
27219 to push some high registers then delay our first push. This will just
27220 be a push of LR and we can combine it with the push of the first high
27222 else if ((l_mask
& 0xff) != 0
27223 || (high_regs_pushed
== 0 && l_mask
))
27225 unsigned long mask
= l_mask
;
27226 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
27227 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
27228 RTX_FRAME_RELATED_P (insn
) = 1;
27231 if (high_regs_pushed
)
27233 unsigned pushable_regs
;
27234 unsigned next_hi_reg
;
27235 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
27236 : crtl
->args
.info
.nregs
;
27237 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
27239 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
27240 if (live_regs_mask
& (1 << next_hi_reg
))
27243 /* Here we need to mask out registers used for passing arguments
27244 even if they can be pushed. This is to avoid using them to stash the high
27245 registers. Such kind of stash may clobber the use of arguments. */
27246 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
27248 if (pushable_regs
== 0)
27249 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
27251 while (high_regs_pushed
> 0)
27253 unsigned long real_regs_mask
= 0;
27255 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
27257 if (pushable_regs
& (1 << regno
))
27259 emit_move_insn (gen_rtx_REG (SImode
, regno
),
27260 gen_rtx_REG (SImode
, next_hi_reg
));
27262 high_regs_pushed
--;
27263 real_regs_mask
|= (1 << next_hi_reg
);
27265 if (high_regs_pushed
)
27267 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
27269 if (live_regs_mask
& (1 << next_hi_reg
))
27274 pushable_regs
&= ~((1 << regno
) - 1);
27280 /* If we had to find a work register and we have not yet
27281 saved the LR then add it to the list of regs to push. */
27282 if (l_mask
== (1 << LR_REGNUM
))
27284 pushable_regs
|= l_mask
;
27285 real_regs_mask
|= l_mask
;
27289 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
27290 RTX_FRAME_RELATED_P (insn
) = 1;
27294 /* Load the pic register before setting the frame pointer,
27295 so we can use r7 as a temporary work register. */
27296 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
27297 arm_load_pic_register (live_regs_mask
);
27299 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
27300 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
27301 stack_pointer_rtx
);
27303 if (flag_stack_usage_info
)
27304 current_function_static_stack_size
27305 = offsets
->outgoing_args
- offsets
->saved_args
;
27307 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27308 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
27313 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27314 GEN_INT (- amount
)));
27315 RTX_FRAME_RELATED_P (insn
) = 1;
27321 /* The stack decrement is too big for an immediate value in a single
27322 insn. In theory we could issue multiple subtracts, but after
27323 three of them it becomes more space efficient to place the full
27324 value in the constant pool and load into a register. (Also the
27325 ARM debugger really likes to see only one stack decrement per
27326 function). So instead we look for a scratch register into which
27327 we can load the decrement, and then we subtract this from the
27328 stack pointer. Unfortunately on the thumb the only available
27329 scratch registers are the argument registers, and we cannot use
27330 these as they may hold arguments to the function. Instead we
27331 attempt to locate a call preserved register which is used by this
27332 function. If we can find one, then we know that it will have
27333 been pushed at the start of the prologue and so we can corrupt
27335 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
27336 if (live_regs_mask
& (1 << regno
))
27339 gcc_assert(regno
<= LAST_LO_REGNUM
);
27341 reg
= gen_rtx_REG (SImode
, regno
);
27343 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
27345 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27346 stack_pointer_rtx
, reg
));
27348 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
27349 plus_constant (Pmode
, stack_pointer_rtx
,
27351 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
27352 RTX_FRAME_RELATED_P (insn
) = 1;
27356 if (frame_pointer_needed
)
27357 thumb_set_frame_pointer (offsets
);
27359 /* If we are profiling, make sure no instructions are scheduled before
27360 the call to mcount. Similarly if the user has requested no
27361 scheduling in the prolog. Similarly if we want non-call exceptions
27362 using the EABI unwinder, to prevent faulting instructions from being
27363 swapped with a stack adjustment. */
27364 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
27365 || (arm_except_unwind_info (&global_options
) == UI_TARGET
27366 && cfun
->can_throw_non_call_exceptions
))
27367 emit_insn (gen_blockage ());
27369 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
27370 if (live_regs_mask
& 0xff)
27371 cfun
->machine
->lr_save_eliminated
= 0;
27374 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
27375 POP instruction can be generated. LR should be replaced by PC. All
27376 the checks required are already done by USE_RETURN_INSN (). Hence,
27377 all we really need to check here is if single register is to be
27378 returned, or multiple register return. */
27380 thumb2_expand_return (bool simple_return
)
27383 unsigned long saved_regs_mask
;
27384 arm_stack_offsets
*offsets
;
27386 offsets
= arm_get_frame_offsets ();
27387 saved_regs_mask
= offsets
->saved_regs_mask
;
27389 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27390 if (saved_regs_mask
& (1 << i
))
27393 if (!simple_return
&& saved_regs_mask
)
27397 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27398 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
27399 rtx addr
= gen_rtx_MEM (SImode
,
27400 gen_rtx_POST_INC (SImode
,
27401 stack_pointer_rtx
));
27402 set_mem_alias_set (addr
, get_frame_alias_set ());
27403 XVECEXP (par
, 0, 0) = ret_rtx
;
27404 XVECEXP (par
, 0, 1) = gen_rtx_SET (SImode
, reg
, addr
);
27405 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
27406 emit_jump_insn (par
);
27410 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
27411 saved_regs_mask
|= (1 << PC_REGNUM
);
27412 arm_emit_multi_reg_pop (saved_regs_mask
);
27417 emit_jump_insn (simple_return_rtx
);
27422 thumb1_expand_epilogue (void)
27424 HOST_WIDE_INT amount
;
27425 arm_stack_offsets
*offsets
;
27428 /* Naked functions don't have prologues. */
27429 if (IS_NAKED (arm_current_func_type ()))
27432 offsets
= arm_get_frame_offsets ();
27433 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27435 if (frame_pointer_needed
)
27437 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
27438 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27440 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
27442 gcc_assert (amount
>= 0);
27445 emit_insn (gen_blockage ());
27448 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27449 GEN_INT (amount
)));
27452 /* r3 is always free in the epilogue. */
27453 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
27455 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
27456 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
27460 /* Emit a USE (stack_pointer_rtx), so that
27461 the stack adjustment will not be deleted. */
27462 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27464 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
27465 emit_insn (gen_blockage ());
27467 /* Emit a clobber for each insn that will be restored in the epilogue,
27468 so that flow2 will get register lifetimes correct. */
27469 for (regno
= 0; regno
< 13; regno
++)
27470 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
27471 emit_clobber (gen_rtx_REG (SImode
, regno
));
27473 if (! df_regs_ever_live_p (LR_REGNUM
))
27474 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
27477 /* Epilogue code for APCS frame. */
27479 arm_expand_epilogue_apcs_frame (bool really_return
)
27481 unsigned long func_type
;
27482 unsigned long saved_regs_mask
;
27485 int floats_from_frame
= 0;
27486 arm_stack_offsets
*offsets
;
27488 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
27489 func_type
= arm_current_func_type ();
27491 /* Get frame offsets for ARM. */
27492 offsets
= arm_get_frame_offsets ();
27493 saved_regs_mask
= offsets
->saved_regs_mask
;
27495 /* Find the offset of the floating-point save area in the frame. */
27497 = (offsets
->saved_args
27498 + arm_compute_static_chain_stack_bytes ()
27501 /* Compute how many core registers saved and how far away the floats are. */
27502 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27503 if (saved_regs_mask
& (1 << i
))
27506 floats_from_frame
+= 4;
27509 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27512 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
27514 /* The offset is from IP_REGNUM. */
27515 int saved_size
= arm_get_vfp_saved_size ();
27516 if (saved_size
> 0)
27519 floats_from_frame
+= saved_size
;
27520 insn
= emit_insn (gen_addsi3 (ip_rtx
,
27521 hard_frame_pointer_rtx
,
27522 GEN_INT (-floats_from_frame
)));
27523 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
27524 ip_rtx
, hard_frame_pointer_rtx
);
27527 /* Generate VFP register multi-pop. */
27528 start_reg
= FIRST_VFP_REGNUM
;
27530 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
27531 /* Look for a case where a reg does not need restoring. */
27532 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27533 && (!df_regs_ever_live_p (i
+ 1)
27534 || call_used_regs
[i
+ 1]))
27536 if (start_reg
!= i
)
27537 arm_emit_vfp_multi_reg_pop (start_reg
,
27538 (i
- start_reg
) / 2,
27539 gen_rtx_REG (SImode
,
27544 /* Restore the remaining regs that we have discovered (or possibly
27545 even all of them, if the conditional in the for loop never
27547 if (start_reg
!= i
)
27548 arm_emit_vfp_multi_reg_pop (start_reg
,
27549 (i
- start_reg
) / 2,
27550 gen_rtx_REG (SImode
, IP_REGNUM
));
27555 /* The frame pointer is guaranteed to be non-double-word aligned, as
27556 it is set to double-word-aligned old_stack_pointer - 4. */
27558 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
27560 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
27561 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27563 rtx addr
= gen_frame_mem (V2SImode
,
27564 plus_constant (Pmode
, hard_frame_pointer_rtx
,
27566 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27567 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27568 gen_rtx_REG (V2SImode
, i
),
27574 /* saved_regs_mask should contain IP which contains old stack pointer
27575 at the time of activation creation. Since SP and IP are adjacent registers,
27576 we can restore the value directly into SP. */
27577 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
27578 saved_regs_mask
&= ~(1 << IP_REGNUM
);
27579 saved_regs_mask
|= (1 << SP_REGNUM
);
27581 /* There are two registers left in saved_regs_mask - LR and PC. We
27582 only need to restore LR (the return address), but to
27583 save time we can load it directly into PC, unless we need a
27584 special function exit sequence, or we are not really returning. */
27586 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
27587 && !crtl
->calls_eh_return
)
27588 /* Delete LR from the register mask, so that LR on
27589 the stack is loaded into the PC in the register mask. */
27590 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27592 saved_regs_mask
&= ~(1 << PC_REGNUM
);
27594 num_regs
= bit_count (saved_regs_mask
);
27595 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
27598 emit_insn (gen_blockage ());
27599 /* Unwind the stack to just below the saved registers. */
27600 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27601 hard_frame_pointer_rtx
,
27602 GEN_INT (- 4 * num_regs
)));
27604 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
27605 stack_pointer_rtx
, hard_frame_pointer_rtx
);
27608 arm_emit_multi_reg_pop (saved_regs_mask
);
27610 if (IS_INTERRUPT (func_type
))
27612 /* Interrupt handlers will have pushed the
27613 IP onto the stack, so restore it now. */
27615 rtx addr
= gen_rtx_MEM (SImode
,
27616 gen_rtx_POST_INC (SImode
,
27617 stack_pointer_rtx
));
27618 set_mem_alias_set (addr
, get_frame_alias_set ());
27619 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
27620 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27621 gen_rtx_REG (SImode
, IP_REGNUM
),
27625 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
27628 if (crtl
->calls_eh_return
)
27629 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27631 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27633 if (IS_STACKALIGN (func_type
))
27634 /* Restore the original stack pointer. Before prologue, the stack was
27635 realigned and the original stack pointer saved in r0. For details,
27636 see comment in arm_expand_prologue. */
27637 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
27639 emit_jump_insn (simple_return_rtx
);
27642 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27643 function is not a sibcall. */
27645 arm_expand_epilogue (bool really_return
)
27647 unsigned long func_type
;
27648 unsigned long saved_regs_mask
;
27652 arm_stack_offsets
*offsets
;
27654 func_type
= arm_current_func_type ();
27656 /* Naked functions don't have epilogue. Hence, generate return pattern, and
27657 let output_return_instruction take care of instruction emission if any. */
27658 if (IS_NAKED (func_type
)
27659 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
27662 emit_jump_insn (simple_return_rtx
);
27666 /* If we are throwing an exception, then we really must be doing a
27667 return, so we can't tail-call. */
27668 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
27670 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
27672 arm_expand_epilogue_apcs_frame (really_return
);
27676 /* Get frame offsets for ARM. */
27677 offsets
= arm_get_frame_offsets ();
27678 saved_regs_mask
= offsets
->saved_regs_mask
;
27679 num_regs
= bit_count (saved_regs_mask
);
27681 if (frame_pointer_needed
)
27684 /* Restore stack pointer if necessary. */
27687 /* In ARM mode, frame pointer points to first saved register.
27688 Restore stack pointer to last saved register. */
27689 amount
= offsets
->frame
- offsets
->saved_regs
;
27691 /* Force out any pending memory operations that reference stacked data
27692 before stack de-allocation occurs. */
27693 emit_insn (gen_blockage ());
27694 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27695 hard_frame_pointer_rtx
,
27696 GEN_INT (amount
)));
27697 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27699 hard_frame_pointer_rtx
);
27701 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27703 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27707 /* In Thumb-2 mode, the frame pointer points to the last saved
27709 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27712 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
27713 hard_frame_pointer_rtx
,
27714 GEN_INT (amount
)));
27715 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27716 hard_frame_pointer_rtx
,
27717 hard_frame_pointer_rtx
);
27720 /* Force out any pending memory operations that reference stacked data
27721 before stack de-allocation occurs. */
27722 emit_insn (gen_blockage ());
27723 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
27724 hard_frame_pointer_rtx
));
27725 arm_add_cfa_adjust_cfa_note (insn
, 0,
27727 hard_frame_pointer_rtx
);
27728 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27730 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27735 /* Pop off outgoing args and local frame to adjust stack pointer to
27736 last saved register. */
27737 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27741 /* Force out any pending memory operations that reference stacked data
27742 before stack de-allocation occurs. */
27743 emit_insn (gen_blockage ());
27744 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27746 GEN_INT (amount
)));
27747 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
27748 stack_pointer_rtx
, stack_pointer_rtx
);
27749 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27751 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27755 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27757 /* Generate VFP register multi-pop. */
27758 int end_reg
= LAST_VFP_REGNUM
+ 1;
27760 /* Scan the registers in reverse order. We need to match
27761 any groupings made in the prologue and generate matching
27762 vldm operations. The need to match groups is because,
27763 unlike pop, vldm can only do consecutive regs. */
27764 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
27765 /* Look for a case where a reg does not need restoring. */
27766 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27767 && (!df_regs_ever_live_p (i
+ 1)
27768 || call_used_regs
[i
+ 1]))
27770 /* Restore the regs discovered so far (from reg+2 to
27772 if (end_reg
> i
+ 2)
27773 arm_emit_vfp_multi_reg_pop (i
+ 2,
27774 (end_reg
- (i
+ 2)) / 2,
27775 stack_pointer_rtx
);
27779 /* Restore the remaining regs that we have discovered (or possibly
27780 even all of them, if the conditional in the for loop never
27782 if (end_reg
> i
+ 2)
27783 arm_emit_vfp_multi_reg_pop (i
+ 2,
27784 (end_reg
- (i
+ 2)) / 2,
27785 stack_pointer_rtx
);
27789 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
27790 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27793 rtx addr
= gen_rtx_MEM (V2SImode
,
27794 gen_rtx_POST_INC (SImode
,
27795 stack_pointer_rtx
));
27796 set_mem_alias_set (addr
, get_frame_alias_set ());
27797 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27798 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27799 gen_rtx_REG (V2SImode
, i
),
27801 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27802 stack_pointer_rtx
, stack_pointer_rtx
);
27805 if (saved_regs_mask
)
27808 bool return_in_pc
= false;
27810 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
27811 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
27812 && !IS_STACKALIGN (func_type
)
27814 && crtl
->args
.pretend_args_size
== 0
27815 && saved_regs_mask
& (1 << LR_REGNUM
)
27816 && !crtl
->calls_eh_return
)
27818 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27819 saved_regs_mask
|= (1 << PC_REGNUM
);
27820 return_in_pc
= true;
27823 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
27825 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27826 if (saved_regs_mask
& (1 << i
))
27828 rtx addr
= gen_rtx_MEM (SImode
,
27829 gen_rtx_POST_INC (SImode
,
27830 stack_pointer_rtx
));
27831 set_mem_alias_set (addr
, get_frame_alias_set ());
27833 if (i
== PC_REGNUM
)
27835 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27836 XVECEXP (insn
, 0, 0) = ret_rtx
;
27837 XVECEXP (insn
, 0, 1) = gen_rtx_SET (SImode
,
27838 gen_rtx_REG (SImode
, i
),
27840 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
27841 insn
= emit_jump_insn (insn
);
27845 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
27847 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27848 gen_rtx_REG (SImode
, i
),
27850 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27852 stack_pointer_rtx
);
27859 && current_tune
->prefer_ldrd_strd
27860 && !optimize_function_for_size_p (cfun
))
27863 thumb2_emit_ldrd_pop (saved_regs_mask
);
27864 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
27865 arm_emit_ldrd_pop (saved_regs_mask
);
27867 arm_emit_multi_reg_pop (saved_regs_mask
);
27870 arm_emit_multi_reg_pop (saved_regs_mask
);
27873 if (return_in_pc
== true)
27877 if (crtl
->args
.pretend_args_size
)
27880 rtx dwarf
= NULL_RTX
;
27881 rtx tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27883 GEN_INT (crtl
->args
.pretend_args_size
)));
27885 RTX_FRAME_RELATED_P (tmp
) = 1;
27887 if (cfun
->machine
->uses_anonymous_args
)
27889 /* Restore pretend args. Refer arm_expand_prologue on how to save
27890 pretend_args in stack. */
27891 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
27892 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
27893 for (j
= 0, i
= 0; j
< num_regs
; i
++)
27894 if (saved_regs_mask
& (1 << i
))
27896 rtx reg
= gen_rtx_REG (SImode
, i
);
27897 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
27900 REG_NOTES (tmp
) = dwarf
;
27902 arm_add_cfa_adjust_cfa_note (tmp
, crtl
->args
.pretend_args_size
,
27903 stack_pointer_rtx
, stack_pointer_rtx
);
27906 if (!really_return
)
27909 if (crtl
->calls_eh_return
)
27910 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27912 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27914 if (IS_STACKALIGN (func_type
))
27915 /* Restore the original stack pointer. Before prologue, the stack was
27916 realigned and the original stack pointer saved in r0. For details,
27917 see comment in arm_expand_prologue. */
27918 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
27920 emit_jump_insn (simple_return_rtx
);
27923 /* Implementation of insn prologue_thumb1_interwork. This is the first
27924 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27927 thumb1_output_interwork (void)
27930 FILE *f
= asm_out_file
;
27932 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
27933 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
27935 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
27937 /* Generate code sequence to switch us into Thumb mode. */
27938 /* The .code 32 directive has already been emitted by
27939 ASM_DECLARE_FUNCTION_NAME. */
27940 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
27941 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
27943 /* Generate a label, so that the debugger will notice the
27944 change in instruction sets. This label is also used by
27945 the assembler to bypass the ARM code when this function
27946 is called from a Thumb encoded function elsewhere in the
27947 same file. Hence the definition of STUB_NAME here must
27948 agree with the definition in gas/config/tc-arm.c. */
27950 #define STUB_NAME ".real_start_of"
27952 fprintf (f
, "\t.code\t16\n");
27954 if (arm_dllexport_name_p (name
))
27955 name
= arm_strip_name_encoding (name
);
27957 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
27958 fprintf (f
, "\t.thumb_func\n");
27959 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
27964 /* Handle the case of a double word load into a low register from
27965 a computed memory address. The computed address may involve a
27966 register which is overwritten by the load. */
27968 thumb_load_double_from_address (rtx
*operands
)
27976 gcc_assert (REG_P (operands
[0]));
27977 gcc_assert (MEM_P (operands
[1]));
27979 /* Get the memory address. */
27980 addr
= XEXP (operands
[1], 0);
27982 /* Work out how the memory address is computed. */
27983 switch (GET_CODE (addr
))
27986 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27988 if (REGNO (operands
[0]) == REGNO (addr
))
27990 output_asm_insn ("ldr\t%H0, %2", operands
);
27991 output_asm_insn ("ldr\t%0, %1", operands
);
27995 output_asm_insn ("ldr\t%0, %1", operands
);
27996 output_asm_insn ("ldr\t%H0, %2", operands
);
28001 /* Compute <address> + 4 for the high order load. */
28002 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28004 output_asm_insn ("ldr\t%0, %1", operands
);
28005 output_asm_insn ("ldr\t%H0, %2", operands
);
28009 arg1
= XEXP (addr
, 0);
28010 arg2
= XEXP (addr
, 1);
28012 if (CONSTANT_P (arg1
))
28013 base
= arg2
, offset
= arg1
;
28015 base
= arg1
, offset
= arg2
;
28017 gcc_assert (REG_P (base
));
28019 /* Catch the case of <address> = <reg> + <reg> */
28020 if (REG_P (offset
))
28022 int reg_offset
= REGNO (offset
);
28023 int reg_base
= REGNO (base
);
28024 int reg_dest
= REGNO (operands
[0]);
28026 /* Add the base and offset registers together into the
28027 higher destination register. */
28028 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
28029 reg_dest
+ 1, reg_base
, reg_offset
);
28031 /* Load the lower destination register from the address in
28032 the higher destination register. */
28033 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
28034 reg_dest
, reg_dest
+ 1);
28036 /* Load the higher destination register from its own address
28038 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
28039 reg_dest
+ 1, reg_dest
+ 1);
28043 /* Compute <address> + 4 for the high order load. */
28044 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28046 /* If the computed address is held in the low order register
28047 then load the high order register first, otherwise always
28048 load the low order register first. */
28049 if (REGNO (operands
[0]) == REGNO (base
))
28051 output_asm_insn ("ldr\t%H0, %2", operands
);
28052 output_asm_insn ("ldr\t%0, %1", operands
);
28056 output_asm_insn ("ldr\t%0, %1", operands
);
28057 output_asm_insn ("ldr\t%H0, %2", operands
);
28063 /* With no registers to worry about we can just load the value
28065 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28067 output_asm_insn ("ldr\t%H0, %2", operands
);
28068 output_asm_insn ("ldr\t%0, %1", operands
);
28072 gcc_unreachable ();
28079 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
28086 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28089 operands
[4] = operands
[5];
28092 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
28093 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
28097 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28100 operands
[4] = operands
[5];
28103 if (REGNO (operands
[5]) > REGNO (operands
[6]))
28106 operands
[5] = operands
[6];
28109 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28112 operands
[4] = operands
[5];
28116 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
28117 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
28121 gcc_unreachable ();
28127 /* Output a call-via instruction for thumb state. */
28129 thumb_call_via_reg (rtx reg
)
28131 int regno
= REGNO (reg
);
28134 gcc_assert (regno
< LR_REGNUM
);
28136 /* If we are in the normal text section we can use a single instance
28137 per compilation unit. If we are doing function sections, then we need
28138 an entry per section, since we can't rely on reachability. */
28139 if (in_section
== text_section
)
28141 thumb_call_reg_needed
= 1;
28143 if (thumb_call_via_label
[regno
] == NULL
)
28144 thumb_call_via_label
[regno
] = gen_label_rtx ();
28145 labelp
= thumb_call_via_label
+ regno
;
28149 if (cfun
->machine
->call_via
[regno
] == NULL
)
28150 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
28151 labelp
= cfun
->machine
->call_via
+ regno
;
28154 output_asm_insn ("bl\t%a0", labelp
);
28158 /* Routines for generating rtl. */
28160 thumb_expand_movmemqi (rtx
*operands
)
28162 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
28163 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
28164 HOST_WIDE_INT len
= INTVAL (operands
[2]);
28165 HOST_WIDE_INT offset
= 0;
28169 emit_insn (gen_movmem12b (out
, in
, out
, in
));
28175 emit_insn (gen_movmem8b (out
, in
, out
, in
));
28181 rtx reg
= gen_reg_rtx (SImode
);
28182 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
28183 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
28190 rtx reg
= gen_reg_rtx (HImode
);
28191 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
28192 plus_constant (Pmode
, in
,
28194 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
28203 rtx reg
= gen_reg_rtx (QImode
);
28204 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
28205 plus_constant (Pmode
, in
,
28207 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
28214 thumb_reload_out_hi (rtx
*operands
)
28216 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
28219 /* Handle reading a half-word from memory during reload. */
28221 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
28223 gcc_unreachable ();
28226 /* Return the length of a function name prefix
28227 that starts with the character 'c'. */
28229 arm_get_strip_length (int c
)
28233 ARM_NAME_ENCODING_LENGTHS
/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  /* Advance past each recognised encoding prefix in turn.  */
  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}
/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
28272 /* This function is used to emit an EABI tag and its associated value.
28273 We emit the numerical value of the tag in case the assembler does not
28274 support textual tags. (Eg gas prior to 2.20). If requested we include
28275 the tag name in a comment so that anyone reading the assembler output
28276 will know which tag is being set.
28278 This function is not static because arm-c.c needs it too. */
28281 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
28283 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
28284 if (flag_verbose_asm
|| flag_debug_asm
)
28285 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
28286 asm_fprintf (asm_out_file
, "\n");
28290 arm_file_start (void)
28294 if (TARGET_UNIFIED_ASM
)
28295 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
28299 const char *fpu_name
;
28300 if (arm_selected_arch
)
28302 /* armv7ve doesn't support any extensions. */
28303 if (strcmp (arm_selected_arch
->name
, "armv7ve") == 0)
28305 /* Keep backward compatability for assemblers
28306 which don't support armv7ve. */
28307 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
28308 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
28309 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
28310 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
28311 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
28315 const char* pos
= strchr (arm_selected_arch
->name
, '+');
28319 gcc_assert (strlen (arm_selected_arch
->name
)
28320 <= sizeof (buf
) / sizeof (*pos
));
28321 strncpy (buf
, arm_selected_arch
->name
,
28322 (pos
- arm_selected_arch
->name
) * sizeof (*pos
));
28323 buf
[pos
- arm_selected_arch
->name
] = '\0';
28324 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
28325 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
28328 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
28331 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
28332 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
28335 const char* truncated_name
28336 = arm_rewrite_selected_cpu (arm_selected_cpu
->name
);
28337 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
28340 if (TARGET_SOFT_FLOAT
)
28342 fpu_name
= "softvfp";
28346 fpu_name
= arm_fpu_desc
->name
;
28347 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
28349 if (TARGET_HARD_FLOAT
)
28350 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28351 if (TARGET_HARD_FLOAT_ABI
)
28352 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28355 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
28357 /* Some of these attributes only apply when the corresponding features
28358 are used. However we don't have any easy way of figuring this out.
28359 Conservatively record the setting that would have been used. */
28361 if (flag_rounding_math
)
28362 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28364 if (!flag_unsafe_math_optimizations
)
28366 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28367 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28369 if (flag_signaling_nans
)
28370 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28372 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28373 flag_finite_math_only
? 1 : 3);
28375 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28376 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28377 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28378 flag_short_enums
? 1 : 2);
28380 /* Tag_ABI_optimization_goals. */
28383 else if (optimize
>= 2)
28389 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
28391 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28394 if (arm_fp16_format
)
28395 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28396 (int) arm_fp16_format
);
28398 if (arm_lang_output_object_attributes_hook
)
28399 arm_lang_output_object_attributes_hook();
28402 default_file_start ();
28406 arm_file_end (void)
28410 if (NEED_INDICATE_EXEC_STACK
)
28411 /* Add .note.GNU-stack. */
28412 file_end_indicate_exec_stack ();
28414 if (! thumb_call_reg_needed
)
28417 switch_to_section (text_section
);
28418 asm_fprintf (asm_out_file
, "\t.code 16\n");
28419 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
28421 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
28423 rtx label
= thumb_call_via_label
[regno
];
28427 targetm
.asm_out
.internal_label (asm_out_file
, "L",
28428 CODE_LABEL_NUMBER (label
));
28429 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
28435 /* Symbols in the text segment can be accessed without indirecting via the
28436 constant pool; it may take an extra binary operation, but this is still
28437 faster than indirecting via memory. Don't do this when not optimizing,
28438 since we won't be calculating al of the offsets necessary to do this
28442 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
28444 if (optimize
> 0 && TREE_CONSTANT (decl
))
28445 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
28447 default_encode_section_info (decl
, rtl
, first
);
28449 #endif /* !ARM_PE */
28452 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
28454 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
28455 && !strcmp (prefix
, "L"))
28457 arm_ccfsm_state
= 0;
28458 arm_target_insn
= NULL
;
28460 default_internal_label (stream
, prefix
, labelno
);
28463 /* Output code to add DELTA to the first argument, and then jump
28464 to FUNCTION. Used for C++ multiple inheritance. */
28466 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
28467 HOST_WIDE_INT delta
,
28468 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
28471 static int thunk_label
= 0;
28474 int mi_delta
= delta
;
28475 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
28477 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
28480 mi_delta
= - mi_delta
;
28482 final_start_function (emit_barrier (), file
, 1);
28486 int labelno
= thunk_label
++;
28487 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
28488 /* Thunks are entered in arm mode when avaiable. */
28489 if (TARGET_THUMB1_ONLY
)
28491 /* push r3 so we can use it as a temporary. */
28492 /* TODO: Omit this save if r3 is not used. */
28493 fputs ("\tpush {r3}\n", file
);
28494 fputs ("\tldr\tr3, ", file
);
28498 fputs ("\tldr\tr12, ", file
);
28500 assemble_name (file
, label
);
28501 fputc ('\n', file
);
28504 /* If we are generating PIC, the ldr instruction below loads
28505 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28506 the address of the add + 8, so we have:
28508 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28511 Note that we have "+ 1" because some versions of GNU ld
28512 don't set the low bit of the result for R_ARM_REL32
28513 relocations against thumb function symbols.
28514 On ARMv6M this is +4, not +8. */
28515 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
28516 assemble_name (file
, labelpc
);
28517 fputs (":\n", file
);
28518 if (TARGET_THUMB1_ONLY
)
28520 /* This is 2 insns after the start of the thunk, so we know it
28521 is 4-byte aligned. */
28522 fputs ("\tadd\tr3, pc, r3\n", file
);
28523 fputs ("\tmov r12, r3\n", file
);
28526 fputs ("\tadd\tr12, pc, r12\n", file
);
28528 else if (TARGET_THUMB1_ONLY
)
28529 fputs ("\tmov r12, r3\n", file
);
28531 if (TARGET_THUMB1_ONLY
)
28533 if (mi_delta
> 255)
28535 fputs ("\tldr\tr3, ", file
);
28536 assemble_name (file
, label
);
28537 fputs ("+4\n", file
);
28538 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
28539 mi_op
, this_regno
, this_regno
);
28541 else if (mi_delta
!= 0)
28543 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28544 mi_op
, this_regno
, this_regno
,
28550 /* TODO: Use movw/movt for large constants when available. */
28551 while (mi_delta
!= 0)
28553 if ((mi_delta
& (3 << shift
)) == 0)
28557 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28558 mi_op
, this_regno
, this_regno
,
28559 mi_delta
& (0xff << shift
));
28560 mi_delta
&= ~(0xff << shift
);
28567 if (TARGET_THUMB1_ONLY
)
28568 fputs ("\tpop\t{r3}\n", file
);
28570 fprintf (file
, "\tbx\tr12\n");
28571 ASM_OUTPUT_ALIGN (file
, 2);
28572 assemble_name (file
, label
);
28573 fputs (":\n", file
);
28576 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28577 rtx tem
= XEXP (DECL_RTL (function
), 0);
28578 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28579 pipeline offset is four rather than eight. Adjust the offset
28581 tem
= plus_constant (GET_MODE (tem
), tem
,
28582 TARGET_THUMB1_ONLY
? -3 : -7);
28583 tem
= gen_rtx_MINUS (GET_MODE (tem
),
28585 gen_rtx_SYMBOL_REF (Pmode
,
28586 ggc_strdup (labelpc
)));
28587 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
28590 /* Output ".word .LTHUNKn". */
28591 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
28593 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
28594 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
28598 fputs ("\tb\t", file
);
28599 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28600 if (NEED_PLT_RELOC
)
28601 fputs ("(PLT)", file
);
28602 fputc ('\n', file
);
28605 final_end_function ();
28609 arm_emit_vector_const (FILE *file
, rtx x
)
28612 const char * pattern
;
28614 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
28616 switch (GET_MODE (x
))
28618 case V2SImode
: pattern
= "%08x"; break;
28619 case V4HImode
: pattern
= "%04x"; break;
28620 case V8QImode
: pattern
= "%02x"; break;
28621 default: gcc_unreachable ();
28624 fprintf (file
, "0x");
28625 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
28629 element
= CONST_VECTOR_ELT (x
, i
);
28630 fprintf (file
, pattern
, INTVAL (element
));
28636 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28637 HFmode constant pool entries are actually loaded with ldr. */
28639 arm_emit_fp16_const (rtx c
)
28644 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
28645 bits
= real_to_target (NULL
, &r
, HFmode
);
28646 if (WORDS_BIG_ENDIAN
)
28647 assemble_zeros (2);
28648 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
28649 if (!WORDS_BIG_ENDIAN
)
28650 assemble_zeros (2);
28654 arm_output_load_gr (rtx
*operands
)
28661 if (!MEM_P (operands
[1])
28662 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
28663 || !REG_P (reg
= XEXP (sum
, 0))
28664 || !CONST_INT_P (offset
= XEXP (sum
, 1))
28665 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
28666 return "wldrw%?\t%0, %1";
28668 /* Fix up an out-of-range load of a GR register. */
28669 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
28670 wcgr
= operands
[0];
28672 output_asm_insn ("ldr%?\t%0, %1", operands
);
28674 operands
[0] = wcgr
;
28676 output_asm_insn ("tmcr%?\t%0, %1", operands
);
28677 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
28682 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28684 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28685 named arg and all anonymous args onto the stack.
28686 XXX I know the prologue shouldn't be pushing registers, but it is faster
28690 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
28691 enum machine_mode mode
,
28694 int second_time ATTRIBUTE_UNUSED
)
28696 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
28699 cfun
->machine
->uses_anonymous_args
= 1;
28700 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
28702 nregs
= pcum
->aapcs_ncrn
;
28703 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
28707 nregs
= pcum
->nregs
;
28709 if (nregs
< NUM_ARG_REGS
)
28710 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
28713 /* We can't rely on the caller doing the proper promotion when
28714 using APCS or ATPCS. */
28717 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
28719 return !TARGET_AAPCS_BASED
;
28722 static enum machine_mode
28723 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
28724 enum machine_mode mode
,
28725 int *punsignedp ATTRIBUTE_UNUSED
,
28726 const_tree fntype ATTRIBUTE_UNUSED
,
28727 int for_return ATTRIBUTE_UNUSED
)
28729 if (GET_MODE_CLASS (mode
) == MODE_INT
28730 && GET_MODE_SIZE (mode
) < 4)
28736 /* AAPCS based ABIs use short enums by default. */
28739 arm_default_short_enums (void)
28741 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
28745 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28748 arm_align_anon_bitfield (void)
28750 return TARGET_AAPCS_BASED
;
28754 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28757 arm_cxx_guard_type (void)
28759 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
28763 /* The EABI says test the least significant bit of a guard variable. */
28766 arm_cxx_guard_mask_bit (void)
28768 return TARGET_AAPCS_BASED
;
28772 /* The EABI specifies that all array cookies are 8 bytes long. */
28775 arm_get_cookie_size (tree type
)
28779 if (!TARGET_AAPCS_BASED
)
28780 return default_cxx_get_cookie_size (type
);
28782 size
= build_int_cst (sizetype
, 8);
28787 /* The EABI says that array cookies should also contain the element size. */
28790 arm_cookie_has_size (void)
28792 return TARGET_AAPCS_BASED
;
28796 /* The EABI says constructors and destructors should return a pointer to
28797 the object constructed/destroyed. */
28800 arm_cxx_cdtor_returns_this (void)
28802 return TARGET_AAPCS_BASED
;
28805 /* The EABI says that an inline function may never be the key
28809 arm_cxx_key_method_may_be_inline (void)
28811 return !TARGET_AAPCS_BASED
;
28815 arm_cxx_determine_class_data_visibility (tree decl
)
28817 if (!TARGET_AAPCS_BASED
28818 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
28821 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28822 is exported. However, on systems without dynamic vague linkage,
28823 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28824 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
28825 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
28827 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
28828 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
28832 arm_cxx_class_data_always_comdat (void)
28834 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28835 vague linkage if the class has no key function. */
28836 return !TARGET_AAPCS_BASED
;
28840 /* The EABI says __aeabi_atexit should be used to register static
28844 arm_cxx_use_aeabi_atexit (void)
28846 return TARGET_AAPCS_BASED
;
28851 arm_set_return_address (rtx source
, rtx scratch
)
28853 arm_stack_offsets
*offsets
;
28854 HOST_WIDE_INT delta
;
28856 unsigned long saved_regs
;
28858 offsets
= arm_get_frame_offsets ();
28859 saved_regs
= offsets
->saved_regs_mask
;
28861 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
28862 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
28865 if (frame_pointer_needed
)
28866 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
28869 /* LR will be the first saved register. */
28870 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
28875 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
28876 GEN_INT (delta
& ~4095)));
28881 addr
= stack_pointer_rtx
;
28883 addr
= plus_constant (Pmode
, addr
, delta
);
28885 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
28891 thumb_set_return_address (rtx source
, rtx scratch
)
28893 arm_stack_offsets
*offsets
;
28894 HOST_WIDE_INT delta
;
28895 HOST_WIDE_INT limit
;
28898 unsigned long mask
;
28902 offsets
= arm_get_frame_offsets ();
28903 mask
= offsets
->saved_regs_mask
;
28904 if (mask
& (1 << LR_REGNUM
))
28907 /* Find the saved regs. */
28908 if (frame_pointer_needed
)
28910 delta
= offsets
->soft_frame
- offsets
->saved_args
;
28911 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
28917 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
28920 /* Allow for the stack frame. */
28921 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
28923 /* The link register is always the first saved register. */
28926 /* Construct the address. */
28927 addr
= gen_rtx_REG (SImode
, reg
);
28930 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
28931 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
28935 addr
= plus_constant (Pmode
, addr
, delta
);
28937 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
28940 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
28943 /* Implements target hook vector_mode_supported_p. */
28945 arm_vector_mode_supported_p (enum machine_mode mode
)
28947 /* Neon also supports V2SImode, etc. listed in the clause below. */
28948 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
28949 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
28952 if ((TARGET_NEON
|| TARGET_IWMMXT
)
28953 && ((mode
== V2SImode
)
28954 || (mode
== V4HImode
)
28955 || (mode
== V8QImode
)))
28958 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
28959 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
28960 || mode
== V2HAmode
))
28966 /* Implements target hook array_mode_supported_p. */
28969 arm_array_mode_supported_p (enum machine_mode mode
,
28970 unsigned HOST_WIDE_INT nelems
)
28973 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
28974 && (nelems
>= 2 && nelems
<= 4))
28980 /* Use the option -mvectorize-with-neon-double to override the use of quardword
28981 registers when autovectorizing for Neon, at least until multiple vector
28982 widths are supported properly by the middle-end. */
28984 static enum machine_mode
28985 arm_preferred_simd_mode (enum machine_mode mode
)
28991 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
28993 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
28995 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
28997 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
28999 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
29006 if (TARGET_REALLY_IWMMXT
)
29022 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29024 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29025 using r0-r4 for function arguments, r7 for the stack frame and don't have
29026 enough left over to do doubleword arithmetic. For Thumb-2 all the
29027 potentially problematic instructions accept high registers so this is not
29028 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29029 that require many low registers. */
29031 arm_class_likely_spilled_p (reg_class_t rclass
)
29033 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
29034 || rclass
== CC_REG
)
29040 /* Implements target hook small_register_classes_for_mode_p. */
29042 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED
)
29044 return TARGET_THUMB1
;
29047 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29048 ARM insns and therefore guarantee that the shift count is modulo 256.
29049 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29050 guarantee no particular behavior for out-of-range counts. */
29052 static unsigned HOST_WIDE_INT
29053 arm_shift_truncation_mask (enum machine_mode mode
)
29055 return mode
== SImode
? 255 : 0;
29059 /* Map internal gcc register numbers to DWARF2 register numbers. */
29062 arm_dbx_register_number (unsigned int regno
)
29067 if (IS_VFP_REGNUM (regno
))
29069 /* See comment in arm_dwarf_register_span. */
29070 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
29071 return 64 + regno
- FIRST_VFP_REGNUM
;
29073 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
29076 if (IS_IWMMXT_GR_REGNUM (regno
))
29077 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
29079 if (IS_IWMMXT_REGNUM (regno
))
29080 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
29082 gcc_unreachable ();
29085 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29086 GCC models tham as 64 32-bit registers, so we need to describe this to
29087 the DWARF generation code. Other registers can use the default. */
29089 arm_dwarf_register_span (rtx rtl
)
29091 enum machine_mode mode
;
29097 regno
= REGNO (rtl
);
29098 if (!IS_VFP_REGNUM (regno
))
29101 /* XXX FIXME: The EABI defines two VFP register ranges:
29102 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29104 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29105 corresponding D register. Until GDB supports this, we shall use the
29106 legacy encodings. We also use these encodings for D0-D15 for
29107 compatibility with older debuggers. */
29108 mode
= GET_MODE (rtl
);
29109 if (GET_MODE_SIZE (mode
) < 8)
29112 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
29114 nregs
= GET_MODE_SIZE (mode
) / 4;
29115 for (i
= 0; i
< nregs
; i
+= 2)
29116 if (TARGET_BIG_END
)
29118 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
29119 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
29123 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
29124 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
29129 nregs
= GET_MODE_SIZE (mode
) / 8;
29130 for (i
= 0; i
< nregs
; i
++)
29131 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
29134 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
29137 #if ARM_UNWIND_INFO
29138 /* Emit unwind directives for a store-multiple instruction or stack pointer
29139 push during alignment.
29140 These should only ever be generated by the function prologue code, so
29141 expect them to have a particular form.
29142 The store-multiple instruction sometimes pushes pc as the last register,
29143 although it should not be tracked into unwind information, or for -Os
29144 sometimes pushes some dummy registers before first register that needs
29145 to be tracked in unwind information; such dummy registers are there just
29146 to avoid separate stack adjustment, and will not be restored in the
29150 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
29153 HOST_WIDE_INT offset
;
29154 HOST_WIDE_INT nregs
;
29158 unsigned padfirst
= 0, padlast
= 0;
29161 e
= XVECEXP (p
, 0, 0);
29162 gcc_assert (GET_CODE (e
) == SET
);
29164 /* First insn will adjust the stack pointer. */
29165 gcc_assert (GET_CODE (e
) == SET
29166 && REG_P (SET_DEST (e
))
29167 && REGNO (SET_DEST (e
)) == SP_REGNUM
29168 && GET_CODE (SET_SRC (e
)) == PLUS
);
29170 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
29171 nregs
= XVECLEN (p
, 0) - 1;
29172 gcc_assert (nregs
);
29174 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
29177 /* For -Os dummy registers can be pushed at the beginning to
29178 avoid separate stack pointer adjustment. */
29179 e
= XVECEXP (p
, 0, 1);
29180 e
= XEXP (SET_DEST (e
), 0);
29181 if (GET_CODE (e
) == PLUS
)
29182 padfirst
= INTVAL (XEXP (e
, 1));
29183 gcc_assert (padfirst
== 0 || optimize_size
);
29184 /* The function prologue may also push pc, but not annotate it as it is
29185 never restored. We turn this into a stack pointer adjustment. */
29186 e
= XVECEXP (p
, 0, nregs
);
29187 e
= XEXP (SET_DEST (e
), 0);
29188 if (GET_CODE (e
) == PLUS
)
29189 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
29191 padlast
= offset
- 4;
29192 gcc_assert (padlast
== 0 || padlast
== 4);
29194 fprintf (asm_out_file
, "\t.pad #4\n");
29196 fprintf (asm_out_file
, "\t.save {");
29198 else if (IS_VFP_REGNUM (reg
))
29201 fprintf (asm_out_file
, "\t.vsave {");
29204 /* Unknown register type. */
29205 gcc_unreachable ();
29207 /* If the stack increment doesn't match the size of the saved registers,
29208 something has gone horribly wrong. */
29209 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
29213 /* The remaining insns will describe the stores. */
29214 for (i
= 1; i
<= nregs
; i
++)
29216 /* Expect (set (mem <addr>) (reg)).
29217 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29218 e
= XVECEXP (p
, 0, i
);
29219 gcc_assert (GET_CODE (e
) == SET
29220 && MEM_P (SET_DEST (e
))
29221 && REG_P (SET_SRC (e
)));
29223 reg
= REGNO (SET_SRC (e
));
29224 gcc_assert (reg
>= lastreg
);
29227 fprintf (asm_out_file
, ", ");
29228 /* We can't use %r for vfp because we need to use the
29229 double precision register names. */
29230 if (IS_VFP_REGNUM (reg
))
29231 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
29233 asm_fprintf (asm_out_file
, "%r", reg
);
29235 #ifdef ENABLE_CHECKING
29236 /* Check that the addresses are consecutive. */
29237 e
= XEXP (SET_DEST (e
), 0);
29238 if (GET_CODE (e
) == PLUS
)
29239 gcc_assert (REG_P (XEXP (e
, 0))
29240 && REGNO (XEXP (e
, 0)) == SP_REGNUM
29241 && CONST_INT_P (XEXP (e
, 1))
29242 && offset
== INTVAL (XEXP (e
, 1)));
29246 && REGNO (e
) == SP_REGNUM
);
29247 offset
+= reg_size
;
29250 fprintf (asm_out_file
, "}\n");
29252 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
29255 /* Emit unwind directives for a SET. */
29258 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
29266 switch (GET_CODE (e0
))
29269 /* Pushing a single register. */
29270 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
29271 || !REG_P (XEXP (XEXP (e0
, 0), 0))
29272 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
29275 asm_fprintf (asm_out_file
, "\t.save ");
29276 if (IS_VFP_REGNUM (REGNO (e1
)))
29277 asm_fprintf(asm_out_file
, "{d%d}\n",
29278 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
29280 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
29284 if (REGNO (e0
) == SP_REGNUM
)
29286 /* A stack increment. */
29287 if (GET_CODE (e1
) != PLUS
29288 || !REG_P (XEXP (e1
, 0))
29289 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
29290 || !CONST_INT_P (XEXP (e1
, 1)))
29293 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
29294 -INTVAL (XEXP (e1
, 1)));
29296 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
29298 HOST_WIDE_INT offset
;
29300 if (GET_CODE (e1
) == PLUS
)
29302 if (!REG_P (XEXP (e1
, 0))
29303 || !CONST_INT_P (XEXP (e1
, 1)))
29305 reg
= REGNO (XEXP (e1
, 0));
29306 offset
= INTVAL (XEXP (e1
, 1));
29307 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
29308 HARD_FRAME_POINTER_REGNUM
, reg
,
29311 else if (REG_P (e1
))
29314 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
29315 HARD_FRAME_POINTER_REGNUM
, reg
);
29320 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
29322 /* Move from sp to reg. */
29323 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
29325 else if (GET_CODE (e1
) == PLUS
29326 && REG_P (XEXP (e1
, 0))
29327 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
29328 && CONST_INT_P (XEXP (e1
, 1)))
29330 /* Set reg to offset from sp. */
29331 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
29332 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
29344 /* Emit unwind directives for the given insn. */
29347 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
29350 bool handled_one
= false;
29352 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
29355 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
29356 && (TREE_NOTHROW (current_function_decl
)
29357 || crtl
->all_throwers_are_sibcalls
))
29360 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
29363 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
29365 switch (REG_NOTE_KIND (note
))
29367 case REG_FRAME_RELATED_EXPR
:
29368 pat
= XEXP (note
, 0);
29371 case REG_CFA_REGISTER
:
29372 pat
= XEXP (note
, 0);
29375 pat
= PATTERN (insn
);
29376 if (GET_CODE (pat
) == PARALLEL
)
29377 pat
= XVECEXP (pat
, 0, 0);
29380 /* Only emitted for IS_STACKALIGN re-alignment. */
29385 src
= SET_SRC (pat
);
29386 dest
= SET_DEST (pat
);
29388 gcc_assert (src
== stack_pointer_rtx
);
29389 reg
= REGNO (dest
);
29390 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29393 handled_one
= true;
29396 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
29397 to get correct dwarf information for shrink-wrap. We should not
29398 emit unwind information for it because these are used either for
29399 pretend arguments or notes to adjust sp and restore registers from
29401 case REG_CFA_DEF_CFA
:
29402 case REG_CFA_ADJUST_CFA
:
29403 case REG_CFA_RESTORE
:
29406 case REG_CFA_EXPRESSION
:
29407 case REG_CFA_OFFSET
:
29408 /* ??? Only handling here what we actually emit. */
29409 gcc_unreachable ();
29417 pat
= PATTERN (insn
);
29420 switch (GET_CODE (pat
))
29423 arm_unwind_emit_set (asm_out_file
, pat
);
29427 /* Store multiple. */
29428 arm_unwind_emit_sequence (asm_out_file
, pat
);
29437 /* Output a reference from a function exception table to the type_info
29438 object X. The EABI specifies that the symbol should be relocated by
29439 an R_ARM_TARGET2 relocation. */
29442 arm_output_ttype (rtx x
)
29444 fputs ("\t.word\t", asm_out_file
);
29445 output_addr_const (asm_out_file
, x
);
29446 /* Use special relocations for symbol references. */
29447 if (!CONST_INT_P (x
))
29448 fputs ("(TARGET2)", asm_out_file
);
29449 fputc ('\n', asm_out_file
);
29454 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29457 arm_asm_emit_except_personality (rtx personality
)
29459 fputs ("\t.personality\t", asm_out_file
);
29460 output_addr_const (asm_out_file
, personality
);
29461 fputc ('\n', asm_out_file
);
29464 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29467 arm_asm_init_sections (void)
29469 exception_section
= get_unnamed_section (0, output_section_asm_op
,
29472 #endif /* ARM_UNWIND_INFO */
29474 /* Output unwind directives for the start/end of a function. */
29477 arm_output_fn_unwind (FILE * f
, bool prologue
)
29479 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
29483 fputs ("\t.fnstart\n", f
);
29486 /* If this function will never be unwound, then mark it as such.
29487 The came condition is used in arm_unwind_emit to suppress
29488 the frame annotations. */
29489 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
29490 && (TREE_NOTHROW (current_function_decl
)
29491 || crtl
->all_throwers_are_sibcalls
))
29492 fputs("\t.cantunwind\n", f
);
29494 fputs ("\t.fnend\n", f
);
29499 arm_emit_tls_decoration (FILE *fp
, rtx x
)
29501 enum tls_reloc reloc
;
29504 val
= XVECEXP (x
, 0, 0);
29505 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
29507 output_addr_const (fp
, val
);
29512 fputs ("(tlsgd)", fp
);
29515 fputs ("(tlsldm)", fp
);
29518 fputs ("(tlsldo)", fp
);
29521 fputs ("(gottpoff)", fp
);
29524 fputs ("(tpoff)", fp
);
29527 fputs ("(tlsdesc)", fp
);
29530 gcc_unreachable ();
29539 fputs (" + (. - ", fp
);
29540 output_addr_const (fp
, XVECEXP (x
, 0, 2));
29541 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29542 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
29543 output_addr_const (fp
, XVECEXP (x
, 0, 3));
29553 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29556 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
29558 gcc_assert (size
== 4);
29559 fputs ("\t.word\t", file
);
29560 output_addr_const (file
, x
);
29561 fputs ("(tlsldo)", file
);
29564 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29567 arm_output_addr_const_extra (FILE *fp
, rtx x
)
29569 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
29570 return arm_emit_tls_decoration (fp
, x
);
29571 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
29574 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
29576 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
29577 assemble_name_raw (fp
, label
);
29581 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
29583 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
29587 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29591 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
29593 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29597 output_addr_const (fp
, XVECEXP (x
, 0, 1));
29601 else if (GET_CODE (x
) == CONST_VECTOR
)
29602 return arm_emit_vector_const (fp
, x
);
29607 /* Output assembly for a shift instruction.
29608 SET_FLAGS determines how the instruction modifies the condition codes.
29609 0 - Do not set condition codes.
29610 1 - Set condition codes.
29611 2 - Use smallest instruction. */
29613 arm_output_shift(rtx
* operands
, int set_flags
)
29616 static const char flag_chars
[3] = {'?', '.', '!'};
29621 c
= flag_chars
[set_flags
];
29622 if (TARGET_UNIFIED_ASM
)
29624 shift
= shift_op(operands
[3], &val
);
29628 operands
[2] = GEN_INT(val
);
29629 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
29632 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
29635 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
29636 output_asm_insn (pattern
, operands
);
29640 /* Output assembly for a WMMX immediate shift instruction. */
29642 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
29644 int shift
= INTVAL (operands
[2]);
29646 enum machine_mode opmode
= GET_MODE (operands
[0]);
29648 gcc_assert (shift
>= 0);
29650 /* If the shift value in the register versions is > 63 (for D qualifier),
29651 31 (for W qualifier) or 15 (for H qualifier). */
29652 if (((opmode
== V4HImode
) && (shift
> 15))
29653 || ((opmode
== V2SImode
) && (shift
> 31))
29654 || ((opmode
== DImode
) && (shift
> 63)))
29658 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29659 output_asm_insn (templ
, operands
);
29660 if (opmode
== DImode
)
29662 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
29663 output_asm_insn (templ
, operands
);
29668 /* The destination register will contain all zeros. */
29669 sprintf (templ
, "wzero\t%%0");
29670 output_asm_insn (templ
, operands
);
29675 if ((opmode
== DImode
) && (shift
> 32))
29677 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29678 output_asm_insn (templ
, operands
);
29679 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
29680 output_asm_insn (templ
, operands
);
29684 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
29685 output_asm_insn (templ
, operands
);
29690 /* Output assembly for a WMMX tinsr instruction. */
29692 arm_output_iwmmxt_tinsr (rtx
*operands
)
29694 int mask
= INTVAL (operands
[3]);
29697 int units
= mode_nunits
[GET_MODE (operands
[0])];
29698 gcc_assert ((mask
& (mask
- 1)) == 0);
29699 for (i
= 0; i
< units
; ++i
)
29701 if ((mask
& 0x01) == 1)
29707 gcc_assert (i
< units
);
29709 switch (GET_MODE (operands
[0]))
29712 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
29715 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
29718 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
29721 gcc_unreachable ();
29724 output_asm_insn (templ
, operands
);
29729 /* Output a Thumb-1 casesi dispatch sequence. */
29731 thumb1_output_casesi (rtx
*operands
)
29733 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[0]));
29735 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29737 switch (GET_MODE(diff_vec
))
29740 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29741 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29743 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29744 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29746 return "bl\t%___gnu_thumb1_case_si";
29748 gcc_unreachable ();
29752 /* Output a Thumb-2 casesi instruction. */
29754 thumb2_output_casesi (rtx
*operands
)
29756 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[2]));
29758 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29760 output_asm_insn ("cmp\t%0, %1", operands
);
29761 output_asm_insn ("bhi\t%l3", operands
);
29762 switch (GET_MODE(diff_vec
))
29765 return "tbb\t[%|pc, %0]";
29767 return "tbh\t[%|pc, %0, lsl #1]";
29771 output_asm_insn ("adr\t%4, %l2", operands
);
29772 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
29773 output_asm_insn ("add\t%4, %4, %5", operands
);
29778 output_asm_insn ("adr\t%4, %l2", operands
);
29779 return "ldr\t%|pc, [%4, %0, lsl #2]";
29782 gcc_unreachable ();
29786 /* Most ARM cores are single issue, but some newer ones can dual issue.
29787 The scheduler descriptions rely on this being correct. */
29789 arm_issue_rate (void)
29816 /* A table and a function to perform ARM-specific name mangling for
29817 NEON vector types in order to conform to the AAPCS (see "Procedure
29818 Call Standard for the ARM Architecture", Appendix A). To qualify
29819 for emission with the mangled names defined in that document, a
29820 vector type must not only be of the correct mode but also be
29821 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29824 enum machine_mode mode
;
29825 const char *element_type_name
;
29826 const char *aapcs_name
;
29827 } arm_mangle_map_entry
;
29829 static arm_mangle_map_entry arm_mangle_map
[] = {
29830 /* 64-bit containerized types. */
29831 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
29832 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29833 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
29834 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29835 { V4HFmode
, "__builtin_neon_hf", "18__simd64_float16_t" },
29836 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
29837 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
29838 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
29839 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29840 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29842 /* 128-bit containerized types. */
29843 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
29844 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29845 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
29846 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29847 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
29848 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
29849 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
29850 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29851 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29852 { VOIDmode
, NULL
, NULL
}
29856 arm_mangle_type (const_tree type
)
29858 arm_mangle_map_entry
*pos
= arm_mangle_map
;
29860 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29861 has to be managled as if it is in the "std" namespace. */
29862 if (TARGET_AAPCS_BASED
29863 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
29864 return "St9__va_list";
29866 /* Half-precision float. */
29867 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
29870 if (TREE_CODE (type
) != VECTOR_TYPE
)
29873 /* Check the mode of the vector type, and the name of the vector
29874 element type, against the table. */
29875 while (pos
->mode
!= VOIDmode
)
29877 tree elt_type
= TREE_TYPE (type
);
29879 if (pos
->mode
== TYPE_MODE (type
)
29880 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
29881 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
29882 pos
->element_type_name
))
29883 return pos
->aapcs_name
;
29888 /* Use the default mangling for unrecognized (possibly user-defined)
29893 /* Order of allocation of core registers for Thumb: this allocation is
29894 written over the corresponding initial entries of the array
29895 initialized with REG_ALLOC_ORDER. We allocate all low registers
29896 first. Saving and restoring a low register is usually cheaper than
29897 using a call-clobbered high register. */
29899 static const int thumb_core_reg_alloc_order
[] =
29901 3, 2, 1, 0, 4, 5, 6, 7,
29902 14, 12, 8, 9, 10, 11
29905 /* Adjust register allocation order when compiling for Thumb. */
29908 arm_order_regs_for_local_alloc (void)
29910 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
29911 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
29913 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
29914 sizeof (thumb_core_reg_alloc_order
));
29917 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29920 arm_frame_pointer_required (void)
29922 return (cfun
->has_nonlocal_label
29923 || SUBTARGET_FRAME_POINTER_REQUIRED
29924 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
29927 /* Only thumb1 can't support conditional execution, so return true if
29928 the target is not thumb1. */
29930 arm_have_conditional_execution (void)
29932 return !TARGET_THUMB1
;
29936 arm_builtin_vectorized_function (tree fndecl
, tree type_out
, tree type_in
)
29938 enum machine_mode in_mode
, out_mode
;
29941 if (TREE_CODE (type_out
) != VECTOR_TYPE
29942 || TREE_CODE (type_in
) != VECTOR_TYPE
)
29945 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
29946 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
29947 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
29948 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
29950 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29951 decl of the vectorized builtin for the appropriate vector mode.
29952 NULL_TREE is returned if no such builtin is available. */
29953 #undef ARM_CHECK_BUILTIN_MODE
29954 #define ARM_CHECK_BUILTIN_MODE(C) \
29955 (TARGET_NEON && TARGET_FPU_ARMV8 \
29956 && flag_unsafe_math_optimizations \
29957 && ARM_CHECK_BUILTIN_MODE_1 (C))
29959 #undef ARM_CHECK_BUILTIN_MODE_1
29960 #define ARM_CHECK_BUILTIN_MODE_1(C) \
29961 (out_mode == SFmode && out_n == C \
29962 && in_mode == SFmode && in_n == C)
29964 #undef ARM_FIND_VRINT_VARIANT
29965 #define ARM_FIND_VRINT_VARIANT(N) \
29966 (ARM_CHECK_BUILTIN_MODE (2) \
29967 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29968 : (ARM_CHECK_BUILTIN_MODE (4) \
29969 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29972 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_NORMAL
)
29974 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
29977 case BUILT_IN_FLOORF
:
29978 return ARM_FIND_VRINT_VARIANT (vrintm
);
29979 case BUILT_IN_CEILF
:
29980 return ARM_FIND_VRINT_VARIANT (vrintp
);
29981 case BUILT_IN_TRUNCF
:
29982 return ARM_FIND_VRINT_VARIANT (vrintz
);
29983 case BUILT_IN_ROUNDF
:
29984 return ARM_FIND_VRINT_VARIANT (vrinta
);
29985 #undef ARM_CHECK_BUILTIN_MODE
29986 #define ARM_CHECK_BUILTIN_MODE(C, N) \
29987 (out_mode == N##Imode && out_n == C \
29988 && in_mode == N##Imode && in_n == C)
29989 case BUILT_IN_BSWAP16
:
29990 if (ARM_CHECK_BUILTIN_MODE (4, H
))
29991 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi
, false);
29992 else if (ARM_CHECK_BUILTIN_MODE (8, H
))
29993 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi
, false);
29996 case BUILT_IN_BSWAP32
:
29997 if (ARM_CHECK_BUILTIN_MODE (2, S
))
29998 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si
, false);
29999 else if (ARM_CHECK_BUILTIN_MODE (4, S
))
30000 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si
, false);
30003 case BUILT_IN_BSWAP64
:
30004 if (ARM_CHECK_BUILTIN_MODE (2, D
))
30005 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di
, false);
30015 #undef ARM_CHECK_BUILTIN_MODE
30016 #undef ARM_FIND_VRINT_VARIANT
30018 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30019 static HOST_WIDE_INT
30020 arm_vector_alignment (const_tree type
)
30022 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
30024 if (TARGET_AAPCS_BASED
)
30025 align
= MIN (align
, 64);
30030 static unsigned int
30031 arm_autovectorize_vector_sizes (void)
30033 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
30037 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
30039 /* Vectors which aren't in packed structures will not be less aligned than
30040 the natural alignment of their element type, so this is safe. */
30041 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
30044 return default_builtin_vector_alignment_reachable (type
, is_packed
);
30048 arm_builtin_support_vector_misalignment (enum machine_mode mode
,
30049 const_tree type
, int misalignment
,
30052 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
30054 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
30059 /* If the misalignment is unknown, we should be able to handle the access
30060 so long as it is not to a member of a packed data structure. */
30061 if (misalignment
== -1)
30064 /* Return true if the misalignment is a multiple of the natural alignment
30065 of the vector's element type. This is probably always going to be
30066 true in practice, since we've already established that this isn't a
30068 return ((misalignment
% align
) == 0);
30071 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
30076 arm_conditional_register_usage (void)
30080 if (TARGET_THUMB1
&& optimize_size
)
30082 /* When optimizing for size on Thumb-1, it's better not
30083 to use the HI regs, because of the overhead of
30085 for (regno
= FIRST_HI_REGNUM
;
30086 regno
<= LAST_HI_REGNUM
; ++regno
)
30087 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
30090 /* The link register can be clobbered by any branch insn,
30091 but we have no way to track that at present, so mark
30092 it as unavailable. */
30094 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
30096 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
30098 /* VFPv3 registers are disabled when earlier VFP
30099 versions are selected due to the definition of
30100 LAST_VFP_REGNUM. */
30101 for (regno
= FIRST_VFP_REGNUM
;
30102 regno
<= LAST_VFP_REGNUM
; ++ regno
)
30104 fixed_regs
[regno
] = 0;
30105 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
30106 || regno
>= FIRST_VFP_REGNUM
+ 32;
30110 if (TARGET_REALLY_IWMMXT
)
30112 regno
= FIRST_IWMMXT_GR_REGNUM
;
30113 /* The 2002/10/09 revision of the XScale ABI has wCG0
30114 and wCG1 as call-preserved registers. The 2002/11/21
30115 revision changed this so that all wCG registers are
30116 scratch registers. */
30117 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
30118 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
30119 fixed_regs
[regno
] = 0;
30120 /* The XScale ABI has wR0 - wR9 as scratch registers,
30121 the rest as call-preserved registers. */
30122 for (regno
= FIRST_IWMMXT_REGNUM
;
30123 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
30125 fixed_regs
[regno
] = 0;
30126 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
30130 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
30132 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
30133 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
30135 else if (TARGET_APCS_STACK
)
30137 fixed_regs
[10] = 1;
30138 call_used_regs
[10] = 1;
30140 /* -mcaller-super-interworking reserves r11 for calls to
30141 _interwork_r11_call_via_rN(). Making the register global
30142 is an easy way of ensuring that it remains valid for all
30144 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
30145 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
30147 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30148 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30149 if (TARGET_CALLER_INTERWORKING
)
30150 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30152 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30156 arm_preferred_rename_class (reg_class_t rclass
)
30158 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30159 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
30160 and code size can be reduced. */
30161 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
30167 /* Compute the atrribute "length" of insn "*push_multi".
30168 So this function MUST be kept in sync with that insn pattern. */
30170 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
30172 int i
, regno
, hi_reg
;
30173 int num_saves
= XVECLEN (parallel_op
, 0);
30183 regno
= REGNO (first_op
);
30184 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30185 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
30187 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
30188 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30196 /* Compute the number of instructions emitted by output_move_double. */
30198 arm_count_output_move_double_insns (rtx
*operands
)
30202 /* output_move_double may modify the operands array, so call it
30203 here on a copy of the array. */
30204 ops
[0] = operands
[0];
30205 ops
[1] = operands
[1];
30206 output_move_double (ops
, false, &count
);
30211 vfp3_const_double_for_fract_bits (rtx operand
)
30213 REAL_VALUE_TYPE r0
;
30215 if (!CONST_DOUBLE_P (operand
))
30218 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
30219 if (exact_real_inverse (DFmode
, &r0
))
30221 if (exact_real_truncate (DFmode
, &r0
))
30223 HOST_WIDE_INT value
= real_to_integer (&r0
);
30224 value
= value
& 0xffffffff;
30225 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
30226 return int_log2 (value
);
30233 vfp3_const_double_for_bits (rtx operand
)
30235 REAL_VALUE_TYPE r0
;
30237 if (!CONST_DOUBLE_P (operand
))
30240 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
30241 if (exact_real_truncate (DFmode
, &r0
))
30243 HOST_WIDE_INT value
= real_to_integer (&r0
);
30244 value
= value
& 0xffffffff;
30245 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
30246 return int_log2 (value
);
30252 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30255 arm_pre_atomic_barrier (enum memmodel model
)
30257 if (need_atomic_barrier_p (model
, true))
30258 emit_insn (gen_memory_barrier ());
30262 arm_post_atomic_barrier (enum memmodel model
)
30264 if (need_atomic_barrier_p (model
, false))
30265 emit_insn (gen_memory_barrier ());
30268 /* Emit the load-exclusive and store-exclusive instructions.
30269 Use acquire and release versions if necessary. */
30272 arm_emit_load_exclusive (enum machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
30274 rtx (*gen
) (rtx
, rtx
);
30280 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
30281 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
30282 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
30283 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
30285 gcc_unreachable ();
30292 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
30293 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
30294 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
30295 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
30297 gcc_unreachable ();
30301 emit_insn (gen (rval
, mem
));
30305 arm_emit_store_exclusive (enum machine_mode mode
, rtx bval
, rtx rval
,
30308 rtx (*gen
) (rtx
, rtx
, rtx
);
30314 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
30315 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
30316 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
30317 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
30319 gcc_unreachable ();
30326 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
30327 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
30328 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
30329 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
30331 gcc_unreachable ();
30335 emit_insn (gen (bval
, rval
, mem
));
30338 /* Mark the previous jump instruction as unlikely. */
30341 emit_unlikely_jump (rtx insn
)
30343 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
30345 insn
= emit_jump_insn (insn
);
30346 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
30349 /* Expand a compare and swap pattern. */
30352 arm_expand_compare_and_swap (rtx operands
[])
30354 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
30355 enum machine_mode mode
;
30356 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
30358 bval
= operands
[0];
30359 rval
= operands
[1];
30361 oldval
= operands
[3];
30362 newval
= operands
[4];
30363 is_weak
= operands
[5];
30364 mod_s
= operands
[6];
30365 mod_f
= operands
[7];
30366 mode
= GET_MODE (mem
);
30368 /* Normally the succ memory model must be stronger than fail, but in the
30369 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30370 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30372 if (TARGET_HAVE_LDACQ
30373 && INTVAL (mod_f
) == MEMMODEL_ACQUIRE
30374 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
30375 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
30381 /* For narrow modes, we're going to perform the comparison in SImode,
30382 so do the zero-extension now. */
30383 rval
= gen_reg_rtx (SImode
);
30384 oldval
= convert_modes (SImode
, mode
, oldval
, true);
30388 /* Force the value into a register if needed. We waited until after
30389 the zero-extension above to do this properly. */
30390 if (!arm_add_operand (oldval
, SImode
))
30391 oldval
= force_reg (SImode
, oldval
);
30395 if (!cmpdi_operand (oldval
, mode
))
30396 oldval
= force_reg (mode
, oldval
);
30400 gcc_unreachable ();
30405 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
30406 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
30407 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
30408 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
30410 gcc_unreachable ();
30413 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
30415 if (mode
== QImode
|| mode
== HImode
)
30416 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
30418 /* In all cases, we arrange for success to be signaled by Z set.
30419 This arrangement allows for the boolean result to be used directly
30420 in a subsequent branch, post optimization. */
30421 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
30422 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
30423 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
30426 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30427 another memory store between the load-exclusive and store-exclusive can
30428 reset the monitor from Exclusive to Open state. This means we must wait
30429 until after reload to split the pattern, lest we get a register spill in
30430 the middle of the atomic sequence. */
30433 arm_split_compare_and_swap (rtx operands
[])
30435 rtx rval
, mem
, oldval
, newval
, scratch
;
30436 enum machine_mode mode
;
30437 enum memmodel mod_s
, mod_f
;
30439 rtx label1
, label2
, x
, cond
;
30441 rval
= operands
[0];
30443 oldval
= operands
[2];
30444 newval
= operands
[3];
30445 is_weak
= (operands
[4] != const0_rtx
);
30446 mod_s
= (enum memmodel
) INTVAL (operands
[5]);
30447 mod_f
= (enum memmodel
) INTVAL (operands
[6]);
30448 scratch
= operands
[7];
30449 mode
= GET_MODE (mem
);
30451 bool use_acquire
= TARGET_HAVE_LDACQ
30452 && !(mod_s
== MEMMODEL_RELAXED
30453 || mod_s
== MEMMODEL_CONSUME
30454 || mod_s
== MEMMODEL_RELEASE
);
30456 bool use_release
= TARGET_HAVE_LDACQ
30457 && !(mod_s
== MEMMODEL_RELAXED
30458 || mod_s
== MEMMODEL_CONSUME
30459 || mod_s
== MEMMODEL_ACQUIRE
);
30461 /* Checks whether a barrier is needed and emits one accordingly. */
30462 if (!(use_acquire
|| use_release
))
30463 arm_pre_atomic_barrier (mod_s
);
30468 label1
= gen_label_rtx ();
30469 emit_label (label1
);
30471 label2
= gen_label_rtx ();
30473 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
30475 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
30476 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30477 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30478 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
30479 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
30481 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
, use_release
);
30483 /* Weak or strong, we want EQ to be true for success, so that we
30484 match the flags that we got from the compare above. */
30485 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
30486 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
30487 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
30491 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30492 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30493 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
30494 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
30497 if (mod_f
!= MEMMODEL_RELAXED
)
30498 emit_label (label2
);
30500 /* Checks whether a barrier is needed and emits one accordingly. */
30501 if (!(use_acquire
|| use_release
))
30502 arm_post_atomic_barrier (mod_s
);
30504 if (mod_f
== MEMMODEL_RELAXED
)
30505 emit_label (label2
);
30509 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
30510 rtx value
, rtx model_rtx
, rtx cond
)
30512 enum memmodel model
= (enum memmodel
) INTVAL (model_rtx
);
30513 enum machine_mode mode
= GET_MODE (mem
);
30514 enum machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
30517 bool use_acquire
= TARGET_HAVE_LDACQ
30518 && !(model
== MEMMODEL_RELAXED
30519 || model
== MEMMODEL_CONSUME
30520 || model
== MEMMODEL_RELEASE
);
30522 bool use_release
= TARGET_HAVE_LDACQ
30523 && !(model
== MEMMODEL_RELAXED
30524 || model
== MEMMODEL_CONSUME
30525 || model
== MEMMODEL_ACQUIRE
);
30527 /* Checks whether a barrier is needed and emits one accordingly. */
30528 if (!(use_acquire
|| use_release
))
30529 arm_pre_atomic_barrier (model
);
30531 label
= gen_label_rtx ();
30532 emit_label (label
);
30535 new_out
= gen_lowpart (wmode
, new_out
);
30537 old_out
= gen_lowpart (wmode
, old_out
);
30540 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
30542 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
30551 x
= gen_rtx_AND (wmode
, old_out
, value
);
30552 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30553 x
= gen_rtx_NOT (wmode
, new_out
);
30554 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30558 if (CONST_INT_P (value
))
30560 value
= GEN_INT (-INTVAL (value
));
30566 if (mode
== DImode
)
30568 /* DImode plus/minus need to clobber flags. */
30569 /* The adddi3 and subdi3 patterns are incorrectly written so that
30570 they require matching operands, even when we could easily support
30571 three operands. Thankfully, this can be fixed up post-splitting,
30572 as the individual add+adc patterns do accept three operands and
30573 post-reload cprop can make these moves go away. */
30574 emit_move_insn (new_out
, old_out
);
30576 x
= gen_adddi3 (new_out
, new_out
, value
);
30578 x
= gen_subdi3 (new_out
, new_out
, value
);
30585 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
30586 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30590 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
30593 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30594 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
30596 /* Checks whether a barrier is needed and emits one accordingly. */
30597 if (!(use_acquire
|| use_release
))
30598 arm_post_atomic_barrier (model
);
30601 #define MAX_VECT_LEN 16
30603 struct expand_vec_perm_d
30605 rtx target
, op0
, op1
;
30606 unsigned char perm
[MAX_VECT_LEN
];
30607 enum machine_mode vmode
;
30608 unsigned char nelt
;
30613 /* Generate a variable permutation. */
30616 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30618 enum machine_mode vmode
= GET_MODE (target
);
30619 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30621 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
30622 gcc_checking_assert (GET_MODE (op0
) == vmode
);
30623 gcc_checking_assert (GET_MODE (op1
) == vmode
);
30624 gcc_checking_assert (GET_MODE (sel
) == vmode
);
30625 gcc_checking_assert (TARGET_NEON
);
30629 if (vmode
== V8QImode
)
30630 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
30632 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
30638 if (vmode
== V8QImode
)
30640 pair
= gen_reg_rtx (V16QImode
);
30641 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
30642 pair
= gen_lowpart (TImode
, pair
);
30643 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
30647 pair
= gen_reg_rtx (OImode
);
30648 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
30649 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
30655 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30657 enum machine_mode vmode
= GET_MODE (target
);
30658 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
30659 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30660 rtx rmask
[MAX_VECT_LEN
], mask
;
30662 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30663 numbering of elements for big-endian, we must reverse the order. */
30664 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
30666 /* The VTBL instruction does not use a modulo index, so we must take care
30667 of that ourselves. */
30668 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30669 for (i
= 0; i
< nelt
; ++i
)
30671 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
30672 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
30674 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
30677 /* Generate or test for an insn that supports a constant permutation. */
30679 /* Recognize patterns for the VUZP insns. */
30682 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
30684 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
30685 rtx out0
, out1
, in0
, in1
, x
;
30686 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30688 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30691 /* Note that these are little-endian tests. Adjust for big-endian later. */
30692 if (d
->perm
[0] == 0)
30694 else if (d
->perm
[0] == 1)
30698 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30700 for (i
= 0; i
< nelt
; i
++)
30702 unsigned elt
= (i
* 2 + odd
) & mask
;
30703 if (d
->perm
[i
] != elt
)
30713 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
30714 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
30715 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
30716 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
30717 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
30718 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
30719 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
30720 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
30722 gcc_unreachable ();
30727 if (BYTES_BIG_ENDIAN
)
30729 x
= in0
, in0
= in1
, in1
= x
;
30734 out1
= gen_reg_rtx (d
->vmode
);
30736 x
= out0
, out0
= out1
, out1
= x
;
30738 emit_insn (gen (out0
, in0
, in1
, out1
));
30742 /* Recognize patterns for the VZIP insns. */
30745 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
30747 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
30748 rtx out0
, out1
, in0
, in1
, x
;
30749 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30751 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30754 /* Note that these are little-endian tests. Adjust for big-endian later. */
30756 if (d
->perm
[0] == high
)
30758 else if (d
->perm
[0] == 0)
30762 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30764 for (i
= 0; i
< nelt
/ 2; i
++)
30766 unsigned elt
= (i
+ high
) & mask
;
30767 if (d
->perm
[i
* 2] != elt
)
30769 elt
= (elt
+ nelt
) & mask
;
30770 if (d
->perm
[i
* 2 + 1] != elt
)
30780 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
30781 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
30782 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
30783 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
30784 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
30785 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
30786 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
30787 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
30789 gcc_unreachable ();
30794 if (BYTES_BIG_ENDIAN
)
30796 x
= in0
, in0
= in1
, in1
= x
;
30801 out1
= gen_reg_rtx (d
->vmode
);
30803 x
= out0
, out0
= out1
, out1
= x
;
30805 emit_insn (gen (out0
, in0
, in1
, out1
));
30809 /* Recognize patterns for the VREV insns. */
30812 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
30814 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
30815 rtx (*gen
)(rtx
, rtx
, rtx
);
30817 if (!d
->one_vector_p
)
30826 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
30827 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
30835 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
30836 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
30837 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
30838 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
30846 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
30847 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
30848 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
30849 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
30850 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
30851 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
30852 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
30853 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
30862 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
30863 for (j
= 0; j
<= diff
; j
+= 1)
30865 /* This is guaranteed to be true as the value of diff
30866 is 7, 3, 1 and we should have enough elements in the
30867 queue to generate this. Getting a vector mask with a
30868 value of diff other than these values implies that
30869 something is wrong by the time we get here. */
30870 gcc_assert (i
+ j
< nelt
);
30871 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
30879 /* ??? The third operand is an artifact of the builtin infrastructure
30880 and is ignored by the actual instruction. */
30881 emit_insn (gen (d
->target
, d
->op0
, const0_rtx
));
30885 /* Recognize patterns for the VTRN insns. */
30888 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
30890 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
30891 rtx out0
, out1
, in0
, in1
, x
;
30892 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30894 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30897 /* Note that these are little-endian tests. Adjust for big-endian later. */
30898 if (d
->perm
[0] == 0)
30900 else if (d
->perm
[0] == 1)
30904 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30906 for (i
= 0; i
< nelt
; i
+= 2)
30908 if (d
->perm
[i
] != i
+ odd
)
30910 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
30920 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
30921 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
30922 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
30923 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
30924 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
30925 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
30926 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
30927 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
30929 gcc_unreachable ();
30934 if (BYTES_BIG_ENDIAN
)
30936 x
= in0
, in0
= in1
, in1
= x
;
30941 out1
= gen_reg_rtx (d
->vmode
);
30943 x
= out0
, out0
= out1
, out1
= x
;
30945 emit_insn (gen (out0
, in0
, in1
, out1
));
30949 /* Recognize patterns for the VEXT insns. */
30952 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
30954 unsigned int i
, nelt
= d
->nelt
;
30955 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
30958 unsigned int location
;
30960 unsigned int next
= d
->perm
[0] + 1;
30962 /* TODO: Handle GCC's numbering of elements for big-endian. */
30963 if (BYTES_BIG_ENDIAN
)
30966 /* Check if the extracted indexes are increasing by one. */
30967 for (i
= 1; i
< nelt
; next
++, i
++)
30969 /* If we hit the most significant element of the 2nd vector in
30970 the previous iteration, no need to test further. */
30971 if (next
== 2 * nelt
)
30974 /* If we are operating on only one vector: it could be a
30975 rotation. If there are only two elements of size < 64, let
30976 arm_evpc_neon_vrev catch it. */
30977 if (d
->one_vector_p
&& (next
== nelt
))
30979 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
30985 if (d
->perm
[i
] != next
)
30989 location
= d
->perm
[0];
30993 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
30994 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
30995 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
30996 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
30997 case V2SImode
: gen
= gen_neon_vextv2si
; break;
30998 case V4SImode
: gen
= gen_neon_vextv4si
; break;
30999 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
31000 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
31001 case V2DImode
: gen
= gen_neon_vextv2di
; break;
31010 offset
= GEN_INT (location
);
31011 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
31015 /* The NEON VTBL instruction is a fully variable permuation that's even
31016 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31017 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31018 can do slightly better by expanding this as a constant where we don't
31019 have to apply a mask. */
31022 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
31024 rtx rperm
[MAX_VECT_LEN
], sel
;
31025 enum machine_mode vmode
= d
->vmode
;
31026 unsigned int i
, nelt
= d
->nelt
;
31028 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31029 numbering of elements for big-endian, we must reverse the order. */
31030 if (BYTES_BIG_ENDIAN
)
31036 /* Generic code will try constant permutation twice. Once with the
31037 original mode and again with the elements lowered to QImode.
31038 So wait and don't do the selector expansion ourselves. */
31039 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
31042 for (i
= 0; i
< nelt
; ++i
)
31043 rperm
[i
] = GEN_INT (d
->perm
[i
]);
31044 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
31045 sel
= force_reg (vmode
, sel
);
31047 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
31052 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
31054 /* Check if the input mask matches vext before reordering the
31057 if (arm_evpc_neon_vext (d
))
31060 /* The pattern matching functions above are written to look for a small
31061 number to begin the sequence (0, 1, N/2). If we begin with an index
31062 from the second operand, we can swap the operands. */
31063 if (d
->perm
[0] >= d
->nelt
)
31065 unsigned i
, nelt
= d
->nelt
;
31068 for (i
= 0; i
< nelt
; ++i
)
31069 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
31078 if (arm_evpc_neon_vuzp (d
))
31080 if (arm_evpc_neon_vzip (d
))
31082 if (arm_evpc_neon_vrev (d
))
31084 if (arm_evpc_neon_vtrn (d
))
31086 return arm_evpc_neon_vtbl (d
);
31091 /* Expand a vec_perm_const pattern. */
31094 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
31096 struct expand_vec_perm_d d
;
31097 int i
, nelt
, which
;
31103 d
.vmode
= GET_MODE (target
);
31104 gcc_assert (VECTOR_MODE_P (d
.vmode
));
31105 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
31106 d
.testing_p
= false;
31108 for (i
= which
= 0; i
< nelt
; ++i
)
31110 rtx e
= XVECEXP (sel
, 0, i
);
31111 int ei
= INTVAL (e
) & (2 * nelt
- 1);
31112 which
|= (ei
< nelt
? 1 : 2);
31122 d
.one_vector_p
= false;
31123 if (!rtx_equal_p (op0
, op1
))
31126 /* The elements of PERM do not suggest that only the first operand
31127 is used, but both operands are identical. Allow easier matching
31128 of the permutation by folding the permutation into the single
31132 for (i
= 0; i
< nelt
; ++i
)
31133 d
.perm
[i
] &= nelt
- 1;
31135 d
.one_vector_p
= true;
31140 d
.one_vector_p
= true;
31144 return arm_expand_vec_perm_const_1 (&d
);
31147 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31150 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
31151 const unsigned char *sel
)
31153 struct expand_vec_perm_d d
;
31154 unsigned int i
, nelt
, which
;
31158 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
31159 d
.testing_p
= true;
31160 memcpy (d
.perm
, sel
, nelt
);
31162 /* Categorize the set of elements in the selector. */
31163 for (i
= which
= 0; i
< nelt
; ++i
)
31165 unsigned char e
= d
.perm
[i
];
31166 gcc_assert (e
< 2 * nelt
);
31167 which
|= (e
< nelt
? 1 : 2);
31170 /* For all elements from second vector, fold the elements to first. */
31172 for (i
= 0; i
< nelt
; ++i
)
31175 /* Check whether the mask can be applied to the vector type. */
31176 d
.one_vector_p
= (which
!= 3);
31178 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
31179 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
31180 if (!d
.one_vector_p
)
31181 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
31184 ret
= arm_expand_vec_perm_const_1 (&d
);
31191 arm_autoinc_modes_ok_p (enum machine_mode mode
, enum arm_auto_incmodes code
)
31193 /* If we are soft float and we do not have ldrd
31194 then all auto increment forms are ok. */
31195 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
31200 /* Post increment and Pre Decrement are supported for all
31201 instruction forms except for vector forms. */
31204 if (VECTOR_MODE_P (mode
))
31206 if (code
!= ARM_PRE_DEC
)
31216 /* Without LDRD and mode size greater than
31217 word size, there is no point in auto-incrementing
31218 because ldm and stm will not have these forms. */
31219 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
31222 /* Vector and floating point modes do not support
31223 these auto increment forms. */
31224 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
31237 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31238 on ARM, since we know that shifts by negative amounts are no-ops.
31239 Additionally, the default expansion code is not available or suitable
31240 for post-reload insn splits (this can occur when the register allocator
31241 chooses not to do a shift in NEON).
31243 This function is used in both initial expand and post-reload splits, and
31244 handles all kinds of 64-bit shifts.
31246 Input requirements:
31247 - It is safe for the input and output to be the same register, but
31248 early-clobber rules apply for the shift amount and scratch registers.
31249 - Shift by register requires both scratch registers. In all other cases
31250 the scratch registers may be NULL.
31251 - Ashiftrt by a register also clobbers the CC register. */
31253 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
31254 rtx amount
, rtx scratch1
, rtx scratch2
)
31256 rtx out_high
= gen_highpart (SImode
, out
);
31257 rtx out_low
= gen_lowpart (SImode
, out
);
31258 rtx in_high
= gen_highpart (SImode
, in
);
31259 rtx in_low
= gen_lowpart (SImode
, in
);
31262 in = the register pair containing the input value.
31263 out = the destination register pair.
31264 up = the high- or low-part of each pair.
31265 down = the opposite part to "up".
31266 In a shift, we can consider bits to shift from "up"-stream to
31267 "down"-stream, so in a left-shift "up" is the low-part and "down"
31268 is the high-part of each register pair. */
31270 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
31271 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
31272 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
31273 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
31275 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
31277 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
31278 && GET_MODE (out
) == DImode
);
31280 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
31281 && GET_MODE (in
) == DImode
);
31283 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
31284 && GET_MODE (amount
) == SImode
)
31285 || CONST_INT_P (amount
)));
31286 gcc_assert (scratch1
== NULL
31287 || (GET_CODE (scratch1
) == SCRATCH
)
31288 || (GET_MODE (scratch1
) == SImode
31289 && REG_P (scratch1
)));
31290 gcc_assert (scratch2
== NULL
31291 || (GET_CODE (scratch2
) == SCRATCH
)
31292 || (GET_MODE (scratch2
) == SImode
31293 && REG_P (scratch2
)));
31294 gcc_assert (!REG_P (out
) || !REG_P (amount
)
31295 || !HARD_REGISTER_P (out
)
31296 || (REGNO (out
) != REGNO (amount
)
31297 && REGNO (out
) + 1 != REGNO (amount
)));
31299 /* Macros to make following code more readable. */
31300 #define SUB_32(DEST,SRC) \
31301 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31302 #define RSB_32(DEST,SRC) \
31303 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31304 #define SUB_S_32(DEST,SRC) \
31305 gen_addsi3_compare0 ((DEST), (SRC), \
31307 #define SET(DEST,SRC) \
31308 gen_rtx_SET (SImode, (DEST), (SRC))
31309 #define SHIFT(CODE,SRC,AMOUNT) \
31310 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31311 #define LSHIFT(CODE,SRC,AMOUNT) \
31312 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31313 SImode, (SRC), (AMOUNT))
31314 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31315 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31316 SImode, (SRC), (AMOUNT))
31318 gen_rtx_IOR (SImode, (A), (B))
31319 #define BRANCH(COND,LABEL) \
31320 gen_arm_cond_branch ((LABEL), \
31321 gen_rtx_ ## COND (CCmode, cc_reg, \
31325 /* Shifts by register and shifts by constant are handled separately. */
31326 if (CONST_INT_P (amount
))
31328 /* We have a shift-by-constant. */
31330 /* First, handle out-of-range shift amounts.
31331 In both cases we try to match the result an ARM instruction in a
31332 shift-by-register would give. This helps reduce execution
31333 differences between optimization levels, but it won't stop other
31334 parts of the compiler doing different things. This is "undefined
31335 behaviour, in any case. */
31336 if (INTVAL (amount
) <= 0)
31337 emit_insn (gen_movdi (out
, in
));
31338 else if (INTVAL (amount
) >= 64)
31340 if (code
== ASHIFTRT
)
31342 rtx const31_rtx
= GEN_INT (31);
31343 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
31344 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
31347 emit_insn (gen_movdi (out
, const0_rtx
));
31350 /* Now handle valid shifts. */
31351 else if (INTVAL (amount
) < 32)
31353 /* Shifts by a constant less than 32. */
31354 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
31356 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
31357 emit_insn (SET (out_down
,
31358 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
31360 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
31364 /* Shifts by a constant greater than 31. */
31365 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
31367 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
31368 if (code
== ASHIFTRT
)
31369 emit_insn (gen_ashrsi3 (out_up
, in_up
,
31372 emit_insn (SET (out_up
, const0_rtx
));
31377 /* We have a shift-by-register. */
31378 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
31380 /* This alternative requires the scratch registers. */
31381 gcc_assert (scratch1
&& REG_P (scratch1
));
31382 gcc_assert (scratch2
&& REG_P (scratch2
));
31384 /* We will need the values "amount-32" and "32-amount" later.
31385 Swapping them around now allows the later code to be more general. */
31389 emit_insn (SUB_32 (scratch1
, amount
));
31390 emit_insn (RSB_32 (scratch2
, amount
));
31393 emit_insn (RSB_32 (scratch1
, amount
));
31394 /* Also set CC = amount > 32. */
31395 emit_insn (SUB_S_32 (scratch2
, amount
));
31398 emit_insn (RSB_32 (scratch1
, amount
));
31399 emit_insn (SUB_32 (scratch2
, amount
));
31402 gcc_unreachable ();
31405 /* Emit code like this:
31408 out_down = in_down << amount;
31409 out_down = (in_up << (amount - 32)) | out_down;
31410 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31411 out_up = in_up << amount;
31414 out_down = in_down >> amount;
31415 out_down = (in_up << (32 - amount)) | out_down;
31417 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31418 out_up = in_up << amount;
31421 out_down = in_down >> amount;
31422 out_down = (in_up << (32 - amount)) | out_down;
31424 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31425 out_up = in_up << amount;
31427 The ARM and Thumb2 variants are the same but implemented slightly
31428 differently. If this were only called during expand we could just
31429 use the Thumb2 case and let combine do the right thing, but this
31430 can also be called from post-reload splitters. */
31432 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
31434 if (!TARGET_THUMB2
)
31436 /* Emit code for ARM mode. */
31437 emit_insn (SET (out_down
,
31438 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
31439 if (code
== ASHIFTRT
)
31441 rtx done_label
= gen_label_rtx ();
31442 emit_jump_insn (BRANCH (LT
, done_label
));
31443 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
31445 emit_label (done_label
);
31448 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
31453 /* Emit code for Thumb2 mode.
31454 Thumb2 can't do shift and or in one insn. */
31455 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
31456 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
31458 if (code
== ASHIFTRT
)
31460 rtx done_label
= gen_label_rtx ();
31461 emit_jump_insn (BRANCH (LT
, done_label
));
31462 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
31463 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
31464 emit_label (done_label
);
31468 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
31469 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
31473 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
31488 /* Returns true if a valid comparison operation and makes
31489 the operands in a form that is valid. */
31491 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
31493 enum rtx_code code
= GET_CODE (*comparison
);
31495 enum machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
31496 ? GET_MODE (*op2
) : GET_MODE (*op1
);
31498 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
31500 if (code
== UNEQ
|| code
== LTGT
)
31503 code_int
= (int)code
;
31504 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
31505 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
31510 if (!arm_add_operand (*op1
, mode
))
31511 *op1
= force_reg (mode
, *op1
);
31512 if (!arm_add_operand (*op2
, mode
))
31513 *op2
= force_reg (mode
, *op2
);
31517 if (!cmpdi_operand (*op1
, mode
))
31518 *op1
= force_reg (mode
, *op1
);
31519 if (!cmpdi_operand (*op2
, mode
))
31520 *op2
= force_reg (mode
, *op2
);
31525 if (!arm_float_compare_operand (*op1
, mode
))
31526 *op1
= force_reg (mode
, *op1
);
31527 if (!arm_float_compare_operand (*op2
, mode
))
31528 *op2
= force_reg (mode
, *op2
);
31538 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31540 static unsigned HOST_WIDE_INT
31541 arm_asan_shadow_offset (void)
31543 return (unsigned HOST_WIDE_INT
) 1 << 29;
31547 /* This is a temporary fix for PR60655. Ideally we need
31548 to handle most of these cases in the generic part but
31549 currently we reject minus (..) (sym_ref). We try to
31550 ameliorate the case with minus (sym_ref1) (sym_ref2)
31551 where they are in the same section. */
31554 arm_const_not_ok_for_debug_p (rtx p
)
31556 tree decl_op0
= NULL
;
31557 tree decl_op1
= NULL
;
31559 if (GET_CODE (p
) == MINUS
)
31561 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
31563 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
31565 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
31566 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
31568 if ((TREE_CODE (decl_op1
) == VAR_DECL
31569 || TREE_CODE (decl_op1
) == CONST_DECL
)
31570 && (TREE_CODE (decl_op0
) == VAR_DECL
31571 || TREE_CODE (decl_op0
) == CONST_DECL
))
31572 return (get_variable_section (decl_op1
, false)
31573 != get_variable_section (decl_op0
, false));
31575 if (TREE_CODE (decl_op1
) == LABEL_DECL
31576 && TREE_CODE (decl_op0
) == LABEL_DECL
)
31577 return (DECL_CONTEXT (decl_op1
)
31578 != DECL_CONTEXT (decl_op0
));
31589 arm_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
31591 const unsigned ARM_FE_INVALID
= 1;
31592 const unsigned ARM_FE_DIVBYZERO
= 2;
31593 const unsigned ARM_FE_OVERFLOW
= 4;
31594 const unsigned ARM_FE_UNDERFLOW
= 8;
31595 const unsigned ARM_FE_INEXACT
= 16;
31596 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT
= (ARM_FE_INVALID
31601 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT
= 8;
31602 tree fenv_var
, get_fpscr
, set_fpscr
, mask
, ld_fenv
, masked_fenv
;
31603 tree new_fenv_var
, reload_fenv
, restore_fnenv
;
31604 tree update_call
, atomic_feraiseexcept
, hold_fnclex
;
31606 if (!TARGET_VFP
|| !TARGET_HARD_FLOAT
)
31609 /* Generate the equivalent of :
31610 unsigned int fenv_var;
31611 fenv_var = __builtin_arm_get_fpscr ();
31613 unsigned int masked_fenv;
31614 masked_fenv = fenv_var & mask;
31616 __builtin_arm_set_fpscr (masked_fenv); */
31618 fenv_var
= create_tmp_var (unsigned_type_node
, NULL
);
31619 get_fpscr
= arm_builtin_decls
[ARM_BUILTIN_GET_FPSCR
];
31620 set_fpscr
= arm_builtin_decls
[ARM_BUILTIN_SET_FPSCR
];
31621 mask
= build_int_cst (unsigned_type_node
,
31622 ~((ARM_FE_ALL_EXCEPT
<< ARM_FE_EXCEPT_SHIFT
)
31623 | ARM_FE_ALL_EXCEPT
));
31624 ld_fenv
= build2 (MODIFY_EXPR
, unsigned_type_node
,
31625 fenv_var
, build_call_expr (get_fpscr
, 0));
31626 masked_fenv
= build2 (BIT_AND_EXPR
, unsigned_type_node
, fenv_var
, mask
);
31627 hold_fnclex
= build_call_expr (set_fpscr
, 1, masked_fenv
);
31628 *hold
= build2 (COMPOUND_EXPR
, void_type_node
,
31629 build2 (COMPOUND_EXPR
, void_type_node
, masked_fenv
, ld_fenv
),
31632 /* Store the value of masked_fenv to clear the exceptions:
31633 __builtin_arm_set_fpscr (masked_fenv); */
31635 *clear
= build_call_expr (set_fpscr
, 1, masked_fenv
);
31637 /* Generate the equivalent of :
31638 unsigned int new_fenv_var;
31639 new_fenv_var = __builtin_arm_get_fpscr ();
31641 __builtin_arm_set_fpscr (fenv_var);
31643 __atomic_feraiseexcept (new_fenv_var); */
31645 new_fenv_var
= create_tmp_var (unsigned_type_node
, NULL
);
31646 reload_fenv
= build2 (MODIFY_EXPR
, unsigned_type_node
, new_fenv_var
,
31647 build_call_expr (get_fpscr
, 0));
31648 restore_fnenv
= build_call_expr (set_fpscr
, 1, fenv_var
);
31649 atomic_feraiseexcept
= builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
31650 update_call
= build_call_expr (atomic_feraiseexcept
, 1,
31651 fold_convert (integer_type_node
, new_fenv_var
));
31652 *update
= build2 (COMPOUND_EXPR
, void_type_node
,
31653 build2 (COMPOUND_EXPR
, void_type_node
,
31654 reload_fenv
, restore_fnenv
), update_call
);
31657 /* return TRUE if x is a reference to a value in a constant pool */
31659 arm_is_constant_pool_ref (rtx x
)
31662 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
31663 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
31666 #include "gt-arm.h"