1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
26 #include "hash-table.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
40 #include "insn-attr.h"
46 #include "diagnostic-core.h"
53 #include "sched-int.h"
54 #include "target-def.h"
56 #include "langhooks.h"
63 #include "gimple-expr.h"
66 /* Forward definitions of types. */
/* Shorthands for the minipool (inline constant pool) data structures
   used by the machine-dependent reorg pass below: a pool entry node
   and a fixup record describing an insn that references the pool.  */
67 typedef struct minipool_node Mnode
;
68 typedef struct minipool_fixup Mfix
;
/* Hook that a language front end may set to emit language-specific
   object attributes; takes no arguments.  (Purpose inferred from the
   name -- TODO confirm against the front ends that install it.)  */
70 void (*arm_lang_output_object_attributes_hook
)(void);
77 /* Forward function declarations. */
78 static bool arm_const_not_ok_for_debug_p (rtx
);
79 static bool arm_lra_p (void);
80 static bool arm_needs_doubleword_align (enum machine_mode
, const_tree
);
81 static int arm_compute_static_chain_stack_bytes (void);
82 static arm_stack_offsets
*arm_get_frame_offsets (void);
83 static void arm_add_gc_roots (void);
84 static int arm_gen_constant (enum rtx_code
, enum machine_mode
, rtx
,
85 HOST_WIDE_INT
, rtx
, rtx
, int, int);
86 static unsigned bit_count (unsigned long);
87 static int arm_address_register_rtx_p (rtx
, int);
88 static int arm_legitimate_index_p (enum machine_mode
, rtx
, RTX_CODE
, int);
89 static int thumb2_legitimate_index_p (enum machine_mode
, rtx
, int);
90 static int thumb1_base_register_rtx_p (rtx
, enum machine_mode
, int);
91 static rtx
arm_legitimize_address (rtx
, rtx
, enum machine_mode
);
92 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
93 static rtx
thumb_legitimize_address (rtx
, rtx
, enum machine_mode
);
94 inline static int thumb1_index_register_rtx_p (rtx
, int);
95 static bool arm_legitimate_address_p (enum machine_mode
, rtx
, bool);
96 static int thumb_far_jump_used_p (void);
97 static bool thumb_force_lr_save (void);
98 static unsigned arm_size_return_regs (void);
99 static bool arm_assemble_integer (rtx
, unsigned int, int);
100 static void arm_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
);
101 static void arm_print_operand (FILE *, rtx
, int);
102 static void arm_print_operand_address (FILE *, rtx
);
103 static bool arm_print_operand_punct_valid_p (unsigned char code
);
104 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
105 static arm_cc
get_arm_condition_code (rtx
);
106 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
107 static const char *output_multi_immediate (rtx
*, const char *, const char *,
109 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
110 static struct machine_function
*arm_init_machine_status (void);
111 static void thumb_exit (FILE *, int);
112 static HOST_WIDE_INT
get_jump_table_size (rtx
);
113 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
114 static Mnode
*add_minipool_forward_ref (Mfix
*);
115 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
116 static Mnode
*add_minipool_backward_ref (Mfix
*);
117 static void assign_minipool_offsets (Mfix
*);
118 static void arm_print_value (FILE *, rtx
);
119 static void dump_minipool (rtx
);
120 static int arm_barrier_cost (rtx
);
121 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
122 static void push_minipool_barrier (rtx
, HOST_WIDE_INT
);
123 static void push_minipool_fix (rtx
, HOST_WIDE_INT
, rtx
*, enum machine_mode
,
125 static void arm_reorg (void);
126 static void note_invalid_constants (rtx
, HOST_WIDE_INT
, int);
127 static unsigned long arm_compute_save_reg0_reg12_mask (void);
128 static unsigned long arm_compute_save_reg_mask (void);
129 static unsigned long arm_isr_value (tree
);
130 static unsigned long arm_compute_func_type (void);
131 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
132 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
133 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
134 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
135 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
137 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
138 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
139 static int arm_comp_type_attributes (const_tree
, const_tree
);
140 static void arm_set_default_type_attributes (tree
);
141 static int arm_adjust_cost (rtx
, rtx
, rtx
, int);
142 static int arm_sched_reorder (FILE *, int, rtx
*, int *, int);
143 static int optimal_immediate_sequence (enum rtx_code code
,
144 unsigned HOST_WIDE_INT val
,
145 struct four_ints
*return_sequence
);
146 static int optimal_immediate_sequence_1 (enum rtx_code code
,
147 unsigned HOST_WIDE_INT val
,
148 struct four_ints
*return_sequence
,
150 static int arm_get_strip_length (int);
151 static bool arm_function_ok_for_sibcall (tree
, tree
);
152 static enum machine_mode
arm_promote_function_mode (const_tree
,
153 enum machine_mode
, int *,
155 static bool arm_return_in_memory (const_tree
, const_tree
);
156 static rtx
arm_function_value (const_tree
, const_tree
, bool);
157 static rtx
arm_libcall_value_1 (enum machine_mode
);
158 static rtx
arm_libcall_value (enum machine_mode
, const_rtx
);
159 static bool arm_function_value_regno_p (const unsigned int);
160 static void arm_internal_label (FILE *, const char *, unsigned long);
161 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
163 static bool arm_have_conditional_execution (void);
164 static bool arm_cannot_force_const_mem (enum machine_mode
, rtx
);
165 static bool arm_legitimate_constant_p (enum machine_mode
, rtx
);
166 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
167 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
168 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
169 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
170 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
171 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
172 static bool arm_rtx_costs (rtx
, int, int, int, int *, bool);
173 static int arm_address_cost (rtx
, enum machine_mode
, addr_space_t
, bool);
174 static int arm_register_move_cost (enum machine_mode
, reg_class_t
, reg_class_t
);
175 static int arm_memory_move_cost (enum machine_mode
, reg_class_t
, bool);
176 static void arm_init_builtins (void);
177 static void arm_init_iwmmxt_builtins (void);
178 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
179 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
180 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
181 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
182 static tree
arm_builtin_decl (unsigned, bool);
183 static void emit_constant_insn (rtx cond
, rtx pattern
);
184 static rtx
emit_set_insn (rtx
, rtx
);
185 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
186 static int arm_arg_partial_bytes (cumulative_args_t
, enum machine_mode
,
188 static rtx
arm_function_arg (cumulative_args_t
, enum machine_mode
,
190 static void arm_function_arg_advance (cumulative_args_t
, enum machine_mode
,
192 static unsigned int arm_function_arg_boundary (enum machine_mode
, const_tree
);
193 static rtx
aapcs_allocate_return_reg (enum machine_mode
, const_tree
,
195 static rtx
aapcs_libcall_value (enum machine_mode
);
196 static int aapcs_select_return_coproc (const_tree
, const_tree
);
198 #ifdef OBJECT_FORMAT_ELF
199 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
200 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
203 static void arm_encode_section_info (tree
, rtx
, int);
206 static void arm_file_end (void);
207 static void arm_file_start (void);
209 static void arm_setup_incoming_varargs (cumulative_args_t
, enum machine_mode
,
211 static bool arm_pass_by_reference (cumulative_args_t
,
212 enum machine_mode
, const_tree
, bool);
213 static bool arm_promote_prototypes (const_tree
);
214 static bool arm_default_short_enums (void);
215 static bool arm_align_anon_bitfield (void);
216 static bool arm_return_in_msb (const_tree
);
217 static bool arm_must_pass_in_stack (enum machine_mode
, const_tree
);
218 static bool arm_return_in_memory (const_tree
, const_tree
);
220 static void arm_unwind_emit (FILE *, rtx
);
221 static bool arm_output_ttype (rtx
);
222 static void arm_asm_emit_except_personality (rtx
);
223 static void arm_asm_init_sections (void);
225 static rtx
arm_dwarf_register_span (rtx
);
227 static tree
arm_cxx_guard_type (void);
228 static bool arm_cxx_guard_mask_bit (void);
229 static tree
arm_get_cookie_size (tree
);
230 static bool arm_cookie_has_size (void);
231 static bool arm_cxx_cdtor_returns_this (void);
232 static bool arm_cxx_key_method_may_be_inline (void);
233 static void arm_cxx_determine_class_data_visibility (tree
);
234 static bool arm_cxx_class_data_always_comdat (void);
235 static bool arm_cxx_use_aeabi_atexit (void);
236 static void arm_init_libfuncs (void);
237 static tree
arm_build_builtin_va_list (void);
238 static void arm_expand_builtin_va_start (tree
, rtx
);
239 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
240 static void arm_option_override (void);
241 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode
);
242 static bool arm_cannot_copy_insn_p (rtx
);
243 static int arm_issue_rate (void);
244 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
245 static bool arm_output_addr_const_extra (FILE *, rtx
);
246 static bool arm_allocate_stack_slots_for_args (void);
247 static bool arm_warn_func_return (tree
);
248 static const char *arm_invalid_parameter_type (const_tree t
);
249 static const char *arm_invalid_return_type (const_tree t
);
250 static tree
arm_promoted_type (const_tree t
);
251 static tree
arm_convert_to_type (tree type
, tree expr
);
252 static bool arm_scalar_mode_supported_p (enum machine_mode
);
253 static bool arm_frame_pointer_required (void);
254 static bool arm_can_eliminate (const int, const int);
255 static void arm_asm_trampoline_template (FILE *);
256 static void arm_trampoline_init (rtx
, tree
, rtx
);
257 static rtx
arm_trampoline_adjust_address (rtx
);
258 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
259 static bool cortex_a9_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
260 static bool xscale_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
261 static bool fa726te_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
262 static bool arm_array_mode_supported_p (enum machine_mode
,
263 unsigned HOST_WIDE_INT
);
264 static enum machine_mode
arm_preferred_simd_mode (enum machine_mode
);
265 static bool arm_class_likely_spilled_p (reg_class_t
);
266 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
267 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
268 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode
,
272 static void arm_conditional_register_usage (void);
273 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
274 static unsigned int arm_autovectorize_vector_sizes (void);
275 static int arm_default_branch_cost (bool, bool);
276 static int arm_cortex_a5_branch_cost (bool, bool);
277 static int arm_cortex_m_branch_cost (bool, bool);
279 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
280 const unsigned char *sel
);
282 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
284 int misalign ATTRIBUTE_UNUSED
);
285 static unsigned arm_add_stmt_cost (void *data
, int count
,
286 enum vect_cost_for_stmt kind
,
287 struct _stmt_vec_info
*stmt_info
,
289 enum vect_cost_model_location where
);
291 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
292 bool op0_preserve_value
);
293 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
295 /* Table of machine attributes. */
296 static const struct attribute_spec arm_attribute_table
[] =
298 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
299 affects_type_identity } */
300 /* Function calls made to this symbol must be done indirectly, because
301 it may lie outside of the 26 bit addressing range of a normal function
303 { "long_call", 0, 0, false, true, true, NULL
, false },
304 /* Whereas these functions are always known to reside within the 26 bit
306 { "short_call", 0, 0, false, true, true, NULL
, false },
307 /* Specify the procedure call conventions for a function. */
308 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
310 /* Interrupt Service Routines have special prologue and epilogue requirements. */
311 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
313 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
315 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
318 /* ARM/PE has three new attributes:
320 dllexport - for exporting a function/variable that will live in a dll
321 dllimport - for importing a function/variable from a dll
323 Microsoft allows multiple declspecs in one __declspec, separating
324 them with spaces. We do NOT support this. Instead, use __declspec
327 { "dllimport", 0, 0, true, false, false, NULL
, false },
328 { "dllexport", 0, 0, true, false, false, NULL
, false },
329 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
331 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
332 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
333 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
334 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
337 { NULL
, 0, 0, false, false, false, NULL
, false }
340 /* Initialize the GCC target structure. */
341 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
342 #undef TARGET_MERGE_DECL_ATTRIBUTES
343 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
346 #undef TARGET_LEGITIMIZE_ADDRESS
347 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
350 #define TARGET_LRA_P arm_lra_p
352 #undef TARGET_ATTRIBUTE_TABLE
353 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
355 #undef TARGET_ASM_FILE_START
356 #define TARGET_ASM_FILE_START arm_file_start
357 #undef TARGET_ASM_FILE_END
358 #define TARGET_ASM_FILE_END arm_file_end
360 #undef TARGET_ASM_ALIGNED_SI_OP
361 #define TARGET_ASM_ALIGNED_SI_OP NULL
362 #undef TARGET_ASM_INTEGER
363 #define TARGET_ASM_INTEGER arm_assemble_integer
365 #undef TARGET_PRINT_OPERAND
366 #define TARGET_PRINT_OPERAND arm_print_operand
367 #undef TARGET_PRINT_OPERAND_ADDRESS
368 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
369 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
370 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
372 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
373 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
375 #undef TARGET_ASM_FUNCTION_PROLOGUE
376 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
378 #undef TARGET_ASM_FUNCTION_EPILOGUE
379 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
381 #undef TARGET_OPTION_OVERRIDE
382 #define TARGET_OPTION_OVERRIDE arm_option_override
384 #undef TARGET_COMP_TYPE_ATTRIBUTES
385 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
387 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
388 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
390 #undef TARGET_SCHED_ADJUST_COST
391 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
393 #undef TARGET_SCHED_REORDER
394 #define TARGET_SCHED_REORDER arm_sched_reorder
396 #undef TARGET_REGISTER_MOVE_COST
397 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
399 #undef TARGET_MEMORY_MOVE_COST
400 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
402 #undef TARGET_ENCODE_SECTION_INFO
404 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
406 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
409 #undef TARGET_STRIP_NAME_ENCODING
410 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
412 #undef TARGET_ASM_INTERNAL_LABEL
413 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
415 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
416 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
418 #undef TARGET_FUNCTION_VALUE
419 #define TARGET_FUNCTION_VALUE arm_function_value
421 #undef TARGET_LIBCALL_VALUE
422 #define TARGET_LIBCALL_VALUE arm_libcall_value
424 #undef TARGET_FUNCTION_VALUE_REGNO_P
425 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
427 #undef TARGET_ASM_OUTPUT_MI_THUNK
428 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
429 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
430 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
432 #undef TARGET_RTX_COSTS
433 #define TARGET_RTX_COSTS arm_rtx_costs
434 #undef TARGET_ADDRESS_COST
435 #define TARGET_ADDRESS_COST arm_address_cost
437 #undef TARGET_SHIFT_TRUNCATION_MASK
438 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
439 #undef TARGET_VECTOR_MODE_SUPPORTED_P
440 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
441 #undef TARGET_ARRAY_MODE_SUPPORTED_P
442 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
443 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
444 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
445 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
446 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
447 arm_autovectorize_vector_sizes
449 #undef TARGET_MACHINE_DEPENDENT_REORG
450 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
452 #undef TARGET_INIT_BUILTINS
453 #define TARGET_INIT_BUILTINS arm_init_builtins
454 #undef TARGET_EXPAND_BUILTIN
455 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
456 #undef TARGET_BUILTIN_DECL
457 #define TARGET_BUILTIN_DECL arm_builtin_decl
459 #undef TARGET_INIT_LIBFUNCS
460 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
462 #undef TARGET_PROMOTE_FUNCTION_MODE
463 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
464 #undef TARGET_PROMOTE_PROTOTYPES
465 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
466 #undef TARGET_PASS_BY_REFERENCE
467 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
468 #undef TARGET_ARG_PARTIAL_BYTES
469 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
470 #undef TARGET_FUNCTION_ARG
471 #define TARGET_FUNCTION_ARG arm_function_arg
472 #undef TARGET_FUNCTION_ARG_ADVANCE
473 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
474 #undef TARGET_FUNCTION_ARG_BOUNDARY
475 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
477 #undef TARGET_SETUP_INCOMING_VARARGS
478 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
480 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
481 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
483 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
484 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
485 #undef TARGET_TRAMPOLINE_INIT
486 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
487 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
488 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
490 #undef TARGET_WARN_FUNC_RETURN
491 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
493 #undef TARGET_DEFAULT_SHORT_ENUMS
494 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
496 #undef TARGET_ALIGN_ANON_BITFIELD
497 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
499 #undef TARGET_NARROW_VOLATILE_BITFIELD
500 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
502 #undef TARGET_CXX_GUARD_TYPE
503 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
505 #undef TARGET_CXX_GUARD_MASK_BIT
506 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
508 #undef TARGET_CXX_GET_COOKIE_SIZE
509 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
511 #undef TARGET_CXX_COOKIE_HAS_SIZE
512 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
514 #undef TARGET_CXX_CDTOR_RETURNS_THIS
515 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
517 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
518 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
520 #undef TARGET_CXX_USE_AEABI_ATEXIT
521 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
523 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
524 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
525 arm_cxx_determine_class_data_visibility
527 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
528 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
530 #undef TARGET_RETURN_IN_MSB
531 #define TARGET_RETURN_IN_MSB arm_return_in_msb
533 #undef TARGET_RETURN_IN_MEMORY
534 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
536 #undef TARGET_MUST_PASS_IN_STACK
537 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
540 #undef TARGET_ASM_UNWIND_EMIT
541 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
543 /* EABI unwinding tables use a different format for the typeinfo tables. */
544 #undef TARGET_ASM_TTYPE
545 #define TARGET_ASM_TTYPE arm_output_ttype
547 #undef TARGET_ARM_EABI_UNWINDER
548 #define TARGET_ARM_EABI_UNWINDER true
550 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
551 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
553 #undef TARGET_ASM_INIT_SECTIONS
554 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
555 #endif /* ARM_UNWIND_INFO */
557 #undef TARGET_DWARF_REGISTER_SPAN
558 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
560 #undef TARGET_CANNOT_COPY_INSN_P
561 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
564 #undef TARGET_HAVE_TLS
565 #define TARGET_HAVE_TLS true
568 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
569 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
571 #undef TARGET_LEGITIMATE_CONSTANT_P
572 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
574 #undef TARGET_CANNOT_FORCE_CONST_MEM
575 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
577 #undef TARGET_MAX_ANCHOR_OFFSET
578 #define TARGET_MAX_ANCHOR_OFFSET 4095
580 /* The minimum is set such that the total size of the block
581 for a particular anchor is -4088 + 1 + 4095 bytes, which is
582 divisible by eight, ensuring natural spacing of anchors. */
583 #undef TARGET_MIN_ANCHOR_OFFSET
584 #define TARGET_MIN_ANCHOR_OFFSET -4088
586 #undef TARGET_SCHED_ISSUE_RATE
587 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
589 #undef TARGET_MANGLE_TYPE
590 #define TARGET_MANGLE_TYPE arm_mangle_type
592 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
593 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
595 #undef TARGET_BUILD_BUILTIN_VA_LIST
596 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
597 #undef TARGET_EXPAND_BUILTIN_VA_START
598 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
599 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
600 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
603 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
604 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
607 #undef TARGET_LEGITIMATE_ADDRESS_P
608 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
610 #undef TARGET_PREFERRED_RELOAD_CLASS
611 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
613 #undef TARGET_INVALID_PARAMETER_TYPE
614 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
616 #undef TARGET_INVALID_RETURN_TYPE
617 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
619 #undef TARGET_PROMOTED_TYPE
620 #define TARGET_PROMOTED_TYPE arm_promoted_type
622 #undef TARGET_CONVERT_TO_TYPE
623 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
625 #undef TARGET_SCALAR_MODE_SUPPORTED_P
626 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
628 #undef TARGET_FRAME_POINTER_REQUIRED
629 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
631 #undef TARGET_CAN_ELIMINATE
632 #define TARGET_CAN_ELIMINATE arm_can_eliminate
634 #undef TARGET_CONDITIONAL_REGISTER_USAGE
635 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
637 #undef TARGET_CLASS_LIKELY_SPILLED_P
638 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
640 #undef TARGET_VECTORIZE_BUILTINS
641 #define TARGET_VECTORIZE_BUILTINS
643 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
644 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
645 arm_builtin_vectorized_function
647 #undef TARGET_VECTOR_ALIGNMENT
648 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
650 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
651 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
652 arm_vector_alignment_reachable
654 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
655 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
656 arm_builtin_support_vector_misalignment
658 #undef TARGET_PREFERRED_RENAME_CLASS
659 #define TARGET_PREFERRED_RENAME_CLASS \
660 arm_preferred_rename_class
662 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
663 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
664 arm_vectorize_vec_perm_const_ok
666 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
667 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
668 arm_builtin_vectorization_cost
669 #undef TARGET_VECTORIZE_ADD_STMT_COST
670 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
672 #undef TARGET_CANONICALIZE_COMPARISON
673 #define TARGET_CANONICALIZE_COMPARISON \
674 arm_canonicalize_comparison
676 #undef TARGET_ASAN_SHADOW_OFFSET
677 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
679 #undef MAX_INSN_PER_IT_BLOCK
680 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
682 #undef TARGET_CAN_USE_DOLOOP_P
683 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
685 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
686 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
/* The target hook vector for this backend.  TARGET_INITIALIZER expands
   to an initializer built from the TARGET_* macro overrides defined
   above; hooks not overridden get their default implementations.  */
688 struct gcc_target targetm
= TARGET_INITIALIZER
;
690 /* Obstack for minipool constant handling. */
691 static struct obstack minipool_obstack
;
/* Base object within MINIPOOL_OBSTACK -- presumably used to free all
   minipool allocations at once; verify against the reorg pass.  */
692 static char * minipool_startobj
;
694 /* The maximum number of insns skipped which
695 will be conditionalised if possible. */
696 static int max_insns_skipped
= 5;
/* Assembler output stream; defined elsewhere in the compiler.  */
698 extern FILE * asm_out_file
;
700 /* True if we are currently building a constant table. */
701 int making_const_table
;
703 /* The processor for which instructions should be scheduled. */
704 enum processor_type arm_tune
= arm_none
;
706 /* The current tuning set. */
707 const struct tune_params
*current_tune
;
709 /* Which floating point hardware to schedule for. */
712 /* Which floating point hardware to use. */
713 const struct arm_fpu_desc
*arm_fpu_desc
;
715 /* Used for Thumb call_via trampolines.  One label per low register
   plus LR -- hence 14 entries.  */
716 rtx thumb_call_via_label
[14];
717 static int thumb_call_reg_needed
;
719 /* Bit values used to identify processor capabilities. */
720 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
721 #define FL_ARCH3M (1 << 1) /* Extended multiply */
722 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
723 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
724 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
725 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
726 #define FL_THUMB (1 << 6) /* Thumb aware */
727 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
728 #define FL_STRONG (1 << 8) /* StrongARM */
729 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
730 #define FL_XSCALE (1 << 10) /* XScale */
731 /* spare (1 << 11) */
732 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
733 media instructions. */
734 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
735 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
736 Note: ARM6 & 7 derivatives only. */
737 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
738 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
739 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M' profile.  */
741 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
742 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
743 #define FL_NEON (1 << 20) /* Neon instructions. */
744 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M architecture.  */
746 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
747 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
748 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
749 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
751 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
752 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
754 /* Flags that only affect tuning, not available instructions. */
/* NOTE(review): the backslash continuation line of FL_TUNE (and its
   remaining flags) appears to have been lost in this dump -- verify
   the full definition against the original file.  */
755 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
/* Capability sets implied by each architecture level; each builds on
   the previous one by OR-ing in the newly introduced features.  */
758 #define FL_FOR_ARCH2 FL_NOTM
759 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
760 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
761 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
762 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
763 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
764 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
765 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
766 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
767 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
768 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
769 #define FL_FOR_ARCH6J FL_FOR_ARCH6
770 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
771 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
772 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
773 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
774 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
775 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
776 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
777 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
778 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
779 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
780 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
781 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
783 /* The bits in this mask specify which
784 instructions we are allowed to generate. */
785 static unsigned long insn_flags
= 0;
787 /* The bits in this mask specify which instruction scheduling options should
789 static unsigned long tune_flags
= 0;
791 /* The highest ARM architecture version supported by the
793 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
795 /* The following are used in the arm.md file as equivalents to bits
796 in the above two flag variables. */
798 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
801 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
804 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
807 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
810 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
813 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
816 /* Nonzero if this chip supports the ARM 6K extensions. */
819 /* Nonzero if instructions present in ARMv6-M can be used. */
822 /* Nonzero if this chip supports the ARM 7 extensions. */
825 /* Nonzero if instructions not present in the 'M' profile can be used. */
826 int arm_arch_notm
= 0;
828 /* Nonzero if instructions present in ARMv7E-M can be used. */
831 /* Nonzero if instructions present in ARMv8 can be used. */
834 /* Nonzero if this chip can benefit from load scheduling. */
835 int arm_ld_sched
= 0;
837 /* Nonzero if this chip is a StrongARM. */
838 int arm_tune_strongarm
= 0;
840 /* Nonzero if this chip supports Intel Wireless MMX technology. */
841 int arm_arch_iwmmxt
= 0;
843 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
844 int arm_arch_iwmmxt2
= 0;
846 /* Nonzero if this chip is an XScale. */
847 int arm_arch_xscale
= 0;
849 /* Nonzero if tuning for XScale */
850 int arm_tune_xscale
= 0;
852 /* Nonzero if we want to tune for stores that access the write-buffer.
853 This typically means an ARM6 or ARM7 with MMU or MPU. */
854 int arm_tune_wbuf
= 0;
856 /* Nonzero if tuning for Cortex-A9. */
857 int arm_tune_cortex_a9
= 0;
859 /* Nonzero if generating Thumb instructions. */
862 /* Nonzero if generating Thumb-1 instructions. */
865 /* Nonzero if we should define __THUMB_INTERWORK__ in the preprocessor.
867 XXX This is a bit of a hack, it's intended to help work around
868 problems in GLD which doesn't understand that armv5t code is
869 interworking clean. */
870 int arm_cpp_interwork
= 0;
872 /* Nonzero if chip supports Thumb 2. */
875 /* Nonzero if chip supports integer division instruction. */
876 int arm_arch_arm_hwdiv
;
877 int arm_arch_thumb_hwdiv
;
879 /* Nonzero if we should use Neon to handle 64-bits operations rather
880 than core registers. */
881 int prefer_neon_for_64bits
= 0;
883 /* Nonzero if we shouldn't use literal pools. */
884 bool arm_disable_literal_pool
= false;
886 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
887 we must report the mode of the memory reference from
888 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
889 enum machine_mode output_memory_reference_mode
;
891 /* The register number to be used for the PIC offset register. */
892 unsigned arm_pic_register
= INVALID_REGNUM
;
/* The default procedure calling standard (PCS) variant in effect;
   presumably set up during option processing -- TODO confirm where
   this is initialized (not visible in this chunk).  */
894 enum arm_pcs arm_pcs_default
;
896 /* For an explanation of these variables, see final_prescan_insn below. */
898 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
899 enum arm_cond_code arm_current_cc
;
902 int arm_target_label
;
903 /* The number of conditionally executed insns, including the current insn. */
904 int arm_condexec_count
= 0;
905 /* A bitmask specifying the patterns for the IT block.
906 Zero means do not output an IT block before this insn. */
907 int arm_condexec_mask
= 0;
908 /* The number of bits used in arm_condexec_mask. */
909 int arm_condexec_masklen
= 0;
911 /* Nonzero if chip supports the ARMv8 CRC instructions. */
912 int arm_arch_crc
= 0;
914 /* The condition codes of the ARM, and the inverse function. */
915 static const char * const arm_condition_codes
[] =
917 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
918 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
921 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
922 int arm_regs_in_sequence
[] =
924 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
/* Mnemonic for a left shift: unified assembler syntax spells it "lsl";
   the old divided syntax used "asl".  */
927 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
/* True iff the two NUL-terminated strings compare equal.  */
928 #define streq(string1, string2) (strcmp (string1, string2) == 0)
/* Bitmask of low registers usable as Thumb-2 work registers: 0xff selects
   r0-r7, then the bits for the Thumb hard frame pointer, SP, PC and the
   PIC offset-table register are masked out.  */
930 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
931 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
932 | (1 << PIC_OFFSET_TABLE_REGNUM)))
934 /* Initialization code. */
938 const char *const name
;
939 enum processor_type core
;
941 enum base_architecture base_arch
;
942 const unsigned long flags
;
943 const struct tune_params
*const tune
;
947 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
948 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
953 /* arm generic vectorizer costs. */
955 struct cpu_vec_costs arm_default_vec_cost
= {
956 1, /* scalar_stmt_cost. */
957 1, /* scalar load_cost. */
958 1, /* scalar_store_cost. */
959 1, /* vec_stmt_cost. */
960 1, /* vec_to_scalar_cost. */
961 1, /* scalar_to_vec_cost. */
962 1, /* vec_align_load_cost. */
963 1, /* vec_unalign_load_cost. */
964 1, /* vec_unalign_store_cost. */
965 1, /* vec_store_cost. */
966 3, /* cond_taken_branch_cost. */
967 1, /* cond_not_taken_branch_cost. */
970 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
971 #include "aarch-cost-tables.h"
975 const struct cpu_cost_table cortexa9_extra_costs
=
982 COSTS_N_INSNS (1), /* shift_reg. */
983 COSTS_N_INSNS (1), /* arith_shift. */
984 COSTS_N_INSNS (2), /* arith_shift_reg. */
986 COSTS_N_INSNS (1), /* log_shift_reg. */
987 COSTS_N_INSNS (1), /* extend. */
988 COSTS_N_INSNS (2), /* extend_arith. */
989 COSTS_N_INSNS (1), /* bfi. */
990 COSTS_N_INSNS (1), /* bfx. */
994 true /* non_exec_costs_exec. */
999 COSTS_N_INSNS (3), /* simple. */
1000 COSTS_N_INSNS (3), /* flag_setting. */
1001 COSTS_N_INSNS (2), /* extend. */
1002 COSTS_N_INSNS (3), /* add. */
1003 COSTS_N_INSNS (2), /* extend_add. */
1004 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1008 0, /* simple (N/A). */
1009 0, /* flag_setting (N/A). */
1010 COSTS_N_INSNS (4), /* extend. */
1012 COSTS_N_INSNS (4), /* extend_add. */
1018 COSTS_N_INSNS (2), /* load. */
1019 COSTS_N_INSNS (2), /* load_sign_extend. */
1020 COSTS_N_INSNS (2), /* ldrd. */
1021 COSTS_N_INSNS (2), /* ldm_1st. */
1022 1, /* ldm_regs_per_insn_1st. */
1023 2, /* ldm_regs_per_insn_subsequent. */
1024 COSTS_N_INSNS (5), /* loadf. */
1025 COSTS_N_INSNS (5), /* loadd. */
1026 COSTS_N_INSNS (1), /* load_unaligned. */
1027 COSTS_N_INSNS (2), /* store. */
1028 COSTS_N_INSNS (2), /* strd. */
1029 COSTS_N_INSNS (2), /* stm_1st. */
1030 1, /* stm_regs_per_insn_1st. */
1031 2, /* stm_regs_per_insn_subsequent. */
1032 COSTS_N_INSNS (1), /* storef. */
1033 COSTS_N_INSNS (1), /* stored. */
1034 COSTS_N_INSNS (1) /* store_unaligned. */
1039 COSTS_N_INSNS (14), /* div. */
1040 COSTS_N_INSNS (4), /* mult. */
1041 COSTS_N_INSNS (7), /* mult_addsub. */
1042 COSTS_N_INSNS (30), /* fma. */
1043 COSTS_N_INSNS (3), /* addsub. */
1044 COSTS_N_INSNS (1), /* fpconst. */
1045 COSTS_N_INSNS (1), /* neg. */
1046 COSTS_N_INSNS (3), /* compare. */
1047 COSTS_N_INSNS (3), /* widen. */
1048 COSTS_N_INSNS (3), /* narrow. */
1049 COSTS_N_INSNS (3), /* toint. */
1050 COSTS_N_INSNS (3), /* fromint. */
1051 COSTS_N_INSNS (3) /* roundint. */
1055 COSTS_N_INSNS (24), /* div. */
1056 COSTS_N_INSNS (5), /* mult. */
1057 COSTS_N_INSNS (8), /* mult_addsub. */
1058 COSTS_N_INSNS (30), /* fma. */
1059 COSTS_N_INSNS (3), /* addsub. */
1060 COSTS_N_INSNS (1), /* fpconst. */
1061 COSTS_N_INSNS (1), /* neg. */
1062 COSTS_N_INSNS (3), /* compare. */
1063 COSTS_N_INSNS (3), /* widen. */
1064 COSTS_N_INSNS (3), /* narrow. */
1065 COSTS_N_INSNS (3), /* toint. */
1066 COSTS_N_INSNS (3), /* fromint. */
1067 COSTS_N_INSNS (3) /* roundint. */
1072 COSTS_N_INSNS (1) /* alu. */
1076 const struct cpu_cost_table cortexa8_extra_costs
=
1082 COSTS_N_INSNS (1), /* shift. */
1084 COSTS_N_INSNS (1), /* arith_shift. */
1085 0, /* arith_shift_reg. */
1086 COSTS_N_INSNS (1), /* log_shift. */
1087 0, /* log_shift_reg. */
1089 0, /* extend_arith. */
1095 true /* non_exec_costs_exec. */
1100 COSTS_N_INSNS (1), /* simple. */
1101 COSTS_N_INSNS (1), /* flag_setting. */
1102 COSTS_N_INSNS (1), /* extend. */
1103 COSTS_N_INSNS (1), /* add. */
1104 COSTS_N_INSNS (1), /* extend_add. */
1105 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1109 0, /* simple (N/A). */
1110 0, /* flag_setting (N/A). */
1111 COSTS_N_INSNS (2), /* extend. */
1113 COSTS_N_INSNS (2), /* extend_add. */
1119 COSTS_N_INSNS (1), /* load. */
1120 COSTS_N_INSNS (1), /* load_sign_extend. */
1121 COSTS_N_INSNS (1), /* ldrd. */
1122 COSTS_N_INSNS (1), /* ldm_1st. */
1123 1, /* ldm_regs_per_insn_1st. */
1124 2, /* ldm_regs_per_insn_subsequent. */
1125 COSTS_N_INSNS (1), /* loadf. */
1126 COSTS_N_INSNS (1), /* loadd. */
1127 COSTS_N_INSNS (1), /* load_unaligned. */
1128 COSTS_N_INSNS (1), /* store. */
1129 COSTS_N_INSNS (1), /* strd. */
1130 COSTS_N_INSNS (1), /* stm_1st. */
1131 1, /* stm_regs_per_insn_1st. */
1132 2, /* stm_regs_per_insn_subsequent. */
1133 COSTS_N_INSNS (1), /* storef. */
1134 COSTS_N_INSNS (1), /* stored. */
1135 COSTS_N_INSNS (1) /* store_unaligned. */
1140 COSTS_N_INSNS (36), /* div. */
1141 COSTS_N_INSNS (11), /* mult. */
1142 COSTS_N_INSNS (20), /* mult_addsub. */
1143 COSTS_N_INSNS (30), /* fma. */
1144 COSTS_N_INSNS (9), /* addsub. */
1145 COSTS_N_INSNS (3), /* fpconst. */
1146 COSTS_N_INSNS (3), /* neg. */
1147 COSTS_N_INSNS (6), /* compare. */
1148 COSTS_N_INSNS (4), /* widen. */
1149 COSTS_N_INSNS (4), /* narrow. */
1150 COSTS_N_INSNS (8), /* toint. */
1151 COSTS_N_INSNS (8), /* fromint. */
1152 COSTS_N_INSNS (8) /* roundint. */
1156 COSTS_N_INSNS (64), /* div. */
1157 COSTS_N_INSNS (16), /* mult. */
1158 COSTS_N_INSNS (25), /* mult_addsub. */
1159 COSTS_N_INSNS (30), /* fma. */
1160 COSTS_N_INSNS (9), /* addsub. */
1161 COSTS_N_INSNS (3), /* fpconst. */
1162 COSTS_N_INSNS (3), /* neg. */
1163 COSTS_N_INSNS (6), /* compare. */
1164 COSTS_N_INSNS (6), /* widen. */
1165 COSTS_N_INSNS (6), /* narrow. */
1166 COSTS_N_INSNS (8), /* toint. */
1167 COSTS_N_INSNS (8), /* fromint. */
1168 COSTS_N_INSNS (8) /* roundint. */
1173 COSTS_N_INSNS (1) /* alu. */
1179 const struct cpu_cost_table cortexa7_extra_costs
=
1185 COSTS_N_INSNS (1), /* shift. */
1186 COSTS_N_INSNS (1), /* shift_reg. */
1187 COSTS_N_INSNS (1), /* arith_shift. */
1188 COSTS_N_INSNS (1), /* arith_shift_reg. */
1189 COSTS_N_INSNS (1), /* log_shift. */
1190 COSTS_N_INSNS (1), /* log_shift_reg. */
1191 COSTS_N_INSNS (1), /* extend. */
1192 COSTS_N_INSNS (1), /* extend_arith. */
1193 COSTS_N_INSNS (1), /* bfi. */
1194 COSTS_N_INSNS (1), /* bfx. */
1195 COSTS_N_INSNS (1), /* clz. */
1196 COSTS_N_INSNS (1), /* rev. */
1198 true /* non_exec_costs_exec. */
1205 COSTS_N_INSNS (1), /* flag_setting. */
1206 COSTS_N_INSNS (1), /* extend. */
1207 COSTS_N_INSNS (1), /* add. */
1208 COSTS_N_INSNS (1), /* extend_add. */
1209 COSTS_N_INSNS (7) /* idiv. */
1213 0, /* simple (N/A). */
1214 0, /* flag_setting (N/A). */
1215 COSTS_N_INSNS (1), /* extend. */
1217 COSTS_N_INSNS (2), /* extend_add. */
1223 COSTS_N_INSNS (1), /* load. */
1224 COSTS_N_INSNS (1), /* load_sign_extend. */
1225 COSTS_N_INSNS (3), /* ldrd. */
1226 COSTS_N_INSNS (1), /* ldm_1st. */
1227 1, /* ldm_regs_per_insn_1st. */
1228 2, /* ldm_regs_per_insn_subsequent. */
1229 COSTS_N_INSNS (2), /* loadf. */
1230 COSTS_N_INSNS (2), /* loadd. */
1231 COSTS_N_INSNS (1), /* load_unaligned. */
1232 COSTS_N_INSNS (1), /* store. */
1233 COSTS_N_INSNS (3), /* strd. */
1234 COSTS_N_INSNS (1), /* stm_1st. */
1235 1, /* stm_regs_per_insn_1st. */
1236 2, /* stm_regs_per_insn_subsequent. */
1237 COSTS_N_INSNS (2), /* storef. */
1238 COSTS_N_INSNS (2), /* stored. */
1239 COSTS_N_INSNS (1) /* store_unaligned. */
1244 COSTS_N_INSNS (15), /* div. */
1245 COSTS_N_INSNS (3), /* mult. */
1246 COSTS_N_INSNS (7), /* mult_addsub. */
1247 COSTS_N_INSNS (7), /* fma. */
1248 COSTS_N_INSNS (3), /* addsub. */
1249 COSTS_N_INSNS (3), /* fpconst. */
1250 COSTS_N_INSNS (3), /* neg. */
1251 COSTS_N_INSNS (3), /* compare. */
1252 COSTS_N_INSNS (3), /* widen. */
1253 COSTS_N_INSNS (3), /* narrow. */
1254 COSTS_N_INSNS (3), /* toint. */
1255 COSTS_N_INSNS (3), /* fromint. */
1256 COSTS_N_INSNS (3) /* roundint. */
1260 COSTS_N_INSNS (30), /* div. */
1261 COSTS_N_INSNS (6), /* mult. */
1262 COSTS_N_INSNS (10), /* mult_addsub. */
1263 COSTS_N_INSNS (7), /* fma. */
1264 COSTS_N_INSNS (3), /* addsub. */
1265 COSTS_N_INSNS (3), /* fpconst. */
1266 COSTS_N_INSNS (3), /* neg. */
1267 COSTS_N_INSNS (3), /* compare. */
1268 COSTS_N_INSNS (3), /* widen. */
1269 COSTS_N_INSNS (3), /* narrow. */
1270 COSTS_N_INSNS (3), /* toint. */
1271 COSTS_N_INSNS (3), /* fromint. */
1272 COSTS_N_INSNS (3) /* roundint. */
1277 COSTS_N_INSNS (1) /* alu. */
1281 const struct cpu_cost_table cortexa12_extra_costs
=
1288 COSTS_N_INSNS (1), /* shift_reg. */
1289 COSTS_N_INSNS (1), /* arith_shift. */
1290 COSTS_N_INSNS (1), /* arith_shift_reg. */
1291 COSTS_N_INSNS (1), /* log_shift. */
1292 COSTS_N_INSNS (1), /* log_shift_reg. */
1294 COSTS_N_INSNS (1), /* extend_arith. */
1296 COSTS_N_INSNS (1), /* bfx. */
1297 COSTS_N_INSNS (1), /* clz. */
1298 COSTS_N_INSNS (1), /* rev. */
1300 true /* non_exec_costs_exec. */
1305 COSTS_N_INSNS (2), /* simple. */
1306 COSTS_N_INSNS (3), /* flag_setting. */
1307 COSTS_N_INSNS (2), /* extend. */
1308 COSTS_N_INSNS (3), /* add. */
1309 COSTS_N_INSNS (2), /* extend_add. */
1310 COSTS_N_INSNS (18) /* idiv. */
1314 0, /* simple (N/A). */
1315 0, /* flag_setting (N/A). */
1316 COSTS_N_INSNS (3), /* extend. */
1318 COSTS_N_INSNS (3), /* extend_add. */
1324 COSTS_N_INSNS (3), /* load. */
1325 COSTS_N_INSNS (3), /* load_sign_extend. */
1326 COSTS_N_INSNS (3), /* ldrd. */
1327 COSTS_N_INSNS (3), /* ldm_1st. */
1328 1, /* ldm_regs_per_insn_1st. */
1329 2, /* ldm_regs_per_insn_subsequent. */
1330 COSTS_N_INSNS (3), /* loadf. */
1331 COSTS_N_INSNS (3), /* loadd. */
1332 0, /* load_unaligned. */
1336 1, /* stm_regs_per_insn_1st. */
1337 2, /* stm_regs_per_insn_subsequent. */
1338 COSTS_N_INSNS (2), /* storef. */
1339 COSTS_N_INSNS (2), /* stored. */
1340 0 /* store_unaligned. */
1345 COSTS_N_INSNS (17), /* div. */
1346 COSTS_N_INSNS (4), /* mult. */
1347 COSTS_N_INSNS (8), /* mult_addsub. */
1348 COSTS_N_INSNS (8), /* fma. */
1349 COSTS_N_INSNS (4), /* addsub. */
1350 COSTS_N_INSNS (2), /* fpconst. */
1351 COSTS_N_INSNS (2), /* neg. */
1352 COSTS_N_INSNS (2), /* compare. */
1353 COSTS_N_INSNS (4), /* widen. */
1354 COSTS_N_INSNS (4), /* narrow. */
1355 COSTS_N_INSNS (4), /* toint. */
1356 COSTS_N_INSNS (4), /* fromint. */
1357 COSTS_N_INSNS (4) /* roundint. */
1361 COSTS_N_INSNS (31), /* div. */
1362 COSTS_N_INSNS (4), /* mult. */
1363 COSTS_N_INSNS (8), /* mult_addsub. */
1364 COSTS_N_INSNS (8), /* fma. */
1365 COSTS_N_INSNS (4), /* addsub. */
1366 COSTS_N_INSNS (2), /* fpconst. */
1367 COSTS_N_INSNS (2), /* neg. */
1368 COSTS_N_INSNS (2), /* compare. */
1369 COSTS_N_INSNS (4), /* widen. */
1370 COSTS_N_INSNS (4), /* narrow. */
1371 COSTS_N_INSNS (4), /* toint. */
1372 COSTS_N_INSNS (4), /* fromint. */
1373 COSTS_N_INSNS (4) /* roundint. */
1378 COSTS_N_INSNS (1) /* alu. */
1382 const struct cpu_cost_table cortexa15_extra_costs
=
1390 COSTS_N_INSNS (1), /* arith_shift. */
1391 COSTS_N_INSNS (1), /* arith_shift_reg. */
1392 COSTS_N_INSNS (1), /* log_shift. */
1393 COSTS_N_INSNS (1), /* log_shift_reg. */
1395 COSTS_N_INSNS (1), /* extend_arith. */
1396 COSTS_N_INSNS (1), /* bfi. */
1401 true /* non_exec_costs_exec. */
1406 COSTS_N_INSNS (2), /* simple. */
1407 COSTS_N_INSNS (3), /* flag_setting. */
1408 COSTS_N_INSNS (2), /* extend. */
1409 COSTS_N_INSNS (2), /* add. */
1410 COSTS_N_INSNS (2), /* extend_add. */
1411 COSTS_N_INSNS (18) /* idiv. */
1415 0, /* simple (N/A). */
1416 0, /* flag_setting (N/A). */
1417 COSTS_N_INSNS (3), /* extend. */
1419 COSTS_N_INSNS (3), /* extend_add. */
1425 COSTS_N_INSNS (3), /* load. */
1426 COSTS_N_INSNS (3), /* load_sign_extend. */
1427 COSTS_N_INSNS (3), /* ldrd. */
1428 COSTS_N_INSNS (4), /* ldm_1st. */
1429 1, /* ldm_regs_per_insn_1st. */
1430 2, /* ldm_regs_per_insn_subsequent. */
1431 COSTS_N_INSNS (4), /* loadf. */
1432 COSTS_N_INSNS (4), /* loadd. */
1433 0, /* load_unaligned. */
1436 COSTS_N_INSNS (1), /* stm_1st. */
1437 1, /* stm_regs_per_insn_1st. */
1438 2, /* stm_regs_per_insn_subsequent. */
1441 0 /* store_unaligned. */
1446 COSTS_N_INSNS (17), /* div. */
1447 COSTS_N_INSNS (4), /* mult. */
1448 COSTS_N_INSNS (8), /* mult_addsub. */
1449 COSTS_N_INSNS (8), /* fma. */
1450 COSTS_N_INSNS (4), /* addsub. */
1451 COSTS_N_INSNS (2), /* fpconst. */
1452 COSTS_N_INSNS (2), /* neg. */
1453 COSTS_N_INSNS (5), /* compare. */
1454 COSTS_N_INSNS (4), /* widen. */
1455 COSTS_N_INSNS (4), /* narrow. */
1456 COSTS_N_INSNS (4), /* toint. */
1457 COSTS_N_INSNS (4), /* fromint. */
1458 COSTS_N_INSNS (4) /* roundint. */
1462 COSTS_N_INSNS (31), /* div. */
1463 COSTS_N_INSNS (4), /* mult. */
1464 COSTS_N_INSNS (8), /* mult_addsub. */
1465 COSTS_N_INSNS (8), /* fma. */
1466 COSTS_N_INSNS (4), /* addsub. */
1467 COSTS_N_INSNS (2), /* fpconst. */
1468 COSTS_N_INSNS (2), /* neg. */
1469 COSTS_N_INSNS (2), /* compare. */
1470 COSTS_N_INSNS (4), /* widen. */
1471 COSTS_N_INSNS (4), /* narrow. */
1472 COSTS_N_INSNS (4), /* toint. */
1473 COSTS_N_INSNS (4), /* fromint. */
1474 COSTS_N_INSNS (4) /* roundint. */
1479 COSTS_N_INSNS (1) /* alu. */
1483 const struct cpu_cost_table v7m_extra_costs
=
1491 0, /* arith_shift. */
1492 COSTS_N_INSNS (1), /* arith_shift_reg. */
1494 COSTS_N_INSNS (1), /* log_shift_reg. */
1496 COSTS_N_INSNS (1), /* extend_arith. */
1501 COSTS_N_INSNS (1), /* non_exec. */
1502 false /* non_exec_costs_exec. */
1507 COSTS_N_INSNS (1), /* simple. */
1508 COSTS_N_INSNS (1), /* flag_setting. */
1509 COSTS_N_INSNS (2), /* extend. */
1510 COSTS_N_INSNS (1), /* add. */
1511 COSTS_N_INSNS (3), /* extend_add. */
1512 COSTS_N_INSNS (8) /* idiv. */
1516 0, /* simple (N/A). */
1517 0, /* flag_setting (N/A). */
1518 COSTS_N_INSNS (2), /* extend. */
1520 COSTS_N_INSNS (3), /* extend_add. */
1526 COSTS_N_INSNS (2), /* load. */
1527 0, /* load_sign_extend. */
1528 COSTS_N_INSNS (3), /* ldrd. */
1529 COSTS_N_INSNS (2), /* ldm_1st. */
1530 1, /* ldm_regs_per_insn_1st. */
1531 1, /* ldm_regs_per_insn_subsequent. */
1532 COSTS_N_INSNS (2), /* loadf. */
1533 COSTS_N_INSNS (3), /* loadd. */
1534 COSTS_N_INSNS (1), /* load_unaligned. */
1535 COSTS_N_INSNS (2), /* store. */
1536 COSTS_N_INSNS (3), /* strd. */
1537 COSTS_N_INSNS (2), /* stm_1st. */
1538 1, /* stm_regs_per_insn_1st. */
1539 1, /* stm_regs_per_insn_subsequent. */
1540 COSTS_N_INSNS (2), /* storef. */
1541 COSTS_N_INSNS (3), /* stored. */
1542 COSTS_N_INSNS (1) /* store_unaligned. */
1547 COSTS_N_INSNS (7), /* div. */
1548 COSTS_N_INSNS (2), /* mult. */
1549 COSTS_N_INSNS (5), /* mult_addsub. */
1550 COSTS_N_INSNS (3), /* fma. */
1551 COSTS_N_INSNS (1), /* addsub. */
1563 COSTS_N_INSNS (15), /* div. */
1564 COSTS_N_INSNS (5), /* mult. */
1565 COSTS_N_INSNS (7), /* mult_addsub. */
1566 COSTS_N_INSNS (7), /* fma. */
1567 COSTS_N_INSNS (3), /* addsub. */
1580 COSTS_N_INSNS (1) /* alu. */
1584 const struct tune_params arm_slowmul_tune
=
1586 arm_slowmul_rtx_costs
,
1588 NULL
, /* Sched adj cost. */
1589 3, /* Constant limit. */
1590 5, /* Max cond insns. */
1591 ARM_PREFETCH_NOT_BENEFICIAL
,
1592 true, /* Prefer constant pool. */
1593 arm_default_branch_cost
,
1594 false, /* Prefer LDRD/STRD. */
1595 {true, true}, /* Prefer non short circuit. */
1596 &arm_default_vec_cost
, /* Vectorizer costs. */
1597 false, /* Prefer Neon for 64-bits bitops. */
1598 false, false /* Prefer 32-bit encodings. */
1601 const struct tune_params arm_fastmul_tune
=
1603 arm_fastmul_rtx_costs
,
1605 NULL
, /* Sched adj cost. */
1606 1, /* Constant limit. */
1607 5, /* Max cond insns. */
1608 ARM_PREFETCH_NOT_BENEFICIAL
,
1609 true, /* Prefer constant pool. */
1610 arm_default_branch_cost
,
1611 false, /* Prefer LDRD/STRD. */
1612 {true, true}, /* Prefer non short circuit. */
1613 &arm_default_vec_cost
, /* Vectorizer costs. */
1614 false, /* Prefer Neon for 64-bits bitops. */
1615 false, false /* Prefer 32-bit encodings. */
1618 /* StrongARM has early execution of branches, so a sequence that is worth
1619 skipping is shorter. Set max_insns_skipped to a lower value. */
1621 const struct tune_params arm_strongarm_tune
=
1623 arm_fastmul_rtx_costs
,
1625 NULL
, /* Sched adj cost. */
1626 1, /* Constant limit. */
1627 3, /* Max cond insns. */
1628 ARM_PREFETCH_NOT_BENEFICIAL
,
1629 true, /* Prefer constant pool. */
1630 arm_default_branch_cost
,
1631 false, /* Prefer LDRD/STRD. */
1632 {true, true}, /* Prefer non short circuit. */
1633 &arm_default_vec_cost
, /* Vectorizer costs. */
1634 false, /* Prefer Neon for 64-bits bitops. */
1635 false, false /* Prefer 32-bit encodings. */
1638 const struct tune_params arm_xscale_tune
=
1640 arm_xscale_rtx_costs
,
1642 xscale_sched_adjust_cost
,
1643 2, /* Constant limit. */
1644 3, /* Max cond insns. */
1645 ARM_PREFETCH_NOT_BENEFICIAL
,
1646 true, /* Prefer constant pool. */
1647 arm_default_branch_cost
,
1648 false, /* Prefer LDRD/STRD. */
1649 {true, true}, /* Prefer non short circuit. */
1650 &arm_default_vec_cost
, /* Vectorizer costs. */
1651 false, /* Prefer Neon for 64-bits bitops. */
1652 false, false /* Prefer 32-bit encodings. */
1655 const struct tune_params arm_9e_tune
=
1659 NULL
, /* Sched adj cost. */
1660 1, /* Constant limit. */
1661 5, /* Max cond insns. */
1662 ARM_PREFETCH_NOT_BENEFICIAL
,
1663 true, /* Prefer constant pool. */
1664 arm_default_branch_cost
,
1665 false, /* Prefer LDRD/STRD. */
1666 {true, true}, /* Prefer non short circuit. */
1667 &arm_default_vec_cost
, /* Vectorizer costs. */
1668 false, /* Prefer Neon for 64-bits bitops. */
1669 false, false /* Prefer 32-bit encodings. */
1672 const struct tune_params arm_v6t2_tune
=
1676 NULL
, /* Sched adj cost. */
1677 1, /* Constant limit. */
1678 5, /* Max cond insns. */
1679 ARM_PREFETCH_NOT_BENEFICIAL
,
1680 false, /* Prefer constant pool. */
1681 arm_default_branch_cost
,
1682 false, /* Prefer LDRD/STRD. */
1683 {true, true}, /* Prefer non short circuit. */
1684 &arm_default_vec_cost
, /* Vectorizer costs. */
1685 false, /* Prefer Neon for 64-bits bitops. */
1686 false, false /* Prefer 32-bit encodings. */
1689 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1690 const struct tune_params arm_cortex_tune
=
1693 &generic_extra_costs
,
1694 NULL
, /* Sched adj cost. */
1695 1, /* Constant limit. */
1696 5, /* Max cond insns. */
1697 ARM_PREFETCH_NOT_BENEFICIAL
,
1698 false, /* Prefer constant pool. */
1699 arm_default_branch_cost
,
1700 false, /* Prefer LDRD/STRD. */
1701 {true, true}, /* Prefer non short circuit. */
1702 &arm_default_vec_cost
, /* Vectorizer costs. */
1703 false, /* Prefer Neon for 64-bits bitops. */
1704 false, false /* Prefer 32-bit encodings. */
1707 const struct tune_params arm_cortex_a8_tune
=
1710 &cortexa8_extra_costs
,
1711 NULL
, /* Sched adj cost. */
1712 1, /* Constant limit. */
1713 5, /* Max cond insns. */
1714 ARM_PREFETCH_NOT_BENEFICIAL
,
1715 false, /* Prefer constant pool. */
1716 arm_default_branch_cost
,
1717 false, /* Prefer LDRD/STRD. */
1718 {true, true}, /* Prefer non short circuit. */
1719 &arm_default_vec_cost
, /* Vectorizer costs. */
1720 false, /* Prefer Neon for 64-bits bitops. */
1721 false, false /* Prefer 32-bit encodings. */
1724 const struct tune_params arm_cortex_a7_tune
=
1727 &cortexa7_extra_costs
,
1729 1, /* Constant limit. */
1730 5, /* Max cond insns. */
1731 ARM_PREFETCH_NOT_BENEFICIAL
,
1732 false, /* Prefer constant pool. */
1733 arm_default_branch_cost
,
1734 false, /* Prefer LDRD/STRD. */
1735 {true, true}, /* Prefer non short circuit. */
1736 &arm_default_vec_cost
, /* Vectorizer costs. */
1737 false, /* Prefer Neon for 64-bits bitops. */
1738 false, false /* Prefer 32-bit encodings. */
1741 const struct tune_params arm_cortex_a15_tune
=
1744 &cortexa15_extra_costs
,
1745 NULL
, /* Sched adj cost. */
1746 1, /* Constant limit. */
1747 2, /* Max cond insns. */
1748 ARM_PREFETCH_NOT_BENEFICIAL
,
1749 false, /* Prefer constant pool. */
1750 arm_default_branch_cost
,
1751 true, /* Prefer LDRD/STRD. */
1752 {true, true}, /* Prefer non short circuit. */
1753 &arm_default_vec_cost
, /* Vectorizer costs. */
1754 false, /* Prefer Neon for 64-bits bitops. */
1755 true, true /* Prefer 32-bit encodings. */
1758 const struct tune_params arm_cortex_a53_tune
=
1761 &cortexa53_extra_costs
,
1762 NULL
, /* Scheduler cost adjustment. */
1763 1, /* Constant limit. */
1764 5, /* Max cond insns. */
1765 ARM_PREFETCH_NOT_BENEFICIAL
,
1766 false, /* Prefer constant pool. */
1767 arm_default_branch_cost
,
1768 false, /* Prefer LDRD/STRD. */
1769 {true, true}, /* Prefer non short circuit. */
1770 &arm_default_vec_cost
, /* Vectorizer costs. */
1771 false, /* Prefer Neon for 64-bits bitops. */
1772 false, false /* Prefer 32-bit encodings. */
1775 const struct tune_params arm_cortex_a57_tune
=
1778 &cortexa57_extra_costs
,
1779 NULL
, /* Scheduler cost adjustment. */
1780 1, /* Constant limit. */
1781 2, /* Max cond insns. */
1782 ARM_PREFETCH_NOT_BENEFICIAL
,
1783 false, /* Prefer constant pool. */
1784 arm_default_branch_cost
,
1785 true, /* Prefer LDRD/STRD. */
1786 {true, true}, /* Prefer non short circuit. */
1787 &arm_default_vec_cost
, /* Vectorizer costs. */
1788 false, /* Prefer Neon for 64-bits bitops. */
1789 true, true /* Prefer 32-bit encodings. */
1792 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1793 less appealing. Set max_insns_skipped to a low value. */
1795 const struct tune_params arm_cortex_a5_tune
=
1799 NULL
, /* Sched adj cost. */
1800 1, /* Constant limit. */
1801 1, /* Max cond insns. */
1802 ARM_PREFETCH_NOT_BENEFICIAL
,
1803 false, /* Prefer constant pool. */
1804 arm_cortex_a5_branch_cost
,
1805 false, /* Prefer LDRD/STRD. */
1806 {false, false}, /* Prefer non short circuit. */
1807 &arm_default_vec_cost
, /* Vectorizer costs. */
1808 false, /* Prefer Neon for 64-bits bitops. */
1809 false, false /* Prefer 32-bit encodings. */
1812 const struct tune_params arm_cortex_a9_tune
=
1815 &cortexa9_extra_costs
,
1816 cortex_a9_sched_adjust_cost
,
1817 1, /* Constant limit. */
1818 5, /* Max cond insns. */
1819 ARM_PREFETCH_BENEFICIAL(4,32,32),
1820 false, /* Prefer constant pool. */
1821 arm_default_branch_cost
,
1822 false, /* Prefer LDRD/STRD. */
1823 {true, true}, /* Prefer non short circuit. */
1824 &arm_default_vec_cost
, /* Vectorizer costs. */
1825 false, /* Prefer Neon for 64-bits bitops. */
1826 false, false /* Prefer 32-bit encodings. */
1829 const struct tune_params arm_cortex_a12_tune
=
1832 &cortexa12_extra_costs
,
1834 1, /* Constant limit. */
1835 5, /* Max cond insns. */
1836 ARM_PREFETCH_BENEFICIAL(4,32,32),
1837 false, /* Prefer constant pool. */
1838 arm_default_branch_cost
,
1839 true, /* Prefer LDRD/STRD. */
1840 {true, true}, /* Prefer non short circuit. */
1841 &arm_default_vec_cost
, /* Vectorizer costs. */
1842 false, /* Prefer Neon for 64-bits bitops. */
1843 false, false /* Prefer 32-bit encodings. */
1846 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
1847 cycle to execute each. An LDR from the constant pool also takes two cycles
1848 to execute, but mildly increases pipelining opportunity (consecutive
1849 loads/stores can be pipelined together, saving one cycle), and may also
1850 improve icache utilisation. Hence we prefer the constant pool for such processors. */
1853 const struct tune_params arm_v7m_tune
=
1857 NULL
, /* Sched adj cost. */
1858 1, /* Constant limit. */
1859 2, /* Max cond insns. */
1860 ARM_PREFETCH_NOT_BENEFICIAL
,
1861 true, /* Prefer constant pool. */
1862 arm_cortex_m_branch_cost
,
1863 false, /* Prefer LDRD/STRD. */
1864 {false, false}, /* Prefer non short circuit. */
1865 &arm_default_vec_cost
, /* Vectorizer costs. */
1866 false, /* Prefer Neon for 64-bits bitops. */
1867 false, false /* Prefer 32-bit encodings. */
1870 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1871 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1872 const struct tune_params arm_v6m_tune
=
1876 NULL
, /* Sched adj cost. */
1877 1, /* Constant limit. */
1878 5, /* Max cond insns. */
1879 ARM_PREFETCH_NOT_BENEFICIAL
,
1880 false, /* Prefer constant pool. */
1881 arm_default_branch_cost
,
1882 false, /* Prefer LDRD/STRD. */
1883 {false, false}, /* Prefer non short circuit. */
1884 &arm_default_vec_cost
, /* Vectorizer costs. */
1885 false, /* Prefer Neon for 64-bits bitops. */
1886 false, false /* Prefer 32-bit encodings. */
1889 const struct tune_params arm_fa726te_tune
=
1893 fa726te_sched_adjust_cost
,
1894 1, /* Constant limit. */
1895 5, /* Max cond insns. */
1896 ARM_PREFETCH_NOT_BENEFICIAL
,
1897 true, /* Prefer constant pool. */
1898 arm_default_branch_cost
,
1899 false, /* Prefer LDRD/STRD. */
1900 {true, true}, /* Prefer non short circuit. */
1901 &arm_default_vec_cost
, /* Vectorizer costs. */
1902 false, /* Prefer Neon for 64-bits bitops. */
1903 false, false /* Prefer 32-bit encodings. */
1907 /* Not all of these give usefully different compilation alternatives,
1908 but there is no simple way of generalizing them. */
1909 static const struct processors all_cores
[] =
1912 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1913 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1914 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1915 #include "arm-cores.def"
1917 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
1920 static const struct processors all_architectures
[] =
1922 /* ARM Architectures */
1923 /* We don't specify tuning costs here as it will be figured out from the CPU we are being tuned for. */
1926 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1927 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1928 #include "arm-arches.def"
1930 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
1934 /* These are populated as commandline arguments are processed, or NULL
1935 if not specified. */
1936 static const struct processors
*arm_selected_arch
;
1937 static const struct processors
*arm_selected_cpu
;
1938 static const struct processors
*arm_selected_tune
;
1940 /* The name of the preprocessor macro to define for this architecture. */
1942 char arm_arch_name
[] = "__ARM_ARCH_0UNK__";
1944 /* Available values for -mfpu=. */
1946 static const struct arm_fpu_desc all_fpus
[] =
1948 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1949 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1950 #include "arm-fpus.def"
1955 /* Supported TLS relocations. */
1963 TLS_DESCSEQ
/* GNU scheme */
1966 /* The maximum number of insns to be used when loading a constant. */
1968 arm_constant_limit (bool size_p
)
1970 return size_p
? 1 : current_tune
->constant_limit
;
1973 /* Emit an insn that's a simple single-set. Both the operands must be known
1976 emit_set_insn (rtx x
, rtx y
)
1978 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
1981 /* Return the number of bits set in VALUE. */
1983 bit_count (unsigned long value
)
1985 unsigned long count
= 0;
1990 value
&= value
- 1; /* Clear the least-significant set bit. */
1998 enum machine_mode mode
;
2000 } arm_fixed_mode_set
;
2002 /* A small helper for setting fixed-point library libfuncs. */
2005 arm_set_fixed_optab_libfunc (optab optable
, enum machine_mode mode
,
2006 const char *funcname
, const char *modename
,
2011 if (num_suffix
== 0)
2012 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2014 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2016 set_optab_libfunc (optable
, mode
, buffer
);
2020 arm_set_fixed_conv_libfunc (convert_optab optable
, enum machine_mode to
,
2021 enum machine_mode from
, const char *funcname
,
2022 const char *toname
, const char *fromname
)
2025 const char *maybe_suffix_2
= "";
2027 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2028 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2029 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2030 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2031 maybe_suffix_2
= "2";
2033 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2036 set_conv_libfunc (optable
, to
, from
, buffer
);
2039 /* Set up library functions unique to ARM. */
/* NOTE(review): implements TARGET_INIT_LIBFUNCS.  Registers the
   AEABI "__aeabi_*" runtime helper names and the "__gnu_*"
   fixed-point/half-float helpers.  This extract lost the function's
   return-type line and many interior lines (visible as gaps in the
   embedded original line numbers); only comments are added here, the
   code text is untouched.  */
2042 arm_init_libfuncs (void)
2044 /* For Linux, we have access to kernel support for atomic operations. */
2045 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2046 init_sync_libfuncs (2 * UNITS_PER_WORD
);
2048 /* There are no special library functions unless we are using the
2053 /* The functions below are described in Section 4 of the "Run-Time
2054 ABI for the ARM architecture", Version 1.0. */
2056 /* Double-precision floating-point arithmetic. Table 2. */
2057 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2058 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2059 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2060 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2061 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2063 /* Double-precision comparisons. Table 3. */
2064 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2065 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2066 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2067 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2068 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2069 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2070 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2072 /* Single-precision floating-point arithmetic. Table 4. */
2073 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2074 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2075 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2076 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2077 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2079 /* Single-precision comparisons. Table 5. */
2080 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2081 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2082 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2083 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2084 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2085 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2086 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2088 /* Floating-point to integer conversions. Table 6. */
2089 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2090 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2091 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2092 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2093 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2094 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2095 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2096 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2098 /* Conversions between floating types. Table 7. */
2099 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2100 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2102 /* Integer to floating-point conversions. Table 8. */
2103 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2104 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2105 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2106 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2107 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2108 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2109 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2110 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2112 /* Long long. Table 9. */
2113 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2114 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2115 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2116 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2117 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2118 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2119 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2120 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2122 /* Integer (32/32->32) division. \S 4.3.1. */
2123 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2124 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2126 /* The divmod functions are designed so that they can be used for
2127 plain division, even though they return both the quotient and the
2128 remainder. The quotient is returned in the usual location (i.e.,
2129 r0 for SImode, {r0, r1} for DImode), just as would be expected
2130 for an ordinary division routine. Because the AAPCS calling
2131 conventions specify that all of { r0, r1, r2, r3 } are
2132 callee-saved registers, there is no need to tell the compiler
2133 explicitly that those registers are clobbered by these
2135 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2136 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2138 /* For SImode division the ABI provides div-without-mod routines,
2139 which are faster. */
2140 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2141 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2143 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2144 divmod libcalls instead. */
2145 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2146 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2147 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2148 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2150 /* Half-precision float operations. The compiler handles all operations
2151 with NULL libfuncs by converting the SFmode. */
2152 switch (arm_fp16_format
)
2154 case ARM_FP16_FORMAT_IEEE
:
2155 case ARM_FP16_FORMAT_ALTERNATIVE
:
2158 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2159 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2161 : "__gnu_f2h_alternative"));
2162 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2163 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2165 : "__gnu_h2f_alternative"));
2168 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2169 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2170 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2171 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2172 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2175 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2176 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2177 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2178 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2179 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2180 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2181 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2188 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
/* NOTE(review): the initializers of the two mode tables below
   (original lines 2191-2240) were dropped by extraction; each entry
   pairs a machine mode with its libfunc name fragment.  */
2190 const arm_fixed_mode_set fixed_arith_modes
[] =
2211 const arm_fixed_mode_set fixed_conv_modes
[] =
2241 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2243 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2244 "add", fixed_arith_modes
[i
].name
, 3);
2245 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2246 "ssadd", fixed_arith_modes
[i
].name
, 3);
2247 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2248 "usadd", fixed_arith_modes
[i
].name
, 3);
2249 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2250 "sub", fixed_arith_modes
[i
].name
, 3);
2251 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2252 "sssub", fixed_arith_modes
[i
].name
, 3);
2253 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2254 "ussub", fixed_arith_modes
[i
].name
, 3);
2255 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2256 "mul", fixed_arith_modes
[i
].name
, 3);
2257 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2258 "ssmul", fixed_arith_modes
[i
].name
, 3);
2259 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2260 "usmul", fixed_arith_modes
[i
].name
, 3);
2261 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2262 "div", fixed_arith_modes
[i
].name
, 3);
2263 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2264 "udiv", fixed_arith_modes
[i
].name
, 3);
2265 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2266 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2267 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2268 "usdiv", fixed_arith_modes
[i
].name
, 3);
2269 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2270 "neg", fixed_arith_modes
[i
].name
, 2);
2271 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2272 "ssneg", fixed_arith_modes
[i
].name
, 2);
2273 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2274 "usneg", fixed_arith_modes
[i
].name
, 2);
2275 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2276 "ashl", fixed_arith_modes
[i
].name
, 3);
2277 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2278 "ashr", fixed_arith_modes
[i
].name
, 3);
2279 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2280 "lshr", fixed_arith_modes
[i
].name
, 3);
2281 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2282 "ssashl", fixed_arith_modes
[i
].name
, 3);
2283 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2284 "usashl", fixed_arith_modes
[i
].name
, 3);
2285 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2286 "cmp", fixed_arith_modes
[i
].name
, 2);
2289 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2290 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2293 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2294 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2297 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2298 fixed_conv_modes
[j
].mode
, "fract",
2299 fixed_conv_modes
[i
].name
,
2300 fixed_conv_modes
[j
].name
);
2301 arm_set_fixed_conv_libfunc (satfract_optab
,
2302 fixed_conv_modes
[i
].mode
,
2303 fixed_conv_modes
[j
].mode
, "satfract",
2304 fixed_conv_modes
[i
].name
,
2305 fixed_conv_modes
[j
].name
);
2306 arm_set_fixed_conv_libfunc (fractuns_optab
,
2307 fixed_conv_modes
[i
].mode
,
2308 fixed_conv_modes
[j
].mode
, "fractuns",
2309 fixed_conv_modes
[i
].name
,
2310 fixed_conv_modes
[j
].name
);
2311 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2312 fixed_conv_modes
[i
].mode
,
2313 fixed_conv_modes
[j
].mode
, "satfractuns",
2314 fixed_conv_modes
[i
].name
,
2315 fixed_conv_modes
[j
].name
);
2319 if (TARGET_AAPCS_BASED
)
2320 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2323 /* On AAPCS systems, this is the "struct __va_list". */
/* Built lazily by arm_build_builtin_va_list below; GTY(()) registers
   it as a garbage-collection root so the tree node is kept alive.  */
2324 static GTY(()) tree va_list_type
;
2326 /* Return the type to use as __builtin_va_list. */
/* Implements TARGET_BUILD_BUILTIN_VA_LIST.  Non-AAPCS targets defer to
   the generic std_build_builtin_va_list; AAPCS targets build the
   ABI-mandated record type tagged "__va_list" with a single "__ap"
   field.  NOTE(review): the function's return-type line, the local
   declarations (va_list_name, ap_field) and some build_decl arguments
   were dropped by extraction -- confirm against the full source.  */
2328 arm_build_builtin_va_list (void)
2333 if (!TARGET_AAPCS_BASED
)
2334 return std_build_builtin_va_list ();
2336 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2344 The C Library ABI further reinforces this definition in \S
2347 We must follow this definition exactly. The structure tag
2348 name is visible in C++ mangled names, and thus forms a part
2349 of the ABI. The field name may be used by people who
2350 #include <stdarg.h>. */
2351 /* Create the type. */
2352 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2353 /* Give it the required name. */
2354 va_list_name
= build_decl (BUILTINS_LOCATION
,
2356 get_identifier ("__va_list"),
2358 DECL_ARTIFICIAL (va_list_name
) = 1;
2359 TYPE_NAME (va_list_type
) = va_list_name
;
2360 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2361 /* Create the __ap field. */
2362 ap_field
= build_decl (BUILTINS_LOCATION
,
2364 get_identifier ("__ap"),
2366 DECL_ARTIFICIAL (ap_field
) = 1;
2367 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2368 TYPE_FIELDS (va_list_type
) = ap_field
;
2369 /* Compute its layout. */
2370 layout_type (va_list_type
);
2372 return va_list_type
;
2375 /* Return an expression of type "void *" pointing to the next
2376 available argument in a variable-argument list. VALIST is the
2377 user-level va_list object, of type __builtin_va_list. */
2379 arm_extract_valist_ptr (tree valist
)
2381 if (TREE_TYPE (valist
) == error_mark_node
)
2382 return error_mark_node
;
2384 /* On an AAPCS target, the pointer is stored within "struct
2386 if (TARGET_AAPCS_BASED
)
2388 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2389 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2390 valist
, ap_field
, NULL_TREE
);
2396 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2398 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2400 valist
= arm_extract_valist_ptr (valist
);
2401 std_expand_builtin_va_start (valist
, nextarg
);
2404 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2406 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2409 valist
= arm_extract_valist_ptr (valist
);
2410 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2413 /* Fix up any incompatible options that the user has specified. */
/* NOTE(review): implements TARGET_OPTION_OVERRIDE.  Resolves
   -mcpu/-march/-mtune into arm_selected_cpu/arch/tune, derives the
   arm_arch* / arm_tune* feature booleans from the selected flags,
   diagnoses incompatible option combinations, and seeds tuning
   parameters.  This extract has dropped many interior lines (visible
   as gaps in the embedded original line numbers); only comments are
   added here, the code text is untouched.  */
2415 arm_option_override (void)
2417 if (global_options_set
.x_arm_arch_option
)
2418 arm_selected_arch
= &all_architectures
[arm_arch_option
];
2420 if (global_options_set
.x_arm_cpu_option
)
2422 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
2423 arm_selected_tune
= &all_cores
[(int) arm_cpu_option
];
2426 if (global_options_set
.x_arm_tune_option
)
2427 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
2429 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2430 SUBTARGET_OVERRIDE_OPTIONS
;
2433 if (arm_selected_arch
)
2435 if (arm_selected_cpu
)
2437 /* Check for conflict between mcpu and march. */
2438 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
2440 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2441 arm_selected_cpu
->name
, arm_selected_arch
->name
);
2442 /* -march wins for code generation.
2443 -mcpu wins for default tuning. */
2444 if (!arm_selected_tune
)
2445 arm_selected_tune
= arm_selected_cpu
;
2447 arm_selected_cpu
= arm_selected_arch
;
2451 arm_selected_arch
= NULL
;
2454 /* Pick a CPU based on the architecture. */
2455 arm_selected_cpu
= arm_selected_arch
;
2458 /* If the user did not specify a processor, choose one for them. */
2459 if (!arm_selected_cpu
)
2461 const struct processors
* sel
;
2462 unsigned int sought
;
2464 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
2465 if (!arm_selected_cpu
->name
)
2467 #ifdef SUBTARGET_CPU_DEFAULT
2468 /* Use the subtarget default CPU if none was specified by
2470 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
2472 /* Default to ARM6. */
2473 if (!arm_selected_cpu
->name
)
2474 arm_selected_cpu
= &all_cores
[arm6
];
2477 sel
= arm_selected_cpu
;
2478 insn_flags
= sel
->flags
;
2480 /* Now check to see if the user has specified some command line
2481 switch that require certain abilities from the cpu. */
2484 if (TARGET_INTERWORK
|| TARGET_THUMB
)
2486 sought
|= (FL_THUMB
| FL_MODE32
);
2488 /* There are no ARM processors that support both APCS-26 and
2489 interworking. Therefore we force FL_MODE26 to be removed
2490 from insn_flags here (if it was set), so that the search
2491 below will always be able to find a compatible processor. */
2492 insn_flags
&= ~FL_MODE26
;
2495 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
2497 /* Try to locate a CPU type that supports all of the abilities
2498 of the default CPU, plus the extra abilities requested by
2500 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2501 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
2504 if (sel
->name
== NULL
)
2506 unsigned current_bit_count
= 0;
2507 const struct processors
* best_fit
= NULL
;
2509 /* Ideally we would like to issue an error message here
2510 saying that it was not possible to find a CPU compatible
2511 with the default CPU, but which also supports the command
2512 line options specified by the programmer, and so they
2513 ought to use the -mcpu=<name> command line option to
2514 override the default CPU type.
2516 If we cannot find a cpu that has both the
2517 characteristics of the default cpu and the given
2518 command line options we scan the array again looking
2519 for a best match. */
2520 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2521 if ((sel
->flags
& sought
) == sought
)
2525 count
= bit_count (sel
->flags
& insn_flags
);
2527 if (count
>= current_bit_count
)
2530 current_bit_count
= count
;
2534 gcc_assert (best_fit
);
2538 arm_selected_cpu
= sel
;
2542 gcc_assert (arm_selected_cpu
);
2543 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2544 if (!arm_selected_tune
)
2545 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
2547 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
2548 insn_flags
= arm_selected_cpu
->flags
;
2549 arm_base_arch
= arm_selected_cpu
->base_arch
;
2551 arm_tune
= arm_selected_tune
->core
;
2552 tune_flags
= arm_selected_tune
->flags
;
2553 current_tune
= arm_selected_tune
->tune
;
2555 /* Make sure that the processor choice does not conflict with any of the
2556 other command line choices. */
2557 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
2558 error ("target CPU does not support ARM mode");
2560 /* BPABI targets use linker tricks to allow interworking on cores
2561 without thumb support. */
2562 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
2564 warning (0, "target CPU does not support interworking" );
2565 target_flags
&= ~MASK_INTERWORK
;
2568 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
2570 warning (0, "target CPU does not support THUMB instructions");
2571 target_flags
&= ~MASK_THUMB
;
2574 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
2576 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2577 target_flags
&= ~MASK_APCS_FRAME
;
2580 /* Callee super interworking implies thumb interworking. Adding
2581 this to the flags here simplifies the logic elsewhere. */
2582 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
2583 target_flags
|= MASK_INTERWORK
;
2585 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2586 from here where no function is being compiled currently. */
2587 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
2588 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2590 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
2591 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2593 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
2595 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2596 target_flags
|= MASK_APCS_FRAME
;
2599 if (TARGET_POKE_FUNCTION_NAME
)
2600 target_flags
|= MASK_APCS_FRAME
;
2602 if (TARGET_APCS_REENT
&& flag_pic
)
2603 error ("-fpic and -mapcs-reent are incompatible");
2605 if (TARGET_APCS_REENT
)
2606 warning (0, "APCS reentrant code not supported. Ignored");
2608 /* If this target is normally configured to use APCS frames, warn if they
2609 are turned off and debugging is turned on. */
2611 && write_symbols
!= NO_DEBUG
2612 && !TARGET_APCS_FRAME
2613 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2614 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2616 if (TARGET_APCS_FLOAT
)
2617 warning (0, "passing floating point arguments in fp regs not yet supported");
2619 if (TARGET_LITTLE_WORDS
)
2620 warning (OPT_Wdeprecated
, "%<mwords-little-endian%> is deprecated and "
2621 "will be removed in a future release");
2623 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2624 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
2625 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
2626 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
2627 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
2628 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
2629 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
2630 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
2631 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
2632 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
2633 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
2634 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
2635 arm_arch8
= (insn_flags
& FL_ARCH8
) != 0;
2636 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
2637 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
2639 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
2640 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
2641 thumb_code
= TARGET_ARM
== 0;
2642 thumb1_code
= TARGET_THUMB1
!= 0;
2643 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
2644 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
2645 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
2646 arm_arch_iwmmxt2
= (insn_flags
& FL_IWMMXT2
) != 0;
2647 arm_arch_thumb_hwdiv
= (insn_flags
& FL_THUMB_DIV
) != 0;
2648 arm_arch_arm_hwdiv
= (insn_flags
& FL_ARM_DIV
) != 0;
2649 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
2650 arm_arch_crc
= (insn_flags
& FL_CRC32
) != 0;
2651 if (arm_restrict_it
== 2)
2652 arm_restrict_it
= arm_arch8
&& TARGET_THUMB2
;
2655 arm_restrict_it
= 0;
2657 /* If we are not using the default (ARM mode) section anchor offset
2658 ranges, then set the correct ranges now. */
2661 /* Thumb-1 LDR instructions cannot have negative offsets.
2662 Permissible positive offset ranges are 5-bit (for byte loads),
2663 6-bit (for halfword loads), or 7-bit (for word loads).
2664 Empirical results suggest a 7-bit anchor range gives the best
2665 overall code size. */
2666 targetm
.min_anchor_offset
= 0;
2667 targetm
.max_anchor_offset
= 127;
2669 else if (TARGET_THUMB2
)
2671 /* The minimum is set such that the total size of the block
2672 for a particular anchor is 248 + 1 + 4095 bytes, which is
2673 divisible by eight, ensuring natural spacing of anchors. */
2674 targetm
.min_anchor_offset
= -248;
2675 targetm
.max_anchor_offset
= 4095;
2678 /* V5 code we generate is completely interworking capable, so we turn off
2679 TARGET_INTERWORK here to avoid many tests later on. */
2681 /* XXX However, we must pass the right pre-processor defines to CPP
2682 or GLD can get confused. This is a hack. */
2683 if (TARGET_INTERWORK
)
2684 arm_cpp_interwork
= 1;
2687 target_flags
&= ~MASK_INTERWORK
;
2689 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
2690 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2692 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
2693 error ("iwmmxt abi requires an iwmmxt capable cpu");
2695 if (!global_options_set
.x_arm_fpu_index
)
2697 const char *target_fpu_name
;
2700 #ifdef FPUTYPE_DEFAULT
2701 target_fpu_name
= FPUTYPE_DEFAULT
;
2703 target_fpu_name
= "vfp";
2706 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
2711 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
2713 switch (arm_fpu_desc
->model
)
2715 case ARM_FP_MODEL_VFP
:
2716 arm_fpu_attr
= FPU_VFP
;
2723 if (TARGET_AAPCS_BASED
)
2725 if (TARGET_CALLER_INTERWORKING
)
2726 error ("AAPCS does not support -mcaller-super-interworking");
2728 if (TARGET_CALLEE_INTERWORKING
)
2729 error ("AAPCS does not support -mcallee-super-interworking");
2732 /* iWMMXt and NEON are incompatible. */
2733 if (TARGET_IWMMXT
&& TARGET_NEON
)
2734 error ("iWMMXt and NEON are incompatible");
2736 /* iWMMXt unsupported under Thumb mode. */
2737 if (TARGET_THUMB
&& TARGET_IWMMXT
)
2738 error ("iWMMXt unsupported under Thumb mode");
2740 /* __fp16 support currently assumes the core has ldrh. */
2741 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
2742 sorry ("__fp16 and no ldrh");
2744 /* If soft-float is specified then don't use FPU. */
2745 if (TARGET_SOFT_FLOAT
)
2746 arm_fpu_attr
= FPU_NONE
;
2748 if (TARGET_AAPCS_BASED
)
2750 if (arm_abi
== ARM_ABI_IWMMXT
)
2751 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
2752 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
2753 && TARGET_HARD_FLOAT
2755 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
2757 arm_pcs_default
= ARM_PCS_AAPCS
;
2761 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
2762 sorry ("-mfloat-abi=hard and VFP");
2764 if (arm_abi
== ARM_ABI_APCS
)
2765 arm_pcs_default
= ARM_PCS_APCS
;
2767 arm_pcs_default
= ARM_PCS_ATPCS
;
2770 /* For arm2/3 there is no need to do any scheduling if we are doing
2771 software floating-point. */
2772 if (TARGET_SOFT_FLOAT
&& (tune_flags
& FL_MODE32
) == 0)
2773 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
2775 /* Use the cp15 method if it is available. */
2776 if (target_thread_pointer
== TP_AUTO
)
2778 if (arm_arch6k
&& !TARGET_THUMB1
)
2779 target_thread_pointer
= TP_CP15
;
2781 target_thread_pointer
= TP_SOFT
;
2784 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
2785 error ("can not use -mtp=cp15 with 16-bit Thumb");
2787 /* Override the default structure alignment for AAPCS ABI. */
2788 if (!global_options_set
.x_arm_structure_size_boundary
)
2790 if (TARGET_AAPCS_BASED
)
2791 arm_structure_size_boundary
= 8;
2795 if (arm_structure_size_boundary
!= 8
2796 && arm_structure_size_boundary
!= 32
2797 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
2799 if (ARM_DOUBLEWORD_ALIGN
)
2801 "structure size boundary can only be set to 8, 32 or 64");
2803 warning (0, "structure size boundary can only be set to 8 or 32");
2804 arm_structure_size_boundary
2805 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
2809 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
2811 error ("RTP PIC is incompatible with Thumb");
2815 /* If stack checking is disabled, we can use r10 as the PIC register,
2816 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2817 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
2819 if (TARGET_VXWORKS_RTP
)
2820 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2821 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
2824 if (flag_pic
&& TARGET_VXWORKS_RTP
)
2825 arm_pic_register
= 9;
2827 if (arm_pic_register_string
!= NULL
)
2829 int pic_register
= decode_reg_name (arm_pic_register_string
);
2832 warning (0, "-mpic-register= is useless without -fpic");
2834 /* Prevent the user from choosing an obviously stupid PIC register. */
2835 else if (pic_register
< 0 || call_used_regs
[pic_register
]
2836 || pic_register
== HARD_FRAME_POINTER_REGNUM
2837 || pic_register
== STACK_POINTER_REGNUM
2838 || pic_register
>= PC_REGNUM
2839 || (TARGET_VXWORKS_RTP
2840 && (unsigned int) pic_register
!= arm_pic_register
))
2841 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
2843 arm_pic_register
= pic_register
;
2846 if (TARGET_VXWORKS_RTP
2847 && !global_options_set
.x_arm_pic_data_is_text_relative
)
2848 arm_pic_data_is_text_relative
= 0;
2850 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2851 if (fix_cm3_ldrd
== 2)
2853 if (arm_selected_cpu
->core
== cortexm3
)
2859 /* Enable -munaligned-access by default for
2860 - all ARMv6 architecture-based processors
2861 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2862 - ARMv8 architecture-base processors.
2864 Disable -munaligned-access by default for
2865 - all pre-ARMv6 architecture-based processors
2866 - ARMv6-M architecture-based processors. */
2868 if (unaligned_access
== 2)
2870 if (arm_arch6
&& (arm_arch_notm
|| arm_arch7
))
2871 unaligned_access
= 1;
2873 unaligned_access
= 0;
2875 else if (unaligned_access
== 1
2876 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2878 warning (0, "target CPU does not support unaligned accesses");
2879 unaligned_access
= 0;
2882 if (TARGET_THUMB1
&& flag_schedule_insns
)
2884 /* Don't warn since it's on by default in -O2. */
2885 flag_schedule_insns
= 0;
2890 /* If optimizing for size, bump the number of instructions that we
2891 are prepared to conditionally execute (even on a StrongARM). */
2892 max_insns_skipped
= 6;
2895 max_insns_skipped
= current_tune
->max_insns_skipped
;
2897 /* Hot/Cold partitioning is not currently supported, since we can't
2898 handle literal pool placement in that case. */
2899 if (flag_reorder_blocks_and_partition
)
2901 inform (input_location
,
2902 "-freorder-blocks-and-partition not supported on this architecture");
2903 flag_reorder_blocks_and_partition
= 0;
2904 flag_reorder_blocks
= 1;
2908 /* Hoisting PIC address calculations more aggressively provides a small,
2909 but measurable, size reduction for PIC code. Therefore, we decrease
2910 the bar for unrestricted expression hoisting to the cost of PIC address
2911 calculation, which is 2 instructions. */
2912 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
2913 global_options
.x_param_values
,
2914 global_options_set
.x_param_values
);
2916 /* ARM EABI defaults to strict volatile bitfields. */
2917 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
2918 && abi_version_at_least(2))
2919 flag_strict_volatile_bitfields
= 1;
2921 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
2922 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
2923 if (flag_prefetch_loop_arrays
< 0
2926 && current_tune
->num_prefetch_slots
> 0)
2927 flag_prefetch_loop_arrays
= 1;
2929 /* Set up parameters to be used in prefetching algorithm. Do not override the
2930 defaults unless we are tuning for a core we have researched values for. */
2931 if (current_tune
->num_prefetch_slots
> 0)
2932 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
2933 current_tune
->num_prefetch_slots
,
2934 global_options
.x_param_values
,
2935 global_options_set
.x_param_values
);
2936 if (current_tune
->l1_cache_line_size
>= 0)
2937 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
2938 current_tune
->l1_cache_line_size
,
2939 global_options
.x_param_values
,
2940 global_options_set
.x_param_values
);
2941 if (current_tune
->l1_cache_size
>= 0)
2942 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
2943 current_tune
->l1_cache_size
,
2944 global_options
.x_param_values
,
2945 global_options_set
.x_param_values
);
2947 /* Use Neon to perform 64-bits operations rather than core
2949 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
2950 if (use_neon_for_64bits
== 1)
2951 prefer_neon_for_64bits
= true;
2953 /* Use the alternative scheduling-pressure algorithm by default. */
2954 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
2955 global_options
.x_param_values
,
2956 global_options_set
.x_param_values
);
2958 /* Disable shrink-wrap when optimizing function for size, since it tends to
2959 generate additional returns. */
2960 if (optimize_function_for_size_p (cfun
) && TARGET_THUMB2
)
2961 flag_shrink_wrap
= false;
2962 /* TBD: Dwarf info for apcs frame is not handled yet. */
2963 if (TARGET_APCS_FRAME
)
2964 flag_shrink_wrap
= false;
2966 /* We only support -mslow-flash-data on armv7-m targets. */
2967 if (target_slow_flash_data
2968 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
2969 || (TARGET_THUMB1
|| flag_pic
|| TARGET_NEON
)))
2970 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2972 /* Currently, for slow flash data, we just disable literal pools. */
2973 if (target_slow_flash_data
)
2974 arm_disable_literal_pool
= true;
2976 /* Register global variables with the garbage collector. */
2977 arm_add_gc_roots ();
2981 arm_add_gc_roots (void)
2983 gcc_obstack_init(&minipool_obstack
);
2984 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
2987 /* A table of known ARM exception types.
2988 For use with the interrupt function attribute. */
/* NOTE(review): the typedef's opening ("typedef struct") and closing
   ("} isr_attribute_arg;") lines were lost in extraction; the two
   members below are the attribute-argument string and the ARM_FT_*
   function-type value it maps to (see isr_attribute_args and
   arm_isr_value below).  */
2992 const char *const arg
;
2993 const unsigned long return_value
;
2997 static const isr_attribute_arg isr_attribute_args
[] =
2999 { "IRQ", ARM_FT_ISR
},
3000 { "irq", ARM_FT_ISR
},
3001 { "FIQ", ARM_FT_FIQ
},
3002 { "fiq", ARM_FT_FIQ
},
3003 { "ABORT", ARM_FT_ISR
},
3004 { "abort", ARM_FT_ISR
},
3005 { "ABORT", ARM_FT_ISR
},
3006 { "abort", ARM_FT_ISR
},
3007 { "UNDEF", ARM_FT_EXCEPTION
},
3008 { "undef", ARM_FT_EXCEPTION
},
3009 { "SWI", ARM_FT_EXCEPTION
},
3010 { "swi", ARM_FT_EXCEPTION
},
3011 { NULL
, ARM_FT_NORMAL
}
3014 /* Returns the (interrupt) function type of the current
3015 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3017 static unsigned long
3018 arm_isr_value (tree argument
)
3020 const isr_attribute_arg
* ptr
;
/* NOTE(review): the guard condition for this early return was dropped
   by extraction (the gap covers original lines 3021-3023) -- confirm
   against the full source before relying on when it fires.  */
3024 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3026 /* No argument - default to IRQ. */
3027 if (argument
== NULL_TREE
)
/* NOTE(review): the body of the no-argument case (original line
   3028, presumably a return of the IRQ type) is missing here.  */
3030 /* Get the value of the argument. */
3031 if (TREE_VALUE (argument
) == NULL_TREE
3032 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3033 return ARM_FT_UNKNOWN
;
3035 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3037 /* Check it against the list of known arguments. */
3038 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3039 if (streq (arg
, ptr
->arg
))
3040 return ptr
->return_value
;
3042 /* An unrecognized interrupt type. */
3043 return ARM_FT_UNKNOWN
;
3046 /* Computes the type of the current function. */
3048 static unsigned long
3049 arm_compute_func_type (void)
3051 unsigned long type
= ARM_FT_UNKNOWN
;
3055 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3057 /* Decide if the current function is volatile. Such functions
3058 never return, and many memory cycles can be saved by not storing
3059 register values that will never be needed again. This optimization
3060 was added to speed up context switching in a kernel application. */
3062 && (TREE_NOTHROW (current_function_decl
)
3063 || !(flag_unwind_tables
3065 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3066 && TREE_THIS_VOLATILE (current_function_decl
))
3067 type
|= ARM_FT_VOLATILE
;
3069 if (cfun
->static_chain_decl
!= NULL
)
3070 type
|= ARM_FT_NESTED
;
3072 attr
= DECL_ATTRIBUTES (current_function_decl
);
3074 a
= lookup_attribute ("naked", attr
);
3076 type
|= ARM_FT_NAKED
;
3078 a
= lookup_attribute ("isr", attr
);
3080 a
= lookup_attribute ("interrupt", attr
);
3083 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3085 type
|= arm_isr_value (TREE_VALUE (a
));
3090 /* Returns the type of the current function. */
3093 arm_current_func_type (void)
3095 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3096 cfun
->machine
->func_type
= arm_compute_func_type ();
3098 return cfun
->machine
->func_type
;
3102 arm_allocate_stack_slots_for_args (void)
3104 /* Naked functions should not allocate stack slots for arguments. */
3105 return !IS_NAKED (arm_current_func_type ());
3109 arm_warn_func_return (tree decl
)
3111 /* Naked functions are implemented entirely in assembly, including the
3112 return sequence, so suppress warnings about this. */
3113 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3117 /* Output assembler code for a block containing the constant parts
3118 of a trampoline, leaving space for the variable parts.
3120 On the ARM, (if r8 is the static chain regnum, and remembering that
3121 referencing pc adds an offset of 8) the trampoline looks like:
3124 .word static chain value
3125 .word function's address
3126 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3129 arm_asm_trampoline_template (FILE *f
)
3133 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3134 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3136 else if (TARGET_THUMB2
)
3138 /* The Thumb-2 trampoline is similar to the arm implementation.
3139 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3140 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3141 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3142 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
3146 ASM_OUTPUT_ALIGN (f
, 2);
3147 fprintf (f
, "\t.code\t16\n");
3148 fprintf (f
, ".Ltrampoline_start:\n");
3149 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3150 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3151 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3152 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3153 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3154 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
3156 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3157 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3160 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3163 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3165 rtx fnaddr
, mem
, a_tramp
;
3167 emit_block_move (m_tramp
, assemble_trampoline_template (),
3168 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3170 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3171 emit_move_insn (mem
, chain_value
);
3173 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3174 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3175 emit_move_insn (mem
, fnaddr
);
3177 a_tramp
= XEXP (m_tramp
, 0);
3178 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3179 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3180 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3183 /* Thumb trampolines should be entered in thumb mode, so set
3184 the bottom bit of the address. */
3187 arm_trampoline_adjust_address (rtx addr
)
3190 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3191 NULL
, 0, OPTAB_LIB_WIDEN
);
3195 /* Return 1 if it is possible to return using a single instruction.
3196 If SIBLING is non-null, this is a test for a return before a sibling
3197 call. SIBLING is the call insn, so we can examine its register usage. */
3200 use_return_insn (int iscond
, rtx sibling
)
3203 unsigned int func_type
;
3204 unsigned long saved_int_regs
;
3205 unsigned HOST_WIDE_INT stack_adjust
;
3206 arm_stack_offsets
*offsets
;
3208 /* Never use a return instruction before reload has run. */
3209 if (!reload_completed
)
3212 func_type
= arm_current_func_type ();
3214 /* Naked, volatile and stack alignment functions need special
3216 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3219 /* So do interrupt functions that use the frame pointer and Thumb
3220 interrupt functions. */
3221 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3224 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3225 && !optimize_function_for_size_p (cfun
))
3228 offsets
= arm_get_frame_offsets ();
3229 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3231 /* As do variadic functions. */
3232 if (crtl
->args
.pretend_args_size
3233 || cfun
->machine
->uses_anonymous_args
3234 /* Or if the function calls __builtin_eh_return () */
3235 || crtl
->calls_eh_return
3236 /* Or if the function calls alloca */
3237 || cfun
->calls_alloca
3238 /* Or if there is a stack adjustment. However, if the stack pointer
3239 is saved on the stack, we can use a pre-incrementing stack load. */
3240 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3241 && stack_adjust
== 4)))
3244 saved_int_regs
= offsets
->saved_regs_mask
;
3246 /* Unfortunately, the insn
3248 ldmib sp, {..., sp, ...}
3250 triggers a bug on most SA-110 based devices, such that the stack
3251 pointer won't be correctly restored if the instruction takes a
3252 page fault. We work around this problem by popping r3 along with
3253 the other registers, since that is never slower than executing
3254 another instruction.
3256 We test for !arm_arch5 here, because code for any architecture
3257 less than this could potentially be run on one of the buggy
3259 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3261 /* Validate that r3 is a call-clobbered register (always true in
3262 the default abi) ... */
3263 if (!call_used_regs
[3])
3266 /* ... that it isn't being used for a return value ... */
3267 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3270 /* ... or for a tail-call argument ... */
3273 gcc_assert (CALL_P (sibling
));
3275 if (find_regno_fusage (sibling
, USE
, 3))
3279 /* ... and that there are no call-saved registers in r0-r2
3280 (always true in the default ABI). */
3281 if (saved_int_regs
& 0x7)
3285 /* Can't be done if interworking with Thumb, and any registers have been
3287 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3290 /* On StrongARM, conditional returns are expensive if they aren't
3291 taken and multiple registers have been stacked. */
3292 if (iscond
&& arm_tune_strongarm
)
3294 /* Conditional return when just the LR is stored is a simple
3295 conditional-load instruction, that's not expensive. */
3296 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
3300 && arm_pic_register
!= INVALID_REGNUM
3301 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
3305 /* If there are saved registers but the LR isn't saved, then we need
3306 two instructions for the return. */
3307 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
3310 /* Can't be done if any of the VFP regs are pushed,
3311 since this also requires an insn. */
3312 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
3313 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
3314 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
3317 if (TARGET_REALLY_IWMMXT
)
3318 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
3319 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
3325 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3326 shrink-wrapping if possible. This is the case if we need to emit a
3327 prologue, which we can test by looking at the offsets. */
3329 use_simple_return_p (void)
3331 arm_stack_offsets
*offsets
;
3333 offsets
= arm_get_frame_offsets ();
3334 return offsets
->outgoing_args
!= 0;
3337 /* Return TRUE if int I is a valid immediate ARM constant. */
3340 const_ok_for_arm (HOST_WIDE_INT i
)
3344 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3345 be all zero, or all one. */
3346 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3347 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3348 != ((~(unsigned HOST_WIDE_INT
) 0)
3349 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
3352 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3354 /* Fast return for 0 and small values. We must do this for zero, since
3355 the code below can't handle that one case. */
3356 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3359 /* Get the number of trailing zeros. */
3360 lowbit
= ffs((int) i
) - 1;
3362 /* Only even shifts are allowed in ARM mode so round down to the
3363 nearest even number. */
3367 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3372 /* Allow rotated constants in ARM mode. */
3374 && ((i
& ~0xc000003f) == 0
3375 || (i
& ~0xf000000f) == 0
3376 || (i
& ~0xfc000003) == 0))
3383 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3386 if (i
== v
|| i
== (v
| (v
<< 8)))
3389 /* Allow repeated pattern 0xXY00XY00. */
3399 /* Return true if I is a valid constant for the operation CODE. */
3401 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
3403 if (const_ok_for_arm (i
))
3409 /* See if we can use movw. */
3410 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
3413 /* Otherwise, try mvn. */
3414 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3417 /* See if we can use addw or subw. */
3419 && ((i
& 0xfffff000) == 0
3420 || ((-i
) & 0xfffff000) == 0))
3422 /* else fall through. */
3442 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
3444 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
3450 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3454 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3461 /* Return true if I is a valid di mode constant for the operation CODE. */
3463 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
3465 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
3466 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
3467 rtx hi
= GEN_INT (hi_val
);
3468 rtx lo
= GEN_INT (lo_val
);
3478 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
3479 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
3481 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
3488 /* Emit a sequence of insns to handle a large constant.
3489 CODE is the code of the operation required, it can be any of SET, PLUS,
3490 IOR, AND, XOR, MINUS;
3491 MODE is the mode in which the operation is being performed;
3492 VAL is the integer to operate on;
3493 SOURCE is the other operand (a register, or a null-pointer for SET);
3494 SUBTARGETS means it is safe to create scratch registers if that will
3495 either produce a simpler sequence, or we will want to cse the values.
3496 Return value is the number of insns emitted. */
3498 /* ??? Tweak this for thumb2. */
3500 arm_split_constant (enum rtx_code code
, enum machine_mode mode
, rtx insn
,
3501 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
3505 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
3506 cond
= COND_EXEC_TEST (PATTERN (insn
));
3510 if (subtargets
|| code
== SET
3511 || (REG_P (target
) && REG_P (source
)
3512 && REGNO (target
) != REGNO (source
)))
3514 /* After arm_reorg has been called, we can't fix up expensive
3515 constants by pushing them into memory so we must synthesize
3516 them in-line, regardless of the cost. This is only likely to
3517 be more costly on chips that have load delay slots and we are
3518 compiling without running the scheduler (so no splitting
3519 occurred before the final instruction emission).
3521 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3523 if (!cfun
->machine
->after_arm_reorg
3525 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
3527 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
3532 /* Currently SET is the only monadic value for CODE, all
3533 the rest are diadic. */
3534 if (TARGET_USE_MOVT
)
3535 arm_emit_movpair (target
, GEN_INT (val
));
3537 emit_set_insn (target
, GEN_INT (val
));
3543 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
3545 if (TARGET_USE_MOVT
)
3546 arm_emit_movpair (temp
, GEN_INT (val
));
3548 emit_set_insn (temp
, GEN_INT (val
));
3550 /* For MINUS, the value is subtracted from, since we never
3551 have subtraction of a constant. */
3553 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
3555 emit_set_insn (target
,
3556 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
3562 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
3566 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3567 ARM/THUMB2 immediates, and add up to VAL.
3568 Thr function return value gives the number of insns required. */
3570 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3571 struct four_ints
*return_sequence
)
3573 int best_consecutive_zeros
= 0;
3577 struct four_ints tmp_sequence
;
3579 /* If we aren't targeting ARM, the best place to start is always at
3580 the bottom, otherwise look more closely. */
3583 for (i
= 0; i
< 32; i
+= 2)
3585 int consecutive_zeros
= 0;
3587 if (!(val
& (3 << i
)))
3589 while ((i
< 32) && !(val
& (3 << i
)))
3591 consecutive_zeros
+= 2;
3594 if (consecutive_zeros
> best_consecutive_zeros
)
3596 best_consecutive_zeros
= consecutive_zeros
;
3597 best_start
= i
- consecutive_zeros
;
3604 /* So long as it won't require any more insns to do so, it's
3605 desirable to emit a small constant (in bits 0...9) in the last
3606 insn. This way there is more chance that it can be combined with
3607 a later addressing insn to form a pre-indexed load or store
3608 operation. Consider:
3610 *((volatile int *)0xe0000100) = 1;
3611 *((volatile int *)0xe0000110) = 2;
3613 We want this to wind up as:
3617 str rB, [rA, #0x100]
3619 str rB, [rA, #0x110]
3621 rather than having to synthesize both large constants from scratch.
3623 Therefore, we calculate how many insns would be required to emit
3624 the constant starting from `best_start', and also starting from
3625 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3626 yield a shorter sequence, we may as well use zero. */
3627 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
3629 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < val
))
3631 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
3632 if (insns2
<= insns1
)
3634 *return_sequence
= tmp_sequence
;
3642 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3644 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3645 struct four_ints
*return_sequence
, int i
)
3647 int remainder
= val
& 0xffffffff;
3650 /* Try and find a way of doing the job in either two or three
3653 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3654 location. We start at position I. This may be the MSB, or
3655 optimial_immediate_sequence may have positioned it at the largest block
3656 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3657 wrapping around to the top of the word when we drop off the bottom.
3658 In the worst case this code should produce no more than four insns.
3660 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3661 constants, shifted to any arbitrary location. We should always start
3666 unsigned int b1
, b2
, b3
, b4
;
3667 unsigned HOST_WIDE_INT result
;
3670 gcc_assert (insns
< 4);
3675 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3676 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
3679 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
3680 /* We can use addw/subw for the last 12 bits. */
3684 /* Use an 8-bit shifted/rotated immediate. */
3688 result
= remainder
& ((0x0ff << end
)
3689 | ((i
< end
) ? (0xff >> (32 - end
))
3696 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3697 arbitrary shifts. */
3698 i
-= TARGET_ARM
? 2 : 1;
3702 /* Next, see if we can do a better job with a thumb2 replicated
3705 We do it this way around to catch the cases like 0x01F001E0 where
3706 two 8-bit immediates would work, but a replicated constant would
3709 TODO: 16-bit constants that don't clear all the bits, but still win.
3710 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3713 b1
= (remainder
& 0xff000000) >> 24;
3714 b2
= (remainder
& 0x00ff0000) >> 16;
3715 b3
= (remainder
& 0x0000ff00) >> 8;
3716 b4
= remainder
& 0xff;
3720 /* The 8-bit immediate already found clears b1 (and maybe b2),
3721 but must leave b3 and b4 alone. */
3723 /* First try to find a 32-bit replicated constant that clears
3724 almost everything. We can assume that we can't do it in one,
3725 or else we wouldn't be here. */
3726 unsigned int tmp
= b1
& b2
& b3
& b4
;
3727 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
3729 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
3730 + (tmp
== b3
) + (tmp
== b4
);
3732 && (matching_bytes
>= 3
3733 || (matching_bytes
== 2
3734 && const_ok_for_op (remainder
& ~tmp2
, code
))))
3736 /* At least 3 of the bytes match, and the fourth has at
3737 least as many bits set, or two of the bytes match
3738 and it will only require one more insn to finish. */
3746 /* Second, try to find a 16-bit replicated constant that can
3747 leave three of the bytes clear. If b2 or b4 is already
3748 zero, then we can. If the 8-bit from above would not
3749 clear b2 anyway, then we still win. */
3750 else if (b1
== b3
&& (!b2
|| !b4
3751 || (remainder
& 0x00ff0000 & ~result
)))
3753 result
= remainder
& 0xff00ff00;
3759 /* The 8-bit immediate already found clears b2 (and maybe b3)
3760 and we don't get here unless b1 is alredy clear, but it will
3761 leave b4 unchanged. */
3763 /* If we can clear b2 and b4 at once, then we win, since the
3764 8-bits couldn't possibly reach that far. */
3767 result
= remainder
& 0x00ff00ff;
3773 return_sequence
->i
[insns
++] = result
;
3774 remainder
&= ~result
;
3776 if (code
== SET
|| code
== MINUS
)
3784 /* Emit an instruction with the indicated PATTERN. If COND is
3785 non-NULL, conditionalize the execution of the instruction on COND
3789 emit_constant_insn (rtx cond
, rtx pattern
)
3792 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
3793 emit_insn (pattern
);
3796 /* As above, but extra parameter GENERATE which, if clear, suppresses
3800 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
3801 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
3806 int final_invert
= 0;
3808 int set_sign_bit_copies
= 0;
3809 int clear_sign_bit_copies
= 0;
3810 int clear_zero_bit_copies
= 0;
3811 int set_zero_bit_copies
= 0;
3812 int insns
= 0, neg_insns
, inv_insns
;
3813 unsigned HOST_WIDE_INT temp1
, temp2
;
3814 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
3815 struct four_ints
*immediates
;
3816 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
3818 /* Find out which operations are safe for a given CODE. Also do a quick
3819 check for degenerate cases; these can occur when DImode operations
3832 if (remainder
== 0xffffffff)
3835 emit_constant_insn (cond
,
3836 gen_rtx_SET (VOIDmode
, target
,
3837 GEN_INT (ARM_SIGN_EXTEND (val
))));
3843 if (reload_completed
&& rtx_equal_p (target
, source
))
3847 emit_constant_insn (cond
,
3848 gen_rtx_SET (VOIDmode
, target
, source
));
3857 emit_constant_insn (cond
,
3858 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
3861 if (remainder
== 0xffffffff)
3863 if (reload_completed
&& rtx_equal_p (target
, source
))
3866 emit_constant_insn (cond
,
3867 gen_rtx_SET (VOIDmode
, target
, source
));
3876 if (reload_completed
&& rtx_equal_p (target
, source
))
3879 emit_constant_insn (cond
,
3880 gen_rtx_SET (VOIDmode
, target
, source
));
3884 if (remainder
== 0xffffffff)
3887 emit_constant_insn (cond
,
3888 gen_rtx_SET (VOIDmode
, target
,
3889 gen_rtx_NOT (mode
, source
)));
3896 /* We treat MINUS as (val - source), since (source - val) is always
3897 passed as (source + (-val)). */
3901 emit_constant_insn (cond
,
3902 gen_rtx_SET (VOIDmode
, target
,
3903 gen_rtx_NEG (mode
, source
)));
3906 if (const_ok_for_arm (val
))
3909 emit_constant_insn (cond
,
3910 gen_rtx_SET (VOIDmode
, target
,
3911 gen_rtx_MINUS (mode
, GEN_INT (val
),
3922 /* If we can do it in one insn get out quickly. */
3923 if (const_ok_for_op (val
, code
))
3926 emit_constant_insn (cond
,
3927 gen_rtx_SET (VOIDmode
, target
,
3929 ? gen_rtx_fmt_ee (code
, mode
, source
,
3935 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3937 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
3938 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
3942 if (mode
== SImode
&& i
== 16)
3943 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3945 emit_constant_insn (cond
,
3946 gen_zero_extendhisi2
3947 (target
, gen_lowpart (HImode
, source
)));
3949 /* Extz only supports SImode, but we can coerce the operands
3951 emit_constant_insn (cond
,
3952 gen_extzv_t2 (gen_lowpart (SImode
, target
),
3953 gen_lowpart (SImode
, source
),
3954 GEN_INT (i
), const0_rtx
));
3960 /* Calculate a few attributes that may be useful for specific
3962 /* Count number of leading zeros. */
3963 for (i
= 31; i
>= 0; i
--)
3965 if ((remainder
& (1 << i
)) == 0)
3966 clear_sign_bit_copies
++;
3971 /* Count number of leading 1's. */
3972 for (i
= 31; i
>= 0; i
--)
3974 if ((remainder
& (1 << i
)) != 0)
3975 set_sign_bit_copies
++;
3980 /* Count number of trailing zero's. */
3981 for (i
= 0; i
<= 31; i
++)
3983 if ((remainder
& (1 << i
)) == 0)
3984 clear_zero_bit_copies
++;
3989 /* Count number of trailing 1's. */
3990 for (i
= 0; i
<= 31; i
++)
3992 if ((remainder
& (1 << i
)) != 0)
3993 set_zero_bit_copies
++;
4001 /* See if we can do this by sign_extending a constant that is known
4002 to be negative. This is a good, way of doing it, since the shift
4003 may well merge into a subsequent insn. */
4004 if (set_sign_bit_copies
> 1)
4006 if (const_ok_for_arm
4007 (temp1
= ARM_SIGN_EXTEND (remainder
4008 << (set_sign_bit_copies
- 1))))
4012 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4013 emit_constant_insn (cond
,
4014 gen_rtx_SET (VOIDmode
, new_src
,
4016 emit_constant_insn (cond
,
4017 gen_ashrsi3 (target
, new_src
,
4018 GEN_INT (set_sign_bit_copies
- 1)));
4022 /* For an inverted constant, we will need to set the low bits,
4023 these will be shifted out of harm's way. */
4024 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4025 if (const_ok_for_arm (~temp1
))
4029 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4030 emit_constant_insn (cond
,
4031 gen_rtx_SET (VOIDmode
, new_src
,
4033 emit_constant_insn (cond
,
4034 gen_ashrsi3 (target
, new_src
,
4035 GEN_INT (set_sign_bit_copies
- 1)));
4041 /* See if we can calculate the value as the difference between two
4042 valid immediates. */
4043 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4045 int topshift
= clear_sign_bit_copies
& ~1;
4047 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4048 & (0xff000000 >> topshift
));
4050 /* If temp1 is zero, then that means the 9 most significant
4051 bits of remainder were 1 and we've caused it to overflow.
4052 When topshift is 0 we don't need to do anything since we
4053 can borrow from 'bit 32'. */
4054 if (temp1
== 0 && topshift
!= 0)
4055 temp1
= 0x80000000 >> (topshift
- 1);
4057 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4059 if (const_ok_for_arm (temp2
))
4063 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4064 emit_constant_insn (cond
,
4065 gen_rtx_SET (VOIDmode
, new_src
,
4067 emit_constant_insn (cond
,
4068 gen_addsi3 (target
, new_src
,
4076 /* See if we can generate this by setting the bottom (or the top)
4077 16 bits, and then shifting these into the other half of the
4078 word. We only look for the simplest cases, to do more would cost
4079 too much. Be careful, however, not to generate this when the
4080 alternative would take fewer insns. */
4081 if (val
& 0xffff0000)
4083 temp1
= remainder
& 0xffff0000;
4084 temp2
= remainder
& 0x0000ffff;
4086 /* Overlaps outside this range are best done using other methods. */
4087 for (i
= 9; i
< 24; i
++)
4089 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4090 && !const_ok_for_arm (temp2
))
4092 rtx new_src
= (subtargets
4093 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4095 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4096 source
, subtargets
, generate
);
4104 gen_rtx_ASHIFT (mode
, source
,
4111 /* Don't duplicate cases already considered. */
4112 for (i
= 17; i
< 24; i
++)
4114 if (((temp1
| (temp1
>> i
)) == remainder
)
4115 && !const_ok_for_arm (temp1
))
4117 rtx new_src
= (subtargets
4118 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4120 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4121 source
, subtargets
, generate
);
4126 gen_rtx_SET (VOIDmode
, target
,
4129 gen_rtx_LSHIFTRT (mode
, source
,
4140 /* If we have IOR or XOR, and the constant can be loaded in a
4141 single instruction, and we can find a temporary to put it in,
4142 then this can be done in two instructions instead of 3-4. */
4144 /* TARGET can't be NULL if SUBTARGETS is 0 */
4145 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4147 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4151 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4153 emit_constant_insn (cond
,
4154 gen_rtx_SET (VOIDmode
, sub
,
4156 emit_constant_insn (cond
,
4157 gen_rtx_SET (VOIDmode
, target
,
4158 gen_rtx_fmt_ee (code
, mode
,
4169 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4170 and the remainder 0s for e.g. 0xfff00000)
4171 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4173 This can be done in 2 instructions by using shifts with mov or mvn.
4178 mvn r0, r0, lsr #12 */
4179 if (set_sign_bit_copies
> 8
4180 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
4184 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4185 rtx shift
= GEN_INT (set_sign_bit_copies
);
4189 gen_rtx_SET (VOIDmode
, sub
,
4191 gen_rtx_ASHIFT (mode
,
4196 gen_rtx_SET (VOIDmode
, target
,
4198 gen_rtx_LSHIFTRT (mode
, sub
,
4205 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4207 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4209 For eg. r0 = r0 | 0xfff
4214 if (set_zero_bit_copies
> 8
4215 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4219 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4220 rtx shift
= GEN_INT (set_zero_bit_copies
);
4224 gen_rtx_SET (VOIDmode
, sub
,
4226 gen_rtx_LSHIFTRT (mode
,
4231 gen_rtx_SET (VOIDmode
, target
,
4233 gen_rtx_ASHIFT (mode
, sub
,
4239 /* This will never be reached for Thumb2 because orn is a valid
4240 instruction. This is for Thumb1 and the ARM 32 bit cases.
4242 x = y | constant (such that ~constant is a valid constant)
4244 x = ~(~y & ~constant).
4246 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4250 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4251 emit_constant_insn (cond
,
4252 gen_rtx_SET (VOIDmode
, sub
,
4253 gen_rtx_NOT (mode
, source
)));
4256 sub
= gen_reg_rtx (mode
);
4257 emit_constant_insn (cond
,
4258 gen_rtx_SET (VOIDmode
, sub
,
4259 gen_rtx_AND (mode
, source
,
4261 emit_constant_insn (cond
,
4262 gen_rtx_SET (VOIDmode
, target
,
4263 gen_rtx_NOT (mode
, sub
)));
4270 /* See if two shifts will do 2 or more insn's worth of work. */
4271 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4273 HOST_WIDE_INT shift_mask
= ((0xffffffff
4274 << (32 - clear_sign_bit_copies
))
4277 if ((remainder
| shift_mask
) != 0xffffffff)
4281 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4282 insns
= arm_gen_constant (AND
, mode
, cond
,
4283 remainder
| shift_mask
,
4284 new_src
, source
, subtargets
, 1);
4289 rtx targ
= subtargets
? NULL_RTX
: target
;
4290 insns
= arm_gen_constant (AND
, mode
, cond
,
4291 remainder
| shift_mask
,
4292 targ
, source
, subtargets
, 0);
4298 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4299 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4301 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4302 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4308 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4310 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4312 if ((remainder
| shift_mask
) != 0xffffffff)
4316 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4318 insns
= arm_gen_constant (AND
, mode
, cond
,
4319 remainder
| shift_mask
,
4320 new_src
, source
, subtargets
, 1);
4325 rtx targ
= subtargets
? NULL_RTX
: target
;
4327 insns
= arm_gen_constant (AND
, mode
, cond
,
4328 remainder
| shift_mask
,
4329 targ
, source
, subtargets
, 0);
4335 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4336 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4338 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4339 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4351 /* Calculate what the instruction sequences would be if we generated it
4352 normally, negated, or inverted. */
4354 /* AND cannot be split into multiple insns, so invert and use BIC. */
4357 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4360 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4365 if (can_invert
|| final_invert
)
4366 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4371 immediates
= &pos_immediates
;
4373 /* Is the negated immediate sequence more efficient? */
4374 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4377 immediates
= &neg_immediates
;
4382 /* Is the inverted immediate sequence more efficient?
4383 We must allow for an extra NOT instruction for XOR operations, although
4384 there is some chance that the final 'mvn' will get optimized later. */
4385 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4388 immediates
= &inv_immediates
;
4396 /* Now output the chosen sequence as instructions. */
4399 for (i
= 0; i
< insns
; i
++)
4401 rtx new_src
, temp1_rtx
;
4403 temp1
= immediates
->i
[i
];
4405 if (code
== SET
|| code
== MINUS
)
4406 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4407 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4408 new_src
= gen_reg_rtx (mode
);
4414 else if (can_negate
)
4417 temp1
= trunc_int_for_mode (temp1
, mode
);
4418 temp1_rtx
= GEN_INT (temp1
);
4422 else if (code
== MINUS
)
4423 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4425 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4427 emit_constant_insn (cond
,
4428 gen_rtx_SET (VOIDmode
, new_src
,
4434 can_negate
= can_invert
;
4438 else if (code
== MINUS
)
4446 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
4447 gen_rtx_NOT (mode
, source
)));
4454 /* Canonicalize a comparison so that we are more likely to recognize it.
4455 This can be done for a few constant compares, where we can make the
4456 immediate value easier to load. */
4459 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
4460 bool op0_preserve_value
)
4462 enum machine_mode mode
;
4463 unsigned HOST_WIDE_INT i
, maxval
;
4465 mode
= GET_MODE (*op0
);
4466 if (mode
== VOIDmode
)
4467 mode
= GET_MODE (*op1
);
4469 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
4471 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4472 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4473 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4474 for GTU/LEU in Thumb mode. */
4479 if (*code
== GT
|| *code
== LE
4480 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
4482 /* Missing comparison. First try to use an available
4484 if (CONST_INT_P (*op1
))
4492 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4494 *op1
= GEN_INT (i
+ 1);
4495 *code
= *code
== GT
? GE
: LT
;
4501 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4502 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4504 *op1
= GEN_INT (i
+ 1);
4505 *code
= *code
== GTU
? GEU
: LTU
;
4514 /* If that did not work, reverse the condition. */
4515 if (!op0_preserve_value
)
4520 *code
= (int)swap_condition ((enum rtx_code
)*code
);
4526 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4527 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4528 to facilitate possible combining with a cmp into 'ands'. */
4530 && GET_CODE (*op0
) == ZERO_EXTEND
4531 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
4532 && GET_MODE (XEXP (*op0
, 0)) == QImode
4533 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
4534 && subreg_lowpart_p (XEXP (*op0
, 0))
4535 && *op1
== const0_rtx
)
4536 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
4539 /* Comparisons smaller than DImode. Only adjust comparisons against
4540 an out-of-range constant. */
4541 if (!CONST_INT_P (*op1
)
4542 || const_ok_for_arm (INTVAL (*op1
))
4543 || const_ok_for_arm (- INTVAL (*op1
)))
4557 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4559 *op1
= GEN_INT (i
+ 1);
4560 *code
= *code
== GT
? GE
: LT
;
4568 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4570 *op1
= GEN_INT (i
- 1);
4571 *code
= *code
== GE
? GT
: LE
;
4578 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4579 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4581 *op1
= GEN_INT (i
+ 1);
4582 *code
= *code
== GTU
? GEU
: LTU
;
4590 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4592 *op1
= GEN_INT (i
- 1);
4593 *code
= *code
== GEU
? GTU
: LEU
;
4604 /* Define how to find the value returned by a function. */
4607 arm_function_value(const_tree type
, const_tree func
,
4608 bool outgoing ATTRIBUTE_UNUSED
)
4610 enum machine_mode mode
;
4611 int unsignedp ATTRIBUTE_UNUSED
;
4612 rtx r ATTRIBUTE_UNUSED
;
4614 mode
= TYPE_MODE (type
);
4616 if (TARGET_AAPCS_BASED
)
4617 return aapcs_allocate_return_reg (mode
, type
, func
);
4619 /* Promote integer types. */
4620 if (INTEGRAL_TYPE_P (type
))
4621 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
4623 /* Promotes small structs returned in a register to full-word size
4624 for big-endian AAPCS. */
4625 if (arm_return_in_msb (type
))
4627 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4628 if (size
% UNITS_PER_WORD
!= 0)
4630 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4631 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4635 return arm_libcall_value_1 (mode
);
4638 /* libcall hashtable helpers. */
4640 struct libcall_hasher
: typed_noop_remove
<rtx_def
>
4642 typedef rtx_def value_type
;
4643 typedef rtx_def compare_type
;
4644 static inline hashval_t
hash (const value_type
*);
4645 static inline bool equal (const value_type
*, const compare_type
*);
4646 static inline void remove (value_type
*);
4650 libcall_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
4652 return rtx_equal_p (p1
, p2
);
4656 libcall_hasher::hash (const value_type
*p1
)
4658 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
4661 typedef hash_table
<libcall_hasher
> libcall_table_type
;
4664 add_libcall (libcall_table_type htab
, rtx libcall
)
4666 *htab
.find_slot (libcall
, INSERT
) = libcall
;
4670 arm_libcall_uses_aapcs_base (const_rtx libcall
)
4672 static bool init_done
= false;
4673 static libcall_table_type libcall_htab
;
4679 libcall_htab
.create (31);
4680 add_libcall (libcall_htab
,
4681 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
4682 add_libcall (libcall_htab
,
4683 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
4684 add_libcall (libcall_htab
,
4685 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
4686 add_libcall (libcall_htab
,
4687 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
4689 add_libcall (libcall_htab
,
4690 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
4691 add_libcall (libcall_htab
,
4692 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
4693 add_libcall (libcall_htab
,
4694 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
4695 add_libcall (libcall_htab
,
4696 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
4698 add_libcall (libcall_htab
,
4699 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
4700 add_libcall (libcall_htab
,
4701 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
4702 add_libcall (libcall_htab
,
4703 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
4704 add_libcall (libcall_htab
,
4705 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
4706 add_libcall (libcall_htab
,
4707 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
4708 add_libcall (libcall_htab
,
4709 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
4710 add_libcall (libcall_htab
,
4711 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
4712 add_libcall (libcall_htab
,
4713 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
4715 /* Values from double-precision helper functions are returned in core
4716 registers if the selected core only supports single-precision
4717 arithmetic, even if we are using the hard-float ABI. The same is
4718 true for single-precision helpers, but we will never be using the
4719 hard-float ABI on a CPU which doesn't support single-precision
4720 operations in hardware. */
4721 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
4722 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
4723 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
4724 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
4725 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
4726 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
4727 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
4728 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
4729 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
4730 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
4731 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
4732 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
4734 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
4738 return libcall
&& libcall_htab
.find (libcall
) != NULL
;
4742 arm_libcall_value_1 (enum machine_mode mode
)
4744 if (TARGET_AAPCS_BASED
)
4745 return aapcs_libcall_value (mode
);
4746 else if (TARGET_IWMMXT_ABI
4747 && arm_vector_mode_supported_p (mode
))
4748 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
4750 return gen_rtx_REG (mode
, ARG_REGISTER (1));
4753 /* Define how to find the value returned by a library function
4754 assuming the value has mode MODE. */
4757 arm_libcall_value (enum machine_mode mode
, const_rtx libcall
)
4759 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
4760 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
4762 /* The following libcalls return their result in integer registers,
4763 even though they return a floating point value. */
4764 if (arm_libcall_uses_aapcs_base (libcall
))
4765 return gen_rtx_REG (mode
, ARG_REGISTER(1));
4769 return arm_libcall_value_1 (mode
);
4772 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4775 arm_function_value_regno_p (const unsigned int regno
)
4777 if (regno
== ARG_REGISTER (1)
4779 && TARGET_AAPCS_BASED
4781 && TARGET_HARD_FLOAT
4782 && regno
== FIRST_VFP_REGNUM
)
4783 || (TARGET_IWMMXT_ABI
4784 && regno
== FIRST_IWMMXT_REGNUM
))
4790 /* Determine the amount of memory needed to store the possible return
4791 registers of an untyped call. */
4793 arm_apply_result_size (void)
4799 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
4801 if (TARGET_IWMMXT_ABI
)
4808 /* Decide whether TYPE should be returned in memory (true)
4809 or in a register (false). FNTYPE is the type of the function making
4812 arm_return_in_memory (const_tree type
, const_tree fntype
)
4816 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
4818 if (TARGET_AAPCS_BASED
)
4820 /* Simple, non-aggregate types (ie not including vectors and
4821 complex) are always returned in a register (or registers).
4822 We don't care about which register here, so we can short-cut
4823 some of the detail. */
4824 if (!AGGREGATE_TYPE_P (type
)
4825 && TREE_CODE (type
) != VECTOR_TYPE
4826 && TREE_CODE (type
) != COMPLEX_TYPE
)
4829 /* Any return value that is no larger than one word can be
4831 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
4834 /* Check any available co-processors to see if they accept the
4835 type as a register candidate (VFP, for example, can return
4836 some aggregates in consecutive registers). These aren't
4837 available if the call is variadic. */
4838 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
4841 /* Vector values should be returned using ARM registers, not
4842 memory (unless they're over 16 bytes, which will break since
4843 we only have four call-clobbered registers to play with). */
4844 if (TREE_CODE (type
) == VECTOR_TYPE
)
4845 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4847 /* The rest go in memory. */
4851 if (TREE_CODE (type
) == VECTOR_TYPE
)
4852 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4854 if (!AGGREGATE_TYPE_P (type
) &&
4855 (TREE_CODE (type
) != VECTOR_TYPE
))
4856 /* All simple types are returned in registers. */
4859 if (arm_abi
!= ARM_ABI_APCS
)
4861 /* ATPCS and later return aggregate types in memory only if they are
4862 larger than a word (or are variable size). */
4863 return (size
< 0 || size
> UNITS_PER_WORD
);
4866 /* For the arm-wince targets we choose to be compatible with Microsoft's
4867 ARM and Thumb compilers, which always return aggregates in memory. */
4869 /* All structures/unions bigger than one word are returned in memory.
4870 Also catch the case where int_size_in_bytes returns -1. In this case
4871 the aggregate is either huge or of variable size, and in either case
4872 we will want to return it via memory and not in a register. */
4873 if (size
< 0 || size
> UNITS_PER_WORD
)
4876 if (TREE_CODE (type
) == RECORD_TYPE
)
4880 /* For a struct the APCS says that we only return in a register
4881 if the type is 'integer like' and every addressable element
4882 has an offset of zero. For practical purposes this means
4883 that the structure can have at most one non bit-field element
4884 and that this element must be the first one in the structure. */
4886 /* Find the first field, ignoring non FIELD_DECL things which will
4887 have been created by C++. */
4888 for (field
= TYPE_FIELDS (type
);
4889 field
&& TREE_CODE (field
) != FIELD_DECL
;
4890 field
= DECL_CHAIN (field
))
4894 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4896 /* Check that the first field is valid for returning in a register. */
4898 /* ... Floats are not allowed */
4899 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
4902 /* ... Aggregates that are not themselves valid for returning in
4903 a register are not allowed. */
4904 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
4907 /* Now check the remaining fields, if any. Only bitfields are allowed,
4908 since they are not addressable. */
4909 for (field
= DECL_CHAIN (field
);
4911 field
= DECL_CHAIN (field
))
4913 if (TREE_CODE (field
) != FIELD_DECL
)
4916 if (!DECL_BIT_FIELD_TYPE (field
))
4923 if (TREE_CODE (type
) == UNION_TYPE
)
4927 /* Unions can be returned in registers if every element is
4928 integral, or can be returned in an integer register. */
4929 for (field
= TYPE_FIELDS (type
);
4931 field
= DECL_CHAIN (field
))
4933 if (TREE_CODE (field
) != FIELD_DECL
)
4936 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
4939 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
4945 #endif /* not ARM_WINCE */
4947 /* Return all other types in memory. */
4951 const struct pcs_attribute_arg
4955 } pcs_attribute_args
[] =
4957 {"aapcs", ARM_PCS_AAPCS
},
4958 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
4960 /* We could recognize these, but changes would be needed elsewhere
4961 * to implement them. */
4962 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
4963 {"atpcs", ARM_PCS_ATPCS
},
4964 {"apcs", ARM_PCS_APCS
},
4966 {NULL
, ARM_PCS_UNKNOWN
}
4970 arm_pcs_from_attribute (tree attr
)
4972 const struct pcs_attribute_arg
*ptr
;
4975 /* Get the value of the argument. */
4976 if (TREE_VALUE (attr
) == NULL_TREE
4977 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
4978 return ARM_PCS_UNKNOWN
;
4980 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
4982 /* Check it against the list of known arguments. */
4983 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
4984 if (streq (arg
, ptr
->arg
))
4987 /* An unrecognized interrupt type. */
4988 return ARM_PCS_UNKNOWN
;
4991 /* Get the PCS variant to use for this call. TYPE is the function's type
4992 specification, DECL is the specific declartion. DECL may be null if
4993 the call could be indirect or if this is a library call. */
4995 arm_get_pcs_model (const_tree type
, const_tree decl
)
4997 bool user_convention
= false;
4998 enum arm_pcs user_pcs
= arm_pcs_default
;
5003 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5006 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5007 user_convention
= true;
5010 if (TARGET_AAPCS_BASED
)
5012 /* Detect varargs functions. These always use the base rules
5013 (no argument is ever a candidate for a co-processor
5015 bool base_rules
= stdarg_p (type
);
5017 if (user_convention
)
5019 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5020 sorry ("non-AAPCS derived PCS variant");
5021 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5022 error ("variadic functions must use the base AAPCS variant");
5026 return ARM_PCS_AAPCS
;
5027 else if (user_convention
)
5029 else if (decl
&& flag_unit_at_a_time
)
5031 /* Local functions never leak outside this compilation unit,
5032 so we are free to use whatever conventions are
5034 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5035 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5037 return ARM_PCS_AAPCS_LOCAL
;
5040 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5041 sorry ("PCS variant");
5043 /* For everything else we use the target's default. */
5044 return arm_pcs_default
;
5049 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5050 const_tree fntype ATTRIBUTE_UNUSED
,
5051 rtx libcall ATTRIBUTE_UNUSED
,
5052 const_tree fndecl ATTRIBUTE_UNUSED
)
5054 /* Record the unallocated VFP registers. */
5055 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5056 pcum
->aapcs_vfp_reg_alloc
= 0;
5059 /* Walk down the type tree of TYPE counting consecutive base elements.
5060 If *MODEP is VOIDmode, then set it to the first valid floating point
5061 type. If a non-floating point type is found, or if a floating point
5062 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5063 otherwise return the count in the sub-tree. */
5065 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
5067 enum machine_mode mode
;
5070 switch (TREE_CODE (type
))
5073 mode
= TYPE_MODE (type
);
5074 if (mode
!= DFmode
&& mode
!= SFmode
)
5077 if (*modep
== VOIDmode
)
5086 mode
= TYPE_MODE (TREE_TYPE (type
));
5087 if (mode
!= DFmode
&& mode
!= SFmode
)
5090 if (*modep
== VOIDmode
)
5099 /* Use V2SImode and V4SImode as representatives of all 64-bit
5100 and 128-bit vector types, whether or not those modes are
5101 supported with the present options. */
5102 size
= int_size_in_bytes (type
);
5115 if (*modep
== VOIDmode
)
5118 /* Vector modes are considered to be opaque: two vectors are
5119 equivalent for the purposes of being homogeneous aggregates
5120 if they are the same size. */
5129 tree index
= TYPE_DOMAIN (type
);
5131 /* Can't handle incomplete types nor sizes that are not
5133 if (!COMPLETE_TYPE_P (type
)
5134 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5137 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5140 || !TYPE_MAX_VALUE (index
)
5141 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5142 || !TYPE_MIN_VALUE (index
)
5143 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5147 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5148 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5150 /* There must be no padding. */
5151 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5163 /* Can't handle incomplete types nor sizes that are not
5165 if (!COMPLETE_TYPE_P (type
)
5166 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5169 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5171 if (TREE_CODE (field
) != FIELD_DECL
)
5174 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5180 /* There must be no padding. */
5181 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5188 case QUAL_UNION_TYPE
:
5190 /* These aren't very interesting except in a degenerate case. */
5195 /* Can't handle incomplete types nor sizes that are not
5197 if (!COMPLETE_TYPE_P (type
)
5198 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5201 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5203 if (TREE_CODE (field
) != FIELD_DECL
)
5206 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5209 count
= count
> sub_count
? count
: sub_count
;
5212 /* There must be no padding. */
5213 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5226 /* Return true if PCS_VARIANT should use VFP registers. */
5228 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5230 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5232 static bool seen_thumb1_vfp
= false;
5234 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5236 sorry ("Thumb-1 hard-float VFP ABI");
5237 /* sorry() is not immediately fatal, so only display this once. */
5238 seen_thumb1_vfp
= true;
5244 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5247 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
5248 (TARGET_VFP_DOUBLE
|| !is_double
));
5251 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5252 suitable for passing or returning in VFP registers for the PCS
5253 variant selected. If it is, then *BASE_MODE is updated to contain
5254 a machine mode describing each element of the argument's type and
5255 *COUNT to hold the number of such elements. */
5257 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5258 enum machine_mode mode
, const_tree type
,
5259 enum machine_mode
*base_mode
, int *count
)
5261 enum machine_mode new_mode
= VOIDmode
;
5263 /* If we have the type information, prefer that to working things
5264 out from the mode. */
5267 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5269 if (ag_count
> 0 && ag_count
<= 4)
5274 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5275 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5276 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5281 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5284 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5290 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5293 *base_mode
= new_mode
;
5298 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5299 enum machine_mode mode
, const_tree type
)
5301 int count ATTRIBUTE_UNUSED
;
5302 enum machine_mode ag_mode ATTRIBUTE_UNUSED
;
5304 if (!use_vfp_abi (pcs_variant
, false))
5306 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5311 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5314 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5317 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5318 &pcum
->aapcs_vfp_rmode
,
5319 &pcum
->aapcs_vfp_rcount
);
5323 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5324 const_tree type ATTRIBUTE_UNUSED
)
5326 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
5327 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5330 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5331 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5333 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5335 || (mode
== TImode
&& ! TARGET_NEON
)
5336 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5339 int rcount
= pcum
->aapcs_vfp_rcount
;
5341 enum machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5345 /* Avoid using unsupported vector modes. */
5346 if (rmode
== V2SImode
)
5348 else if (rmode
== V4SImode
)
5355 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5356 for (i
= 0; i
< rcount
; i
++)
5358 rtx tmp
= gen_rtx_REG (rmode
,
5359 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5360 tmp
= gen_rtx_EXPR_LIST
5362 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5363 XVECEXP (par
, 0, i
) = tmp
;
5366 pcum
->aapcs_reg
= par
;
5369 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
5376 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
5377 enum machine_mode mode
,
5378 const_tree type ATTRIBUTE_UNUSED
)
5380 if (!use_vfp_abi (pcs_variant
, false))
5383 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
5386 enum machine_mode ag_mode
;
5391 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5396 if (ag_mode
== V2SImode
)
5398 else if (ag_mode
== V4SImode
)
5404 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
5405 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
5406 for (i
= 0; i
< count
; i
++)
5408 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
5409 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
5410 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
5411 XVECEXP (par
, 0, i
) = tmp
;
5417 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
5421 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5422 enum machine_mode mode ATTRIBUTE_UNUSED
,
5423 const_tree type ATTRIBUTE_UNUSED
)
5425 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
5426 pcum
->aapcs_vfp_reg_alloc
= 0;
5430 #define AAPCS_CP(X) \
5432 aapcs_ ## X ## _cum_init, \
5433 aapcs_ ## X ## _is_call_candidate, \
5434 aapcs_ ## X ## _allocate, \
5435 aapcs_ ## X ## _is_return_candidate, \
5436 aapcs_ ## X ## _allocate_return_reg, \
5437 aapcs_ ## X ## _advance \
5440 /* Table of co-processors that can be used to pass arguments in
5441 registers. Idealy no arugment should be a candidate for more than
5442 one co-processor table entry, but the table is processed in order
5443 and stops after the first match. If that entry then fails to put
5444 the argument into a co-processor register, the argument will go on
5448 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5449 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
5451 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5452 BLKmode) is a candidate for this co-processor's registers; this
5453 function should ignore any position-dependent state in
5454 CUMULATIVE_ARGS and only use call-type dependent information. */
5455 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5457 /* Return true if the argument does get a co-processor register; it
5458 should set aapcs_reg to an RTX of the register allocated as is
5459 required for a return from FUNCTION_ARG. */
5460 bool (*allocate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5462 /* Return true if a result of mode MODE (or type TYPE if MODE is
5463 BLKmode) is can be returned in this co-processor's registers. */
5464 bool (*is_return_candidate
) (enum arm_pcs
, enum machine_mode
, const_tree
);
5466 /* Allocate and return an RTX element to hold the return type of a
5467 call, this routine must not fail and will only be called if
5468 is_return_candidate returned true with the same parameters. */
5469 rtx (*allocate_return_reg
) (enum arm_pcs
, enum machine_mode
, const_tree
);
5471 /* Finish processing this argument and prepare to start processing
5473 void (*advance
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5474 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
5482 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5487 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5488 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
5495 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
5497 /* We aren't passed a decl, so we can't check that a call is local.
5498 However, it isn't clear that that would be a win anyway, since it
5499 might limit some tail-calling opportunities. */
5500 enum arm_pcs pcs_variant
;
5504 const_tree fndecl
= NULL_TREE
;
5506 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5509 fntype
= TREE_TYPE (fntype
);
5512 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5515 pcs_variant
= arm_pcs_default
;
5517 if (pcs_variant
!= ARM_PCS_AAPCS
)
5521 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5522 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
5531 aapcs_allocate_return_reg (enum machine_mode mode
, const_tree type
,
5534 /* We aren't passed a decl, so we can't check that a call is local.
5535 However, it isn't clear that that would be a win anyway, since it
5536 might limit some tail-calling opportunities. */
5537 enum arm_pcs pcs_variant
;
5538 int unsignedp ATTRIBUTE_UNUSED
;
5542 const_tree fndecl
= NULL_TREE
;
5544 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5547 fntype
= TREE_TYPE (fntype
);
5550 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5553 pcs_variant
= arm_pcs_default
;
5555 /* Promote integer types. */
5556 if (type
&& INTEGRAL_TYPE_P (type
))
5557 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
5559 if (pcs_variant
!= ARM_PCS_AAPCS
)
5563 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5564 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
5566 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
5570 /* Promotes small structs returned in a register to full-word size
5571 for big-endian AAPCS. */
5572 if (type
&& arm_return_in_msb (type
))
5574 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5575 if (size
% UNITS_PER_WORD
!= 0)
5577 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5578 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5582 return gen_rtx_REG (mode
, R0_REGNUM
);
5586 aapcs_libcall_value (enum machine_mode mode
)
5588 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
5589 && GET_MODE_SIZE (mode
) <= 4)
5592 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
5595 /* Lay out a function argument using the AAPCS rules. The rule
5596 numbers referred to here are those in the AAPCS. */
5598 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5599 const_tree type
, bool named
)
5604 /* We only need to do this once per argument. */
5605 if (pcum
->aapcs_arg_processed
)
5608 pcum
->aapcs_arg_processed
= true;
5610 /* Special case: if named is false then we are handling an incoming
5611 anonymous argument which is on the stack. */
5615 /* Is this a potential co-processor register candidate? */
5616 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5618 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
5619 pcum
->aapcs_cprc_slot
= slot
;
5621 /* We don't have to apply any of the rules from part B of the
5622 preparation phase, these are handled elsewhere in the
5627 /* A Co-processor register candidate goes either in its own
5628 class of registers or on the stack. */
5629 if (!pcum
->aapcs_cprc_failed
[slot
])
5631 /* C1.cp - Try to allocate the argument to co-processor
5633 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
5636 /* C2.cp - Put the argument on the stack and note that we
5637 can't assign any more candidates in this slot. We also
5638 need to note that we have allocated stack space, so that
5639 we won't later try to split a non-cprc candidate between
5640 core registers and the stack. */
5641 pcum
->aapcs_cprc_failed
[slot
] = true;
5642 pcum
->can_split
= false;
5645 /* We didn't get a register, so this argument goes on the
5647 gcc_assert (pcum
->can_split
== false);
5652 /* C3 - For double-word aligned arguments, round the NCRN up to the
5653 next even number. */
5654 ncrn
= pcum
->aapcs_ncrn
;
5655 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
5658 nregs
= ARM_NUM_REGS2(mode
, type
);
5660 /* Sigh, this test should really assert that nregs > 0, but a GCC
5661 extension allows empty structs and then gives them empty size; it
5662 then allows such a structure to be passed by value. For some of
5663 the code below we have to pretend that such an argument has
5664 non-zero size so that we 'locate' it correctly either in
5665 registers or on the stack. */
5666 gcc_assert (nregs
>= 0);
5668 nregs2
= nregs
? nregs
: 1;
5670 /* C4 - Argument fits entirely in core registers. */
5671 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
5673 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5674 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
5678 /* C5 - Some core registers left and there are no arguments already
5679 on the stack: split this argument between the remaining core
5680 registers and the stack. */
5681 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
5683 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5684 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5685 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
5689 /* C6 - NCRN is set to 4. */
5690 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5692 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
5696 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5697 for a call to a function whose data type is FNTYPE.
5698 For a library call, FNTYPE is NULL. */
5700 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
5702 tree fndecl ATTRIBUTE_UNUSED
)
5704 /* Long call handling. */
5706 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5708 pcum
->pcs_variant
= arm_pcs_default
;
5710 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5712 if (arm_libcall_uses_aapcs_base (libname
))
5713 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
5715 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
5716 pcum
->aapcs_reg
= NULL_RTX
;
5717 pcum
->aapcs_partial
= 0;
5718 pcum
->aapcs_arg_processed
= false;
5719 pcum
->aapcs_cprc_slot
= -1;
5720 pcum
->can_split
= true;
5722 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5726 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5728 pcum
->aapcs_cprc_failed
[i
] = false;
5729 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
5737 /* On the ARM, the offset starts at 0. */
5739 pcum
->iwmmxt_nregs
= 0;
5740 pcum
->can_split
= true;
5742 /* Varargs vectors are treated the same as long long.
5743 named_count avoids having to change the way arm handles 'named' */
5744 pcum
->named_count
= 0;
5747 if (TARGET_REALLY_IWMMXT
&& fntype
)
5751 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
5753 fn_arg
= TREE_CHAIN (fn_arg
))
5754 pcum
->named_count
+= 1;
5756 if (! pcum
->named_count
)
5757 pcum
->named_count
= INT_MAX
;
5761 /* Return true if we use LRA instead of reload pass. */
5765 return arm_lra_flag
;
5768 /* Return true if mode/type need doubleword alignment. */
5770 arm_needs_doubleword_align (enum machine_mode mode
, const_tree type
)
5772 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
5773 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
5777 /* Determine where to put an argument to a function.
5778 Value is zero to push the argument on the stack,
5779 or a hard register in which to store the argument.
5781 MODE is the argument's machine mode.
5782 TYPE is the data type of the argument (as a tree).
5783 This is null for libcalls where that information may
5785 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5786 the preceding args and about the function being called.
5787 NAMED is nonzero if this argument is a named parameter
5788 (otherwise it is an extra parameter matching an ellipsis).
5790 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5791 other arguments are passed on the stack. If (NAMED == 0) (which happens
5792 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5793 defined), say it is passed in the stack (function_prologue will
5794 indeed make it pass in the stack if necessary). */
5797 arm_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
5798 const_tree type
, bool named
)
5800 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5803 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5804 a call insn (op3 of a call_value insn). */
5805 if (mode
== VOIDmode
)
5808 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5810 aapcs_layout_arg (pcum
, mode
, type
, named
);
5811 return pcum
->aapcs_reg
;
5814 /* Varargs vectors are treated the same as long long.
5815 named_count avoids having to change the way arm handles 'named' */
5816 if (TARGET_IWMMXT_ABI
5817 && arm_vector_mode_supported_p (mode
)
5818 && pcum
->named_count
> pcum
->nargs
+ 1)
5820 if (pcum
->iwmmxt_nregs
<= 9)
5821 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
5824 pcum
->can_split
= false;
5829 /* Put doubleword aligned quantities in even register pairs. */
5831 && ARM_DOUBLEWORD_ALIGN
5832 && arm_needs_doubleword_align (mode
, type
))
5835 /* Only allow splitting an arg between regs and memory if all preceding
5836 args were allocated to regs. For args passed by reference we only count
5837 the reference pointer. */
5838 if (pcum
->can_split
)
5841 nregs
= ARM_NUM_REGS2 (mode
, type
);
5843 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
5846 return gen_rtx_REG (mode
, pcum
->nregs
);
5850 arm_function_arg_boundary (enum machine_mode mode
, const_tree type
)
5852 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
5853 ? DOUBLEWORD_ALIGNMENT
5858 arm_arg_partial_bytes (cumulative_args_t pcum_v
, enum machine_mode mode
,
5859 tree type
, bool named
)
5861 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5862 int nregs
= pcum
->nregs
;
5864 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5866 aapcs_layout_arg (pcum
, mode
, type
, named
);
5867 return pcum
->aapcs_partial
;
5870 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
5873 if (NUM_ARG_REGS
> nregs
5874 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
5876 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
5881 /* Update the data in PCUM to advance over an argument
5882 of mode MODE and data type TYPE.
5883 (TYPE is null for libcalls where that information may not be available.) */
5886 arm_function_arg_advance (cumulative_args_t pcum_v
, enum machine_mode mode
,
5887 const_tree type
, bool named
)
5889 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5891 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5893 aapcs_layout_arg (pcum
, mode
, type
, named
);
5895 if (pcum
->aapcs_cprc_slot
>= 0)
5897 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
5899 pcum
->aapcs_cprc_slot
= -1;
5902 /* Generic stuff. */
5903 pcum
->aapcs_arg_processed
= false;
5904 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
5905 pcum
->aapcs_reg
= NULL_RTX
;
5906 pcum
->aapcs_partial
= 0;
5911 if (arm_vector_mode_supported_p (mode
)
5912 && pcum
->named_count
> pcum
->nargs
5913 && TARGET_IWMMXT_ABI
)
5914 pcum
->iwmmxt_nregs
+= 1;
5916 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
5920 /* Variable sized types are passed by reference. This is a GCC
5921 extension to the ARM ABI. */
5924 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
5925 enum machine_mode mode ATTRIBUTE_UNUSED
,
5926 const_tree type
, bool named ATTRIBUTE_UNUSED
)
5928 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;
5942 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5944 arm_pragma_long_calls
= LONG
;
5948 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5950 arm_pragma_long_calls
= SHORT
;
5954 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5956 arm_pragma_long_calls
= OFF
;
5959 /* Handle an attribute requiring a FUNCTION_DECL;
5960 arguments as in struct attribute_spec.handler. */
5962 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
5963 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
5965 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5967 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5969 *no_add_attrs
= true;
5975 /* Handle an "interrupt" or "isr" attribute;
5976 arguments as in struct attribute_spec.handler. */
5978 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
5983 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5985 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5987 *no_add_attrs
= true;
5989 /* FIXME: the argument if any is checked for type attributes;
5990 should it be checked for decl ones? */
5994 if (TREE_CODE (*node
) == FUNCTION_TYPE
5995 || TREE_CODE (*node
) == METHOD_TYPE
)
5997 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
5999 warning (OPT_Wattributes
, "%qE attribute ignored",
6001 *no_add_attrs
= true;
6004 else if (TREE_CODE (*node
) == POINTER_TYPE
6005 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6006 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6007 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6009 *node
= build_variant_type_copy (*node
);
6010 TREE_TYPE (*node
) = build_type_attribute_variant
6012 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6013 *no_add_attrs
= true;
6017 /* Possibly pass this attribute on from the type to a decl. */
6018 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6019 | (int) ATTR_FLAG_FUNCTION_NEXT
6020 | (int) ATTR_FLAG_ARRAY_NEXT
))
6022 *no_add_attrs
= true;
6023 return tree_cons (name
, args
, NULL_TREE
);
6027 warning (OPT_Wattributes
, "%qE attribute ignored",
6036 /* Handle a "pcs" attribute; arguments as in struct
6037 attribute_spec.handler. */
6039 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6040 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6042 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6044 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6045 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */
static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      /* Keep the attribute on the type as well.  */
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
6075 /* Return 0 if the attributes for two types are incompatible, 1 if they
6076 are compatible, and 2 if they are nearly compatible (which causes a
6077 warning to be generated). */
6079 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
6083 /* Check for mismatch of non-default calling convention. */
6084 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
6087 /* Check for mismatched call attributes. */
6088 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6089 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6090 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6091 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6093 /* Only bother to check if an attribute is defined. */
6094 if (l1
| l2
| s1
| s2
)
6096 /* If one type has an attribute, the other must have the same attribute. */
6097 if ((l1
!= l2
) || (s1
!= s2
))
6100 /* Disallow mixed attributes. */
6101 if ((l1
& s2
) || (l2
& s1
))
6105 /* Check for mismatched ISR attribute. */
6106 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
6108 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
6109 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
6111 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
6118 /* Assigns default attributes to newly defined type. This is used to
6119 set short_call/long_call attributes for function types of
6120 functions defined inside corresponding #pragma scopes. */
6122 arm_set_default_type_attributes (tree type
)
6124 /* Add __attribute__ ((long_call)) to all functions, when
6125 inside #pragma long_calls or __attribute__ ((short_call)),
6126 when inside #pragma no_long_calls. */
6127 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
6129 tree type_attr_list
, attr_name
;
6130 type_attr_list
= TYPE_ATTRIBUTES (type
);
6132 if (arm_pragma_long_calls
== LONG
)
6133 attr_name
= get_identifier ("long_call");
6134 else if (arm_pragma_long_calls
== SHORT
)
6135 attr_name
= get_identifier ("short_call");
6139 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
6140 TYPE_ATTRIBUTES (type
) = type_attr_list
;
6144 /* Return true if DECL is known to be linked into section SECTION. */
6147 arm_function_in_section_p (tree decl
, section
*section
)
6149 /* We can only be certain about functions defined in the same
6150 compilation unit. */
6151 if (!TREE_STATIC (decl
))
6154 /* Make sure that SYMBOL always binds to the definition in this
6155 compilation unit. */
6156 if (!targetm
.binds_local_p (decl
))
6159 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6160 if (!DECL_SECTION_NAME (decl
))
6162 /* Make sure that we will not create a unique section for DECL. */
6163 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
6167 return function_section (decl
) == section
;
6170 /* Return nonzero if a 32-bit "long_call" should be generated for
6171 a call from the current function to DECL. We generate a long_call
6174 a. has an __attribute__((long call))
6175 or b. is within the scope of a #pragma long_calls
6176 or c. the -mlong-calls command line switch has been specified
6178 However we do not generate a long call if the function:
6180 d. has an __attribute__ ((short_call))
6181 or e. is inside the scope of a #pragma no_long_calls
6182 or f. is defined in the same section as the current function. */
6185 arm_is_long_call_p (tree decl
)
6190 return TARGET_LONG_CALLS
;
6192 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
6193 if (lookup_attribute ("short_call", attrs
))
6196 /* For "f", be conservative, and only cater for cases in which the
6197 whole of the current function is placed in the same section. */
6198 if (!flag_reorder_blocks_and_partition
6199 && TREE_CODE (decl
) == FUNCTION_DECL
6200 && arm_function_in_section_p (decl
, current_function_section ()))
6203 if (lookup_attribute ("long_call", attrs
))
6206 return TARGET_LONG_CALLS
;
6209 /* Return nonzero if it is ok to make a tail-call to DECL. */
6211 arm_function_ok_for_sibcall (tree decl
, tree exp
)
6213 unsigned long func_type
;
6215 if (cfun
->machine
->sibcall_blocked
)
6218 /* Never tailcall something if we are generating code for Thumb-1. */
6222 /* The PIC register is live on entry to VxWorks PLT entries, so we
6223 must make the call before restoring the PIC register. */
6224 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
6227 /* If we are interworking and the function is not declared static
6228 then we can't tail-call it unless we know that it exists in this
6229 compilation unit (since it might be a Thumb routine). */
6230 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
6231 && !TREE_ASM_WRITTEN (decl
))
6234 func_type
= arm_current_func_type ();
6235 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6236 if (IS_INTERRUPT (func_type
))
6239 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
6241 /* Check that the return value locations are the same. For
6242 example that we aren't returning a value from the sibling in
6243 a VFP register but then need to transfer it to a core
6247 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
6248 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
6250 if (!rtx_equal_p (a
, b
))
6254 /* Never tailcall if function may be called with a misaligned SP. */
6255 if (IS_STACKALIGN (func_type
))
6258 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6259 references should become a NOP. Don't convert such calls into
6261 if (TARGET_AAPCS_BASED
6262 && arm_abi
== ARM_ABI_AAPCS
6264 && DECL_WEAK (decl
))
6267 /* Everything else is ok. */
6272 /* Addressing mode support functions. */
6274 /* Return nonzero if X is a legitimate immediate operand when compiling
6275 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6277 legitimate_pic_operand_p (rtx x
)
6279 if (GET_CODE (x
) == SYMBOL_REF
6280 || (GET_CODE (x
) == CONST
6281 && GET_CODE (XEXP (x
, 0)) == PLUS
6282 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
6288 /* Record that the current function needs a PIC register. Initialize
6289 cfun->machine->pic_reg if we have not already done so. */
6292 require_pic_register (void)
6294 /* A lot of the logic here is made obscure by the fact that this
6295 routine gets called as part of the rtx cost estimation process.
6296 We don't want those calls to affect any assumptions about the real
6297 function; and further, we can't call entry_of_function() until we
6298 start the real expansion process. */
6299 if (!crtl
->uses_pic_offset_table
)
6301 gcc_assert (can_create_pseudo_p ());
6302 if (arm_pic_register
!= INVALID_REGNUM
6303 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
6305 if (!cfun
->machine
->pic_reg
)
6306 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
6308 /* Play games to avoid marking the function as needing pic
6309 if we are being called as part of the cost-estimation
6311 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6312 crtl
->uses_pic_offset_table
= 1;
6318 if (!cfun
->machine
->pic_reg
)
6319 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
6321 /* Play games to avoid marking the function as needing pic
6322 if we are being called as part of the cost-estimation
6324 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6326 crtl
->uses_pic_offset_table
= 1;
6329 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
6330 && arm_pic_register
> LAST_LO_REGNUM
)
6331 emit_move_insn (cfun
->machine
->pic_reg
,
6332 gen_rtx_REG (Pmode
, arm_pic_register
));
6334 arm_load_pic_register (0UL);
6339 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
6341 INSN_LOCATION (insn
) = prologue_location
;
6343 /* We can be called during expansion of PHI nodes, where
6344 we can't yet emit instructions directly in the final
6345 insn stream. Queue the insns on the entry edge, they will
6346 be committed after everything else is expanded. */
6347 insert_insn_on_edge (seq
,
6348 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
6355 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
6357 if (GET_CODE (orig
) == SYMBOL_REF
6358 || GET_CODE (orig
) == LABEL_REF
)
6364 gcc_assert (can_create_pseudo_p ());
6365 reg
= gen_reg_rtx (Pmode
);
6368 /* VxWorks does not impose a fixed gap between segments; the run-time
6369 gap can be different from the object-file gap. We therefore can't
6370 use GOTOFF unless we are absolutely sure that the symbol is in the
6371 same segment as the GOT. Unfortunately, the flexibility of linker
6372 scripts means that we can't be sure of that in general, so assume
6373 that GOTOFF is never valid on VxWorks. */
6374 if ((GET_CODE (orig
) == LABEL_REF
6375 || (GET_CODE (orig
) == SYMBOL_REF
&&
6376 SYMBOL_REF_LOCAL_P (orig
)))
6378 && arm_pic_data_is_text_relative
)
6379 insn
= arm_pic_static_addr (orig
, reg
);
6385 /* If this function doesn't have a pic register, create one now. */
6386 require_pic_register ();
6388 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
6390 /* Make the MEM as close to a constant as possible. */
6391 mem
= SET_SRC (pat
);
6392 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
6393 MEM_READONLY_P (mem
) = 1;
6394 MEM_NOTRAP_P (mem
) = 1;
6396 insn
= emit_insn (pat
);
6399 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6401 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
6405 else if (GET_CODE (orig
) == CONST
)
6409 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6410 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
6413 /* Handle the case where we have: const (UNSPEC_TLS). */
6414 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
6415 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
6418 /* Handle the case where we have:
6419 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6421 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6422 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
6423 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
6425 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
6431 gcc_assert (can_create_pseudo_p ());
6432 reg
= gen_reg_rtx (Pmode
);
6435 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
6437 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
6438 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
6439 base
== reg
? 0 : reg
);
6441 if (CONST_INT_P (offset
))
6443 /* The base register doesn't really matter, we only want to
6444 test the index for the appropriate mode. */
6445 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
6447 gcc_assert (can_create_pseudo_p ());
6448 offset
= force_reg (Pmode
, offset
);
6451 if (CONST_INT_P (offset
))
6452 return plus_constant (Pmode
, base
, INTVAL (offset
));
6455 if (GET_MODE_SIZE (mode
) > 4
6456 && (GET_MODE_CLASS (mode
) == MODE_INT
6457 || TARGET_SOFT_FLOAT
))
6459 emit_insn (gen_addsi3 (reg
, base
, offset
));
6463 return gen_rtx_PLUS (Pmode
, base
, offset
);
6470 /* Find a spare register to use during the prolog of a function. */
6473 thumb_find_work_register (unsigned long pushed_regs_mask
)
6477 /* Check the argument registers first as these are call-used. The
6478 register allocation order means that sometimes r3 might be used
6479 but earlier argument registers might not, so check them all. */
6480 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
6481 if (!df_regs_ever_live_p (reg
))
6484 /* Before going on to check the call-saved registers we can try a couple
6485 more ways of deducing that r3 is available. The first is when we are
6486 pushing anonymous arguments onto the stack and we have less than 4
6487 registers worth of fixed arguments(*). In this case r3 will be part of
6488 the variable argument list and so we can be sure that it will be
6489 pushed right at the start of the function. Hence it will be available
6490 for the rest of the prologue.
6491 (*): ie crtl->args.pretend_args_size is greater than 0. */
6492 if (cfun
->machine
->uses_anonymous_args
6493 && crtl
->args
.pretend_args_size
> 0)
6494 return LAST_ARG_REGNUM
;
6496 /* The other case is when we have fixed arguments but less than 4 registers
6497 worth. In this case r3 might be used in the body of the function, but
6498 it is not being used to convey an argument into the function. In theory
6499 we could just check crtl->args.size to see how many bytes are
6500 being passed in argument registers, but it seems that it is unreliable.
6501 Sometimes it will have the value 0 when in fact arguments are being
6502 passed. (See testcase execute/20021111-1.c for an example). So we also
6503 check the args_info.nregs field as well. The problem with this field is
6504 that it makes no allowances for arguments that are passed to the
6505 function but which are not used. Hence we could miss an opportunity
6506 when a function has an unused argument in r3. But it is better to be
6507 safe than to be sorry. */
6508 if (! cfun
->machine
->uses_anonymous_args
6509 && crtl
->args
.size
>= 0
6510 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
6511 && (TARGET_AAPCS_BASED
6512 ? crtl
->args
.info
.aapcs_ncrn
< 4
6513 : crtl
->args
.info
.nregs
< 4))
6514 return LAST_ARG_REGNUM
;
6516 /* Otherwise look for a call-saved register that is going to be pushed. */
6517 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
6518 if (pushed_regs_mask
& (1 << reg
))
6523 /* Thumb-2 can use high regs. */
6524 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
6525 if (pushed_regs_mask
& (1 << reg
))
6528 /* Something went wrong - thumb_compute_save_reg_mask()
6529 should have arranged for a suitable register to be pushed. */
6533 static GTY(()) int pic_labelno
;
6535 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6539 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
6541 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
6543 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
6546 gcc_assert (flag_pic
);
6548 pic_reg
= cfun
->machine
->pic_reg
;
6549 if (TARGET_VXWORKS_RTP
)
6551 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
6552 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6553 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
6555 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
6557 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
6558 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
6562 /* We use an UNSPEC rather than a LABEL_REF because this label
6563 never appears in the code stream. */
6565 labelno
= GEN_INT (pic_labelno
++);
6566 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6567 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6569 /* On the ARM the PC register contains 'dot + 8' at the time of the
6570 addition, on the Thumb it is 'dot + 4'. */
6571 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6572 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
6574 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6578 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6580 else /* TARGET_THUMB1 */
6582 if (arm_pic_register
!= INVALID_REGNUM
6583 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
6585 /* We will have pushed the pic register, so we should always be
6586 able to find a work register. */
6587 pic_tmp
= gen_rtx_REG (SImode
,
6588 thumb_find_work_register (saved_regs
));
6589 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
6590 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
6591 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
6593 else if (arm_pic_register
!= INVALID_REGNUM
6594 && arm_pic_register
> LAST_LO_REGNUM
6595 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
6597 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6598 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
6599 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
6602 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6606 /* Need to emit this whether or not we obey regdecls,
6607 since setjmp/longjmp can cause life info to screw up. */
6611 /* Generate code to load the address of a static var when flag_pic is set. */
6613 arm_pic_static_addr (rtx orig
, rtx reg
)
6615 rtx l1
, labelno
, offset_rtx
, insn
;
6617 gcc_assert (flag_pic
);
6619 /* We use an UNSPEC rather than a LABEL_REF because this label
6620 never appears in the code stream. */
6621 labelno
= GEN_INT (pic_labelno
++);
6622 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6623 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6625 /* On the ARM the PC register contains 'dot + 8' at the time of the
6626 addition, on the Thumb it is 'dot + 4'. */
6627 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6628 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
6629 UNSPEC_SYMBOL_OFFSET
);
6630 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
6632 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
6636 /* Return nonzero if X is valid as an ARM state addressing register. */
6638 arm_address_register_rtx_p (rtx x
, int strict_p
)
6648 return ARM_REGNO_OK_FOR_BASE_P (regno
);
6650 return (regno
<= LAST_ARM_REGNUM
6651 || regno
>= FIRST_PSEUDO_REGISTER
6652 || regno
== FRAME_POINTER_REGNUM
6653 || regno
== ARG_POINTER_REGNUM
);
6656 /* Return TRUE if this rtx is the difference of a symbol and a label,
6657 and will reduce to a PC-relative relocation in the object file.
6658 Expressions like this can be left alone when generating PIC, rather
6659 than forced through the GOT. */
6661 pcrel_constant_p (rtx x
)
6663 if (GET_CODE (x
) == MINUS
)
6664 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
6669 /* Return true if X will surely end up in an index register after next
6672 will_be_in_index_register (const_rtx x
)
6674 /* arm.md: calculate_pic_address will split this into a register. */
6675 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
6678 /* Return nonzero if X is a valid ARM state address operand. */
6680 arm_legitimate_address_outer_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
6684 enum rtx_code code
= GET_CODE (x
);
6686 if (arm_address_register_rtx_p (x
, strict_p
))
6689 use_ldrd
= (TARGET_LDRD
6691 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6693 if (code
== POST_INC
|| code
== PRE_DEC
6694 || ((code
== PRE_INC
|| code
== POST_DEC
)
6695 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6696 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6698 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6699 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6700 && GET_CODE (XEXP (x
, 1)) == PLUS
6701 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6703 rtx addend
= XEXP (XEXP (x
, 1), 1);
6705 /* Don't allow ldrd post increment by register because it's hard
6706 to fixup invalid register choices. */
6708 && GET_CODE (x
) == POST_MODIFY
6712 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
6713 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
6716 /* After reload constants split into minipools will have addresses
6717 from a LABEL_REF. */
6718 else if (reload_completed
6719 && (code
== LABEL_REF
6721 && GET_CODE (XEXP (x
, 0)) == PLUS
6722 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6723 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6726 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6729 else if (code
== PLUS
)
6731 rtx xop0
= XEXP (x
, 0);
6732 rtx xop1
= XEXP (x
, 1);
6734 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6735 && ((CONST_INT_P (xop1
)
6736 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
6737 || (!strict_p
&& will_be_in_index_register (xop1
))))
6738 || (arm_address_register_rtx_p (xop1
, strict_p
)
6739 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
6743 /* Reload currently can't handle MINUS, so disable this for now */
6744 else if (GET_CODE (x
) == MINUS
)
6746 rtx xop0
= XEXP (x
, 0);
6747 rtx xop1
= XEXP (x
, 1);
6749 return (arm_address_register_rtx_p (xop0
, strict_p
)
6750 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
6754 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6755 && code
== SYMBOL_REF
6756 && CONSTANT_POOL_ADDRESS_P (x
)
6758 && symbol_mentioned_p (get_pool_constant (x
))
6759 && ! pcrel_constant_p (get_pool_constant (x
))))
6765 /* Return nonzero if X is a valid Thumb-2 address operand. */
6767 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
6770 enum rtx_code code
= GET_CODE (x
);
6772 if (arm_address_register_rtx_p (x
, strict_p
))
6775 use_ldrd
= (TARGET_LDRD
6777 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6779 if (code
== POST_INC
|| code
== PRE_DEC
6780 || ((code
== PRE_INC
|| code
== POST_DEC
)
6781 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6782 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6784 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6785 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6786 && GET_CODE (XEXP (x
, 1)) == PLUS
6787 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6789 /* Thumb-2 only has autoincrement by constant. */
6790 rtx addend
= XEXP (XEXP (x
, 1), 1);
6791 HOST_WIDE_INT offset
;
6793 if (!CONST_INT_P (addend
))
6796 offset
= INTVAL(addend
);
6797 if (GET_MODE_SIZE (mode
) <= 4)
6798 return (offset
> -256 && offset
< 256);
6800 return (use_ldrd
&& offset
> -1024 && offset
< 1024
6801 && (offset
& 3) == 0);
6804 /* After reload constants split into minipools will have addresses
6805 from a LABEL_REF. */
6806 else if (reload_completed
6807 && (code
== LABEL_REF
6809 && GET_CODE (XEXP (x
, 0)) == PLUS
6810 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6811 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6814 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6817 else if (code
== PLUS
)
6819 rtx xop0
= XEXP (x
, 0);
6820 rtx xop1
= XEXP (x
, 1);
6822 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6823 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
6824 || (!strict_p
&& will_be_in_index_register (xop1
))))
6825 || (arm_address_register_rtx_p (xop1
, strict_p
)
6826 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
6829 /* Normally we can assign constant values to target registers without
6830 the help of constant pool. But there are cases we have to use constant
6832 1) assign a label to register.
6833 2) sign-extend a 8bit value to 32bit and then assign to register.
6835 Constant pool access in format:
6836 (set (reg r0) (mem (symbol_ref (".LC0"))))
6837 will cause the use of literal pool (later in function arm_reorg).
6838 So here we mark such format as an invalid format, then the compiler
6839 will adjust it into:
6840 (set (reg r0) (symbol_ref (".LC0")))
6841 (set (reg r0) (mem (reg r0))).
6842 No extra register is required, and (mem (reg r0)) won't cause the use
6843 of literal pools. */
6844 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
6845 && CONSTANT_POOL_ADDRESS_P (x
))
6848 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6849 && code
== SYMBOL_REF
6850 && CONSTANT_POOL_ADDRESS_P (x
)
6852 && symbol_mentioned_p (get_pool_constant (x
))
6853 && ! pcrel_constant_p (get_pool_constant (x
))))
6859 /* Return nonzero if INDEX is valid for an address index operand in
6862 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
6865 HOST_WIDE_INT range
;
6866 enum rtx_code code
= GET_CODE (index
);
6868 /* Standard coprocessor addressing modes. */
6869 if (TARGET_HARD_FLOAT
6871 && (mode
== SFmode
|| mode
== DFmode
))
6872 return (code
== CONST_INT
&& INTVAL (index
) < 1024
6873 && INTVAL (index
) > -1024
6874 && (INTVAL (index
) & 3) == 0);
6876 /* For quad modes, we restrict the constant offset to be slightly less
6877 than what the instruction format permits. We do this because for
6878 quad mode moves, we will actually decompose them into two separate
6879 double-mode reads or writes. INDEX must therefore be a valid
6880 (double-mode) offset and so should INDEX+8. */
6881 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
6882 return (code
== CONST_INT
6883 && INTVAL (index
) < 1016
6884 && INTVAL (index
) > -1024
6885 && (INTVAL (index
) & 3) == 0);
6887 /* We have no such constraint on double mode offsets, so we permit the
6888 full range of the instruction format. */
6889 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
6890 return (code
== CONST_INT
6891 && INTVAL (index
) < 1024
6892 && INTVAL (index
) > -1024
6893 && (INTVAL (index
) & 3) == 0);
6895 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
6896 return (code
== CONST_INT
6897 && INTVAL (index
) < 1024
6898 && INTVAL (index
) > -1024
6899 && (INTVAL (index
) & 3) == 0);
6901 if (arm_address_register_rtx_p (index
, strict_p
)
6902 && (GET_MODE_SIZE (mode
) <= 4))
6905 if (mode
== DImode
|| mode
== DFmode
)
6907 if (code
== CONST_INT
)
6909 HOST_WIDE_INT val
= INTVAL (index
);
6912 return val
> -256 && val
< 256;
6914 return val
> -4096 && val
< 4092;
6917 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
6920 if (GET_MODE_SIZE (mode
) <= 4
6924 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
6928 rtx xiop0
= XEXP (index
, 0);
6929 rtx xiop1
= XEXP (index
, 1);
6931 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
6932 && power_of_two_operand (xiop1
, SImode
))
6933 || (arm_address_register_rtx_p (xiop1
, strict_p
)
6934 && power_of_two_operand (xiop0
, SImode
)));
6936 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
6937 || code
== ASHIFT
|| code
== ROTATERT
)
6939 rtx op
= XEXP (index
, 1);
6941 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
6944 && INTVAL (op
) <= 31);
6948 /* For ARM v4 we may be doing a sign-extend operation during the
6954 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
6960 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
6962 return (code
== CONST_INT
6963 && INTVAL (index
) < range
6964 && INTVAL (index
) > -range
);
6967 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6968 index operand. i.e. 1, 2, 4 or 8. */
6970 thumb2_index_mul_operand (rtx op
)
/* Anything that is not a CONST_INT cannot be a scale factor.  */
6974 if (!CONST_INT_P (op
))
/* NOTE(review): `val' is presumably INTVAL (op) from a declaration line
   lost in extraction -- confirm against the original source.  Accept
   only the scale factors a Thumb-2 index can encode: 1, 2, 4 or 8.  */
6978 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
6981 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6983 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
6985 enum rtx_code code
= GET_CODE (index
);
6987 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6988 /* Standard coprocessor addressing modes. */
6989 if (TARGET_HARD_FLOAT
6991 && (mode
== SFmode
|| mode
== DFmode
))
6992 return (code
== CONST_INT
&& INTVAL (index
) < 1024
6993 /* Thumb-2 allows only > -256 index range for it's core register
6994 load/stores. Since we allow SF/DF in core registers, we have
6995 to use the intersection between -256~4096 (core) and -1024~1024
6997 && INTVAL (index
) > -256
6998 && (INTVAL (index
) & 3) == 0);
7000 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7002 /* For DImode assume values will usually live in core regs
7003 and only allow LDRD addressing modes. */
7004 if (!TARGET_LDRD
|| mode
!= DImode
)
7005 return (code
== CONST_INT
7006 && INTVAL (index
) < 1024
7007 && INTVAL (index
) > -1024
7008 && (INTVAL (index
) & 3) == 0);
7011 /* For quad modes, we restrict the constant offset to be slightly less
7012 than what the instruction format permits. We do this because for
7013 quad mode moves, we will actually decompose them into two separate
7014 double-mode reads or writes. INDEX must therefore be a valid
7015 (double-mode) offset and so should INDEX+8. */
7016 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7017 return (code
== CONST_INT
7018 && INTVAL (index
) < 1016
7019 && INTVAL (index
) > -1024
7020 && (INTVAL (index
) & 3) == 0);
7022 /* We have no such constraint on double mode offsets, so we permit the
7023 full range of the instruction format. */
7024 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7025 return (code
== CONST_INT
7026 && INTVAL (index
) < 1024
7027 && INTVAL (index
) > -1024
7028 && (INTVAL (index
) & 3) == 0);
7030 if (arm_address_register_rtx_p (index
, strict_p
)
7031 && (GET_MODE_SIZE (mode
) <= 4))
7034 if (mode
== DImode
|| mode
== DFmode
)
7036 if (code
== CONST_INT
)
7038 HOST_WIDE_INT val
= INTVAL (index
);
7039 /* ??? Can we assume ldrd for thumb2? */
7040 /* Thumb-2 ldrd only has reg+const addressing modes. */
7041 /* ldrd supports offsets of +-1020.
7042 However the ldr fallback does not. */
7043 return val
> -256 && val
< 256 && (val
& 3) == 0;
7051 rtx xiop0
= XEXP (index
, 0);
7052 rtx xiop1
= XEXP (index
, 1);
7054 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7055 && thumb2_index_mul_operand (xiop1
))
7056 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7057 && thumb2_index_mul_operand (xiop0
)));
7059 else if (code
== ASHIFT
)
7061 rtx op
= XEXP (index
, 1);
7063 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7066 && INTVAL (op
) <= 3);
7069 return (code
== CONST_INT
7070 && INTVAL (index
) < 4096
7071 && INTVAL (index
) > -256);
7074 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
/* Decide whether X can serve as a base register for MODE in 16-bit
   Thumb state.  NOTE(review): the lines extracting `regno' from X and
   the strict/non-strict branch header were lost in extraction.  */
7076 thumb1_base_register_rtx_p (rtx x
, enum machine_mode mode
, int strict_p
)
/* Strict checking defers to the target macro on the hard regno.  */
7086 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
/* Non-strict: accept lo registers, pseudos above the virtual range,
   the frame pointer, and -- only for accesses of 4 bytes or more --
   SP, fresh pseudos, and the hard frame/arg pointers.  */
7088 return (regno
<= LAST_LO_REGNUM
7089 || regno
> LAST_VIRTUAL_REGISTER
7090 || regno
== FRAME_POINTER_REGNUM
7091 || (GET_MODE_SIZE (mode
) >= 4
7092 && (regno
== STACK_POINTER_REGNUM
7093 || regno
>= FIRST_PSEUDO_REGISTER
7094 || x
== hard_frame_pointer_rtx
7095 || x
== arg_pointer_rtx
)));
7098 /* Return nonzero if x is a legitimate index register. This is the case
7099 for any base register that can access a QImode object. */
7101 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
/* An index register must be usable as a base for QImode, the most
   restrictive access size (see the comment above this function).  */
7103 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
7106 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7108 The AP may be eliminated to either the SP or the FP, so we use the
7109 least common denominator, e.g. SImode, and offsets from 0 to 64.
7111 ??? Verify whether the above is the right approach.
7113 ??? Also, the FP may be eliminated to the SP, so perhaps that
7114 needs special handling also.
7116 ??? Look at how the mips16 port solves this problem. It probably uses
7117 better ways to solve some of these problems.
7119 Although it is not incorrect, we don't accept QImode and HImode
7120 addresses based on the frame pointer or arg pointer until the
7121 reload pass starts. This is so that eliminating such addresses
7122 into stack based ones won't produce impossible code. */
7124 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
7126 /* ??? Not clear if this is right. Experiment. */
7127 if (GET_MODE_SIZE (mode
) < 4
7128 && !(reload_in_progress
|| reload_completed
)
7129 && (reg_mentioned_p (frame_pointer_rtx
, x
)
7130 || reg_mentioned_p (arg_pointer_rtx
, x
)
7131 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
7132 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
7133 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
7134 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
7137 /* Accept any base register. SP only in SImode or larger. */
7138 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
7141 /* This is PC relative data before arm_reorg runs. */
7142 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
7143 && GET_CODE (x
) == SYMBOL_REF
7144 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
7147 /* This is PC relative data after arm_reorg runs. */
7148 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
7150 && (GET_CODE (x
) == LABEL_REF
7151 || (GET_CODE (x
) == CONST
7152 && GET_CODE (XEXP (x
, 0)) == PLUS
7153 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7154 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7157 /* Post-inc indexing only supported for SImode and larger. */
7158 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
7159 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
7162 else if (GET_CODE (x
) == PLUS
)
7164 /* REG+REG address can be any two index registers. */
7165 /* We disallow FRAME+REG addressing since we know that FRAME
7166 will be replaced with STACK, and SP relative addressing only
7167 permits SP+OFFSET. */
7168 if (GET_MODE_SIZE (mode
) <= 4
7169 && XEXP (x
, 0) != frame_pointer_rtx
7170 && XEXP (x
, 1) != frame_pointer_rtx
7171 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7172 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
7173 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
7176 /* REG+const has 5-7 bit offset for non-SP registers. */
7177 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7178 || XEXP (x
, 0) == arg_pointer_rtx
)
7179 && CONST_INT_P (XEXP (x
, 1))
7180 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7183 /* REG+const has 10-bit offset for SP, but only SImode and
7184 larger is supported. */
7185 /* ??? Should probably check for DI/DFmode overflow here
7186 just like GO_IF_LEGITIMATE_OFFSET does. */
7187 else if (REG_P (XEXP (x
, 0))
7188 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
7189 && GET_MODE_SIZE (mode
) >= 4
7190 && CONST_INT_P (XEXP (x
, 1))
7191 && INTVAL (XEXP (x
, 1)) >= 0
7192 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
7193 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7196 else if (REG_P (XEXP (x
, 0))
7197 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
7198 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
7199 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
7200 && REGNO (XEXP (x
, 0))
7201 <= LAST_VIRTUAL_POINTER_REGISTER
))
7202 && GET_MODE_SIZE (mode
) >= 4
7203 && CONST_INT_P (XEXP (x
, 1))
7204 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7208 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7209 && GET_MODE_SIZE (mode
) == 4
7210 && GET_CODE (x
) == SYMBOL_REF
7211 && CONSTANT_POOL_ADDRESS_P (x
)
7213 && symbol_mentioned_p (get_pool_constant (x
))
7214 && ! pcrel_constant_p (get_pool_constant (x
))))
7220 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7221 instruction of mode MODE. */
7223 thumb_legitimate_offset_p (enum machine_mode mode
, HOST_WIDE_INT val
)
/* The legal offset range scales with the access size.  NOTE(review):
   the `case' labels for each mode size were lost in extraction.  */
7225 switch (GET_MODE_SIZE (mode
))
/* Byte accesses: unsigned 5-bit offset, 0..31.  */
7228 return val
>= 0 && val
< 32;
/* Halfword accesses: 0..62, halfword aligned.  */
7231 return val
>= 0 && val
< 64 && (val
& 1) == 0;
/* Word and larger: the whole access must end at or below byte 128.  */
7235 && (val
+ GET_MODE_SIZE (mode
)) <= 128
/* Top-level address legitimacy check: dispatch to the ARM, Thumb-2 or
   Thumb-1 checker depending on the instruction set being compiled for.
   NOTE(review): the TARGET_ARM test guarding the first return was lost
   in extraction.  */
7241 arm_legitimate_address_p (enum machine_mode mode
, rtx x
, bool strict_p
)
7244 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
7245 else if (TARGET_THUMB2
)
7246 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
7247 else /* if (TARGET_THUMB1) */
7248 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
7251 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7253 Given an rtx X being reloaded into a reg required to be
7254 in class CLASS, return the class of reg to actually use.
7255 In general this is just CLASS, but for the Thumb core registers and
7256 immediate constants we prefer a LO_REGS class or a subset. */
7259 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
/* NOTE(review): the branch bodies (per the comment above, preferring a
   LO_REGS subset for Thumb core registers and immediates) were lost in
   extraction -- confirm against the original source.  */
7265 if (rclass
== GENERAL_REGS
)
7272 /* Build the SYMBOL_REF for __tls_get_addr. */
7274 static GTY(()) rtx tls_get_addr_libfunc
;
/* Return the (lazily created, cached) SYMBOL_REF for __tls_get_addr.  */
7277 get_tls_get_addr (void)
7279 if (!tls_get_addr_libfunc
)
7280 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
7281 return tls_get_addr_libfunc
;
/* Emit code loading the thread pointer into TARGET (a fresh SImode
   pseudo if TARGET is null).  NOTE(review): the hard/soft-TP branch
   header and the final return were lost in extraction.  */
7285 arm_load_tp (rtx target
)
7288 target
= gen_reg_rtx (SImode
);
7292 /* Can return in any reg. */
7293 emit_insn (gen_load_tp_hard (target
));
7297 /* Always returned in r0. Immediately copy the result into a pseudo,
7298 otherwise other uses of r0 (e.g. setting up function arguments) may
7299 clobber the value. */
7303 emit_insn (gen_load_tp_soft ());
7305 tmp
= gen_rtx_REG (SImode
, 0);
7306 emit_move_insn (target
, tmp
);
/* Wrap X in a CONST and move it into REG (allocating a fresh SImode
   pseudo when REG is null); the loaded register is the result.
   NOTE(review): the `tmp' declaration and final return were lost in
   extraction.  */
7312 load_tls_operand (rtx x
, rtx reg
)
7316 if (reg
== NULL_RTX
)
7317 reg
= gen_reg_rtx (SImode
);
7319 tmp
= gen_rtx_CONST (SImode
, x
);
7321 emit_move_insn (reg
, tmp
);
/* Emit the instruction sequence calling __tls_get_addr for symbol X
   with relocation RELOC (GD or LD; the descriptor sequence is handled
   elsewhere, hence the assert).  The call's result rtx is stored in
   *VALUEP and the emitted insn list is returned via get_insns ().  */
7327 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
7329 rtx insns
, label
, labelno
, sum
;
7331 gcc_assert (reloc
!= TLS_DESCSEQ
);
/* Build a fresh PIC label and the UNSPEC_TLS sum
   (sym + reloc + label + pc-offset); the pc bias is 8 for ARM state,
   4 for Thumb.  */
7334 labelno
= GEN_INT (pic_labelno
++);
7335 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7336 label
= gen_rtx_CONST (VOIDmode
, label
);
7338 sum
= gen_rtx_UNSPEC (Pmode
,
7339 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
7340 GEN_INT (TARGET_ARM
? 8 : 4)),
7342 reg
= load_tls_operand (sum
, reg
);
/* Add the pc-relative bias; NOTE(review): the TARGET_ARM/else guard
   lines selecting between these two insns were lost in extraction.  */
7345 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
7347 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7349 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
7350 LCT_PURE
, /* LCT_CONST? */
7351 Pmode
, 1, reg
, Pmode
);
7353 insns
= get_insns ();
/* Emit the GNU2 TLS descriptor sequence for symbol X: materialize the
   UNSPEC_TLS descriptor operand in r0, emit the tlscall, then copy the
   result out of r0 into a fresh pseudo (asserted != r0) so later uses
   of r0 cannot clobber it.  */
7360 arm_tls_descseq_addr (rtx x
, rtx reg
)
7362 rtx labelno
= GEN_INT (pic_labelno
++);
7363 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
/* Descriptor operand: sym + TLS_DESCSEQ reloc + PIC label + a flag
   that is 0 for ARM state, 1 otherwise.  */
7364 rtx sum
= gen_rtx_UNSPEC (Pmode
,
7365 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
7366 gen_rtx_CONST (VOIDmode
, label
),
7367 GEN_INT (!TARGET_ARM
)),
7369 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, 0));
7371 emit_insn (gen_tlscall (x
, labelno
));
7373 reg
= gen_reg_rtx (SImode
);
7375 gcc_assert (REGNO (reg
) != 0);
7377 emit_move_insn (reg
, reg0
);
7383 legitimize_tls_address (rtx x
, rtx reg
)
7385 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
7386 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
7390 case TLS_MODEL_GLOBAL_DYNAMIC
:
7391 if (TARGET_GNU2_TLS
)
7393 reg
= arm_tls_descseq_addr (x
, reg
);
7395 tp
= arm_load_tp (NULL_RTX
);
7397 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7401 /* Original scheme */
7402 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
7403 dest
= gen_reg_rtx (Pmode
);
7404 emit_libcall_block (insns
, dest
, ret
, x
);
7408 case TLS_MODEL_LOCAL_DYNAMIC
:
7409 if (TARGET_GNU2_TLS
)
7411 reg
= arm_tls_descseq_addr (x
, reg
);
7413 tp
= arm_load_tp (NULL_RTX
);
7415 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7419 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
7421 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7422 share the LDM result with other LD model accesses. */
7423 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
7425 dest
= gen_reg_rtx (Pmode
);
7426 emit_libcall_block (insns
, dest
, ret
, eqv
);
7428 /* Load the addend. */
7429 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
7430 GEN_INT (TLS_LDO32
)),
7432 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
7433 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
7437 case TLS_MODEL_INITIAL_EXEC
:
7438 labelno
= GEN_INT (pic_labelno
++);
7439 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7440 label
= gen_rtx_CONST (VOIDmode
, label
);
7441 sum
= gen_rtx_UNSPEC (Pmode
,
7442 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
7443 GEN_INT (TARGET_ARM
? 8 : 4)),
7445 reg
= load_tls_operand (sum
, reg
);
7448 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
7449 else if (TARGET_THUMB2
)
7450 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
7453 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7454 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
7457 tp
= arm_load_tp (NULL_RTX
);
7459 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7461 case TLS_MODEL_LOCAL_EXEC
:
7462 tp
= arm_load_tp (NULL_RTX
);
7464 reg
= gen_rtx_UNSPEC (Pmode
,
7465 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
7467 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
7469 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7476 /* Try machine-dependent ways of modifying an illegitimate address
7477 to be legitimate. If we find one, return the new, valid address. */
7479 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
7481 if (arm_tls_referenced_p (x
))
7485 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
7487 addend
= XEXP (XEXP (x
, 0), 1);
7488 x
= XEXP (XEXP (x
, 0), 0);
7491 if (GET_CODE (x
) != SYMBOL_REF
)
7494 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
7496 x
= legitimize_tls_address (x
, NULL_RTX
);
7500 x
= gen_rtx_PLUS (SImode
, x
, addend
);
7509 /* TODO: legitimize_address for Thumb2. */
7512 return thumb_legitimize_address (x
, orig_x
, mode
);
7515 if (GET_CODE (x
) == PLUS
)
7517 rtx xop0
= XEXP (x
, 0);
7518 rtx xop1
= XEXP (x
, 1);
7520 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
7521 xop0
= force_reg (SImode
, xop0
);
7523 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
7524 && !symbol_mentioned_p (xop1
))
7525 xop1
= force_reg (SImode
, xop1
);
7527 if (ARM_BASE_REGISTER_RTX_P (xop0
)
7528 && CONST_INT_P (xop1
))
7530 HOST_WIDE_INT n
, low_n
;
7534 /* VFP addressing modes actually allow greater offsets, but for
7535 now we just stick with the lowest common denominator. */
7537 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
7549 low_n
= ((mode
) == TImode
? 0
7550 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
7554 base_reg
= gen_reg_rtx (SImode
);
7555 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
7556 emit_move_insn (base_reg
, val
);
7557 x
= plus_constant (Pmode
, base_reg
, low_n
);
7559 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7560 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7563 /* XXX We don't allow MINUS any more -- see comment in
7564 arm_legitimate_address_outer_p (). */
7565 else if (GET_CODE (x
) == MINUS
)
7567 rtx xop0
= XEXP (x
, 0);
7568 rtx xop1
= XEXP (x
, 1);
7570 if (CONSTANT_P (xop0
))
7571 xop0
= force_reg (SImode
, xop0
);
7573 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
7574 xop1
= force_reg (SImode
, xop1
);
7576 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7577 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
7580 /* Make sure to take full advantage of the pre-indexed addressing mode
7581 with absolute addresses which often allows for the base register to
7582 be factorized for multiple adjacent memory references, and it might
7583 even allows for the mini pool to be avoided entirely. */
7584 else if (CONST_INT_P (x
) && optimize
> 0)
7587 HOST_WIDE_INT mask
, base
, index
;
7590 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7591 use a 8-bit index. So let's use a 12-bit index for SImode only and
7592 hope that arm_gen_constant will enable ldrb to use more bits. */
7593 bits
= (mode
== SImode
) ? 12 : 8;
7594 mask
= (1 << bits
) - 1;
7595 base
= INTVAL (x
) & ~mask
;
7596 index
= INTVAL (x
) & mask
;
7597 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
7599 /* It'll most probably be more efficient to generate the base
7600 with more bits set and use a negative index instead. */
7604 base_reg
= force_reg (SImode
, GEN_INT (base
));
7605 x
= plus_constant (Pmode
, base_reg
, index
);
7610 /* We need to find and carefully transform any SYMBOL and LABEL
7611 references; so go back to the original address expression. */
7612 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7614 if (new_x
!= orig_x
)
7622 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7623 to be legitimate. If we find one, return the new, valid address. */
7625 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
7627 if (GET_CODE (x
) == PLUS
7628 && CONST_INT_P (XEXP (x
, 1))
7629 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
7630 || INTVAL (XEXP (x
, 1)) < 0))
7632 rtx xop0
= XEXP (x
, 0);
7633 rtx xop1
= XEXP (x
, 1);
7634 HOST_WIDE_INT offset
= INTVAL (xop1
);
7636 /* Try and fold the offset into a biasing of the base register and
7637 then offsetting that. Don't do this when optimizing for space
7638 since it can cause too many CSEs. */
7639 if (optimize_size
&& offset
>= 0
7640 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
7642 HOST_WIDE_INT delta
;
7645 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
7646 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
7647 delta
= 31 * GET_MODE_SIZE (mode
);
7649 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
7651 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
7653 x
= plus_constant (Pmode
, xop0
, delta
);
7655 else if (offset
< 0 && offset
> -256)
7656 /* Small negative offsets are best done with a subtract before the
7657 dereference, forcing these into a register normally takes two
7659 x
= force_operand (x
, NULL_RTX
);
7662 /* For the remaining cases, force the constant into a register. */
7663 xop1
= force_reg (SImode
, xop1
);
7664 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7667 else if (GET_CODE (x
) == PLUS
7668 && s_register_operand (XEXP (x
, 1), SImode
)
7669 && !s_register_operand (XEXP (x
, 0), SImode
))
7671 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
7673 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
7678 /* We need to find and carefully transform any SYMBOL and LABEL
7679 references; so go back to the original address expression. */
7680 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7682 if (new_x
!= orig_x
)
7690 arm_legitimize_reload_address (rtx
*p
,
7691 enum machine_mode mode
,
7692 int opnum
, int type
,
7693 int ind_levels ATTRIBUTE_UNUSED
)
7695 /* We must recognize output that we have already generated ourselves. */
7696 if (GET_CODE (*p
) == PLUS
7697 && GET_CODE (XEXP (*p
, 0)) == PLUS
7698 && REG_P (XEXP (XEXP (*p
, 0), 0))
7699 && CONST_INT_P (XEXP (XEXP (*p
, 0), 1))
7700 && CONST_INT_P (XEXP (*p
, 1)))
7702 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7703 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7704 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7708 if (GET_CODE (*p
) == PLUS
7709 && REG_P (XEXP (*p
, 0))
7710 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p
, 0)))
7711 /* If the base register is equivalent to a constant, let the generic
7712 code handle it. Otherwise we will run into problems if a future
7713 reload pass decides to rematerialize the constant. */
7714 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p
, 0)))
7715 && CONST_INT_P (XEXP (*p
, 1)))
7717 HOST_WIDE_INT val
= INTVAL (XEXP (*p
, 1));
7718 HOST_WIDE_INT low
, high
;
7720 /* Detect coprocessor load/stores. */
7721 bool coproc_p
= ((TARGET_HARD_FLOAT
7723 && (mode
== SFmode
|| mode
== DFmode
))
7724 || (TARGET_REALLY_IWMMXT
7725 && VALID_IWMMXT_REG_MODE (mode
))
7727 && (VALID_NEON_DREG_MODE (mode
)
7728 || VALID_NEON_QREG_MODE (mode
))));
7730 /* For some conditions, bail out when lower two bits are unaligned. */
7731 if ((val
& 0x3) != 0
7732 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7734 /* For DI, and DF under soft-float: */
7735 || ((mode
== DImode
|| mode
== DFmode
)
7736 /* Without ldrd, we use stm/ldm, which does not
7737 fair well with unaligned bits. */
7739 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7740 || TARGET_THUMB2
))))
7743 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7744 of which the (reg+high) gets turned into a reload add insn,
7745 we try to decompose the index into high/low values that can often
7746 also lead to better reload CSE.
7748 ldr r0, [r2, #4100] // Offset too large
7749 ldr r1, [r2, #4104] // Offset too large
7751 is best reloaded as:
7757 which post-reload CSE can simplify in most cases to eliminate the
7758 second add instruction:
7763 The idea here is that we want to split out the bits of the constant
7764 as a mask, rather than as subtracting the maximum offset that the
7765 respective type of load/store used can handle.
7767 When encountering negative offsets, we can still utilize it even if
7768 the overall offset is positive; sometimes this may lead to an immediate
7769 that can be constructed with fewer instructions.
7771 ldr r0, [r2, #0x3FFFFC]
7773 This is best reloaded as:
7774 add t1, r2, #0x400000
7777 The trick for spotting this for a load insn with N bits of offset
7778 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
7779 negative offset that is going to make bit N and all the bits below
7780 it become zero in the remainder part.
7782 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7783 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7784 used in most cases of ARM load/store instructions. */
7786 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7787 (((VAL) & ((1 << (N)) - 1)) \
7788 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7793 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 10);
7795 /* NEON quad-word load/stores are made of two double-word accesses,
7796 so the valid index range is reduced by 8. Treat as 9-bit range if
7798 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
) && low
>= 1016)
7799 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 9);
7801 else if (GET_MODE_SIZE (mode
) == 8)
7804 low
= (TARGET_THUMB2
7805 ? SIGN_MAG_LOW_ADDR_BITS (val
, 10)
7806 : SIGN_MAG_LOW_ADDR_BITS (val
, 8));
7808 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7809 to access doublewords. The supported load/store offsets are
7810 -8, -4, and 4, which we try to produce here. */
7811 low
= ((val
& 0xf) ^ 0x8) - 0x8;
7813 else if (GET_MODE_SIZE (mode
) < 8)
7815 /* NEON element load/stores do not have an offset. */
7816 if (TARGET_NEON_FP16
&& mode
== HFmode
)
7821 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7822 Try the wider 12-bit range first, and re-try if the result
7824 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7826 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7830 if (mode
== HImode
|| mode
== HFmode
)
7833 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7836 /* The storehi/movhi_bytes fallbacks can use only
7837 [-4094,+4094] of the full ldrb/strb index range. */
7838 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7839 if (low
== 4095 || low
== -4095)
7844 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7850 high
= ((((val
- low
) & (unsigned HOST_WIDE_INT
) 0xffffffff)
7851 ^ (unsigned HOST_WIDE_INT
) 0x80000000)
7852 - (unsigned HOST_WIDE_INT
) 0x80000000);
7853 /* Check for overflow or zero */
7854 if (low
== 0 || high
== 0 || (high
+ low
!= val
))
7857 /* Reload the high part into a base reg; leave the low part
7859 Note that replacing this gen_rtx_PLUS with plus_constant is
7860 wrong in this case because we rely on the
7861 (plus (plus reg c1) c2) structure being preserved so that
7862 XEXP (*p, 0) in push_reload below uses the correct term. */
7863 *p
= gen_rtx_PLUS (GET_MODE (*p
),
7864 gen_rtx_PLUS (GET_MODE (*p
), XEXP (*p
, 0),
7867 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7868 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7869 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
/* Thumb-1 LEGITIMIZE_RELOAD_ADDRESS worker.  Pushes a reload for the
   whole address when that needs fewer reload registers than reloading
   the pieces individually.  NOTE(review): the `x'/`orig_x' setup lines
   and the return statements were lost in extraction.  */
7877 thumb_legitimize_reload_address (rtx
*x_p
,
7878 enum machine_mode mode
,
7879 int opnum
, int type
,
7880 int ind_levels ATTRIBUTE_UNUSED
)
/* SP + out-of-range constant offset for a narrow access: reload the
   whole sum into a base register.  */
7884 if (GET_CODE (x
) == PLUS
7885 && GET_MODE_SIZE (mode
) < 4
7886 && REG_P (XEXP (x
, 0))
7887 && XEXP (x
, 0) == stack_pointer_rtx
7888 && CONST_INT_P (XEXP (x
, 1))
7889 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7894 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
7895 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7899 /* If both registers are hi-regs, then it's better to reload the
7900 entire expression rather than each register individually. That
7901 only requires one reload register rather than two. */
7902 if (GET_CODE (x
) == PLUS
7903 && REG_P (XEXP (x
, 0))
7904 && REG_P (XEXP (x
, 1))
7905 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
7906 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
7911 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
7912 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7919 /* Test for various thread-local symbols. */
7921 /* Helper for arm_tls_referenced_p. */
/* for_each_rtx callback: nonzero when *X is a SYMBOL_REF with a TLS
   model.  NOTE(review): the tail of the UNSPEC_TLS branch (stopping the
   walk) and the final return were lost in extraction.  */
7924 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
7926 if (GET_CODE (*x
) == SYMBOL_REF
)
7927 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
7929 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7930 TLS offsets, not real symbol references. */
7931 if (GET_CODE (*x
) == UNSPEC
7932 && XINT (*x
, 1) == UNSPEC_TLS
)
7938 /* Return TRUE if X contains any TLS symbol references. */
/* Return TRUE if X contains any TLS symbol references; trivially false
   when the target has no TLS support.  */
7941 arm_tls_referenced_p (rtx x
)
7943 if (! TARGET_HAVE_TLS
)
7946 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
7949 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7951 On the ARM, allow any integer (invalid ones are removed later by insn
7952 patterns), nice doubles and symbol_refs which refer to the function's
7955 When generating pic allow anything. */
/* ARM-state part of TARGET_LEGITIMATE_CONSTANT_P: reject Neon structure
   modes, otherwise accept anything when PIC, or anything not mentioning
   a label otherwise.  */
7958 arm_legitimate_constant_p_1 (enum machine_mode mode
, rtx x
)
7960 /* At present, we have no support for Neon structure constants, so forbid
7961 them here. It might be possible to handle simple cases like 0 and -1
7963 if (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
7966 return flag_pic
|| !label_mentioned_p (x
);
/* Thumb counterpart: accept integer and double constants and constant
   addresses.  NOTE(review): the tail of the return expression was lost
   in extraction.  */
7970 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
7972 return (CONST_INT_P (x
)
7973 || CONST_DOUBLE_P (x
)
7974 || CONSTANT_ADDRESS_P (x
)
/* TARGET_LEGITIMATE_CONSTANT_P hook: a constant is legitimate when it
   need not be forced to memory, and the per-ISA checker accepts it.
   NOTE(review): the TARGET_32BIT test selecting between the two
   checkers was lost in extraction.  */
7979 arm_legitimate_constant_p (enum machine_mode mode
, rtx x
)
7981 return (!arm_cannot_force_const_mem (mode
, x
)
7983 ? arm_legitimate_constant_p_1 (mode
, x
)
7984 : thumb_legitimate_constant_p (mode
, x
)));
7987 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
/* TARGET_CANNOT_FORCE_CONST_MEM hook: X cannot go in the constant pool
   when symbol+offset would land outside the symbol's section (if the
   target requires in-section offsets), or when X references TLS
   symbols.  NOTE(review): the base/offset declarations were lost in
   extraction.  */
7990 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
7994 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
7996 split_const (x
, &base
, &offset
);
7997 if (GET_CODE (base
) == SYMBOL_REF
7998 && !offset_within_block_p (base
, INTVAL (offset
)))
8001 return arm_tls_referenced_p (x
);
8004 #define REG_OR_SUBREG_REG(X) \
8006 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8008 #define REG_OR_SUBREG_RTX(X) \
8009 (REG_P (X) ? (X) : SUBREG_REG (X))
/* thumb1_rtx_costs: estimate the cost (in COSTS_N_INSNS units) of RTX X
   with code CODE appearing under operator OUTER, apparently for Thumb-1
   code generation.
   NOTE(review): this listing is a corrupted extraction -- the return
   type, braces, and the switch/case skeleton are missing, and each
   logical line is split across several physical lines (the leading
   integers are line numbers from the original file fused into the
   text).  Do not rely on it without checking the upstream source.  */
8012 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8014 enum machine_mode mode
= GET_MODE (x
);
8023 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8030 return COSTS_N_INSNS (1);
8033 if (CONST_INT_P (XEXP (x
, 1)))
8036 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8043 return COSTS_N_INSNS (2) + cycles
;
8045 return COSTS_N_INSNS (1) + 16;
8048 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8050 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8051 return (COSTS_N_INSNS (words
)
8052 + 4 * ((MEM_P (SET_SRC (x
)))
8053 + MEM_P (SET_DEST (x
))));
/* CONST_INT operand, apparently: the cost depends on the constant's
   magnitude and on the enclosing operator OUTER.  */
8058 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8060 if (thumb_shiftable_const (INTVAL (x
)))
8061 return COSTS_N_INSNS (2);
8062 return COSTS_N_INSNS (3);
8064 else if ((outer
== PLUS
|| outer
== COMPARE
)
8065 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8067 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8068 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8069 return COSTS_N_INSNS (1);
8070 else if (outer
== AND
)
8073 /* This duplicates the tests in the andsi3 expander. */
8074 for (i
= 9; i
<= 31; i
++)
8075 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8076 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8077 return COSTS_N_INSNS (2);
8079 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8080 || outer
== LSHIFTRT
)
8082 return COSTS_N_INSNS (2);
8088 return COSTS_N_INSNS (3);
8106 /* XXX another guess. */
8107 /* Memory costs quite a lot for the first word, but subsequent words
8108 load at the equivalent of a single insn each. */
8109 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8110 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
/* IF_THEN_ELSE with a PC arm, apparently -- the case label for this
   fragment is missing from the extraction.  */
8115 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8121 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8122 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8128 return total
+ COSTS_N_INSNS (1);
8130 /* Assume a two-shift sequence. Increase the cost slightly so
8131 we prefer actual shifts over an extend operation. */
8132 return total
+ 1 + COSTS_N_INSNS (2);
/* arm_rtx_costs_1: generic ARM cost model.  Estimates the cost of RTX X
   under operator OUTER, writing the result through TOTAL; SPEED selects
   speed vs. size costing (it is forwarded to rtx_cost for operands).
   NOTE(review): corrupted extraction -- the return type, braces, most
   case labels, `return' statements and several whole lines are missing,
   and each logical line is split across multiple physical lines (the
   leading integers are original-file line numbers fused into the
   text).  Comments below are hedged; verify everything against the
   upstream file before relying on it.  */
8140 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
8142 enum machine_mode mode
= GET_MODE (x
)
);
8143 enum rtx_code subcode
;
8145 enum rtx_code code
= GET_CODE (x
);
8151 /* Memory costs quite a lot for the first word, but subsequent words
8152 load at the equivalent of a single insn each. */
8153 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8160 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
8161 *total
= COSTS_N_INSNS (2);
8162 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
8163 *total
= COSTS_N_INSNS (4);
8165 *total
= COSTS_N_INSNS (20);
/* Shift handling, apparently -- the surrounding case labels are
   missing from this extraction.  */
8169 if (REG_P (XEXP (x
, 1)))
8170 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8171 else if (!CONST_INT_P (XEXP (x
, 1)))
8172 *total
= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8178 *total
+= COSTS_N_INSNS (4);
8183 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8184 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8187 *total
+= COSTS_N_INSNS (3);
8191 *total
+= COSTS_N_INSNS (1);
8192 /* Increase the cost of complex shifts because they aren't any faster,
8193 and reduce dual issue opportunities. */
8194 if (arm_tune_cortex_a9
8195 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8203 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8204 if (CONST_INT_P (XEXP (x
, 0))
8205 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8207 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8211 if (CONST_INT_P (XEXP (x
, 1))
8212 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8214 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8221 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8223 if (TARGET_HARD_FLOAT
8225 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8227 *total
= COSTS_N_INSNS (1);
8228 if (CONST_DOUBLE_P (XEXP (x
, 0))
8229 && arm_const_double_rtx (XEXP (x
, 0)))
8231 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8235 if (CONST_DOUBLE_P (XEXP (x
, 1))
8236 && arm_const_double_rtx (XEXP (x
, 1)))
8238 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8244 *total
= COSTS_N_INSNS (20);
8248 *total
= COSTS_N_INSNS (1);
8249 if (CONST_INT_P (XEXP (x
, 0))
8250 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8252 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8256 subcode
= GET_CODE (XEXP (x
, 1));
8257 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8258 || subcode
== LSHIFTRT
8259 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8261 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8262 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8266 /* A shift as a part of RSB costs no more than RSB itself. */
8267 if (GET_CODE (XEXP (x
, 0)) == MULT
8268 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8270 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed
);
8271 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8276 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8278 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8279 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8283 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8284 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8286 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8287 if (REG_P (XEXP (XEXP (x
, 1), 0))
8288 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8289 *total
+= COSTS_N_INSNS (1);
8297 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8298 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8299 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8301 *total
= COSTS_N_INSNS (1);
8302 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
8304 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8308 /* MLA: All arguments must be registers. We filter out
8309 multiplication by a power of two, so that we fall down into
8311 if (GET_CODE (XEXP (x
, 0)) == MULT
8312 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8314 /* The cost comes from the cost of the multiply. */
8318 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8320 if (TARGET_HARD_FLOAT
8322 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8324 *total
= COSTS_N_INSNS (1);
8325 if (CONST_DOUBLE_P (XEXP (x
, 1))
8326 && arm_const_double_rtx (XEXP (x
, 1)))
8328 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8335 *total
= COSTS_N_INSNS (20);
8339 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8340 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8342 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8343 if (REG_P (XEXP (XEXP (x
, 0), 0))
8344 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8345 *total
+= COSTS_N_INSNS (1);
8351 case AND
: case XOR
: case IOR
:
8353 /* Normally the frame registers will be spilt into reg+const during
8354 reload, so it is a bad idea to combine them with other instructions,
8355 since then they might not be moved outside of loops. As a compromise
8356 we allow integration with ops that have a constant as their second
8358 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8359 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8360 && !CONST_INT_P (XEXP (x
, 1)))
8361 *total
= COSTS_N_INSNS (1);
8365 *total
+= COSTS_N_INSNS (2);
8366 if (CONST_INT_P (XEXP (x
, 1))
8367 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8369 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8376 *total
+= COSTS_N_INSNS (1);
8377 if (CONST_INT_P (XEXP (x
, 1))
8378 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8380 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8383 subcode
= GET_CODE (XEXP (x
, 0));
8384 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8385 || subcode
== LSHIFTRT
8386 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8388 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8389 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8394 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8396 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8397 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8401 if (subcode
== UMIN
|| subcode
== UMAX
8402 || subcode
== SMIN
|| subcode
== SMAX
)
8404 *total
= COSTS_N_INSNS (3);
8411 /* This should have been handled by the CPU specific routines. */
8415 if (arm_arch3m
&& mode
== SImode
8416 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8417 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8418 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8419 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8420 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8421 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8423 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, 0, speed
);
8426 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8430 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8432 if (TARGET_HARD_FLOAT
8434 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8436 *total
= COSTS_N_INSNS (1);
8439 *total
= COSTS_N_INSNS (2);
8445 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
8446 if (mode
== SImode
&& code
== NOT
)
8448 subcode
= GET_CODE (XEXP (x
, 0));
8449 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8450 || subcode
== LSHIFTRT
8451 || subcode
== ROTATE
|| subcode
== ROTATERT
8453 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
8455 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8456 /* Register shifts cost an extra cycle. */
8457 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
8458 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
/* IF_THEN_ELSE handling, apparently -- case label missing.  */
8467 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8469 *total
= COSTS_N_INSNS (4);
8473 operand
= XEXP (x
, 0);
8475 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
8476 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
8477 && REG_P (XEXP (operand
, 0))
8478 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
8479 *total
+= COSTS_N_INSNS (1);
8480 *total
+= (rtx_cost (XEXP (x
, 1), code
, 1, speed
)
8481 + rtx_cost (XEXP (x
, 2), code
, 2, speed
));
8485 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8487 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8493 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8494 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8496 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8502 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8503 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8505 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8525 /* SCC insns. In the case where the comparison has already been
8526 performed, then they cost 2 instructions. Otherwise they need
8527 an additional comparison before them. */
8528 *total
= COSTS_N_INSNS (2);
8529 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8536 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8542 *total
+= COSTS_N_INSNS (1);
8543 if (CONST_INT_P (XEXP (x
, 1))
8544 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8546 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8550 subcode
= GET_CODE (XEXP (x
, 0));
8551 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8552 || subcode
== LSHIFTRT
8553 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8555 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8556 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8561 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8563 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8564 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8574 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8575 if (!CONST_INT_P (XEXP (x
, 1))
8576 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8577 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8581 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8583 if (TARGET_HARD_FLOAT
8585 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8587 *total
= COSTS_N_INSNS (1);
8590 *total
= COSTS_N_INSNS (20);
8593 *total
= COSTS_N_INSNS (1);
8595 *total
+= COSTS_N_INSNS (3);
8601 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8603 rtx op
= XEXP (x
, 0);
8604 enum machine_mode opmode
= GET_MODE (op
);
8607 *total
+= COSTS_N_INSNS (1);
8609 if (opmode
!= SImode
)
8613 /* If !arm_arch4, we use one of the extendhisi2_mem
8614 or movhi_bytes patterns for HImode. For a QImode
8615 sign extension, we first zero-extend from memory
8616 and then perform a shift sequence. */
8617 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8618 *total
+= COSTS_N_INSNS (2);
8621 *total
+= COSTS_N_INSNS (1);
8623 /* We don't have the necessary insn, so we need to perform some
8625 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8626 /* An and with constant 255. */
8627 *total
+= COSTS_N_INSNS (1);
8629 /* A shift sequence. Increase costs slightly to avoid
8630 combining two shifts into an extend operation. */
8631 *total
+= COSTS_N_INSNS (2) + 1;
/* Operand-mode dispatch; most of this switch's case labels are
   missing from the extraction.  */
8637 switch (GET_MODE (XEXP (x
, 0)))
8644 *total
= COSTS_N_INSNS (1);
8654 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8658 if (const_ok_for_arm (INTVAL (x
))
8659 || const_ok_for_arm (~INTVAL (x
)))
8660 *total
= COSTS_N_INSNS (1);
8662 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
8663 INTVAL (x
), NULL_RTX
,
8670 *total
= COSTS_N_INSNS (3);
8674 *total
= COSTS_N_INSNS (1);
8678 *total
= COSTS_N_INSNS (1);
8679 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8683 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
8684 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8685 *total
= COSTS_N_INSNS (1);
8687 *total
= COSTS_N_INSNS (4);
8691 /* The vec_extract patterns accept memory operands that require an
8692 address reload. Account for the cost of that reload to give the
8693 auto-inc-dec pass an incentive to try to replace them. */
8694 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
8695 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
8697 *total
= rtx_cost (SET_DEST (x
), code
, 0, speed
);
8698 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
8699 *total
+= COSTS_N_INSNS (1);
8702 /* Likewise for the vec_set patterns. */
8703 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
8704 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
8705 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
8707 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
8708 *total
= rtx_cost (mem
, code
, 0, speed
);
8709 if (!neon_vector_mem_operand (mem
, 2, true))
8710 *total
+= COSTS_N_INSNS (1);
8716 /* We cost this as high as our memory costs to allow this to
8717 be hoisted from loops. */
8718 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
8720 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8726 && TARGET_HARD_FLOAT
8728 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8729 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8730 *total
= COSTS_N_INSNS (1);
8732 *total
= COSTS_N_INSNS (4);
8736 *total
= COSTS_N_INSNS (4);
/* NOTE(review): corrupted extraction -- return type, braces and the
   switch/case skeleton of this function are missing, and each logical
   line is split across several physical lines (leading integers are
   original-file line numbers fused into the text).  Verify against the
   upstream file before use.  */
8741 /* Estimates the size cost of thumb1 instructions.
8742 For now most of the code is copied from thumb1_rtx_costs. We need more
8743 fine grain tuning when we have more related test cases. */
8745 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8747 enum machine_mode mode
= GET_MODE (x
);
8756 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8760 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8761 defined by RTL expansion, especially for the expansion of
8763 if ((GET_CODE (XEXP (x
, 0)) == MULT
8764 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8765 || (GET_CODE (XEXP (x
, 1)) == MULT
8766 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8767 return COSTS_N_INSNS (2);
8768 /* On purpose fall through for normal RTX. */
8772 return COSTS_N_INSNS (1);
8775 if (CONST_INT_P (XEXP (x
, 1)))
8777 /* Thumb1 mul instruction can't operate on const. We must Load it
8778 into a register first. */
8779 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8780 return COSTS_N_INSNS (1) + const_size
;
8782 return COSTS_N_INSNS (1);
8785 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8787 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8788 return (COSTS_N_INSNS (words
)
8789 + 4 * ((MEM_P (SET_SRC (x
)))
8790 + MEM_P (SET_DEST (x
))));
/* CONST_INT operand, apparently: cost graded by magnitude and OUTER.  */
8795 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8796 return COSTS_N_INSNS (1);
8797 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8798 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8799 return COSTS_N_INSNS (2);
8800 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8801 if (thumb_shiftable_const (INTVAL (x
)))
8802 return COSTS_N_INSNS (2);
8803 return COSTS_N_INSNS (3);
8805 else if ((outer
== PLUS
|| outer
== COMPARE
)
8806 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8808 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8809 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8810 return COSTS_N_INSNS (1);
8811 else if (outer
== AND
)
8814 /* This duplicates the tests in the andsi3 expander. */
8815 for (i
= 9; i
<= 31; i
++)
8816 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8817 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8818 return COSTS_N_INSNS (2);
8820 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8821 || outer
== LSHIFTRT
)
8823 return COSTS_N_INSNS (2);
8829 return COSTS_N_INSNS (3);
8847 /* XXX another guess. */
8848 /* Memory costs quite a lot for the first word, but subsequent words
8849 load at the equivalent of a single insn each. */
8850 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8851 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8856 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8861 /* XXX still guessing. */
8862 switch (GET_MODE (XEXP (x
, 0)))
8865 return (1 + (mode
== DImode
? 4 : 0)
8866 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8869 return (4 + (mode
== DImode
? 4 : 0)
8870 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8873 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
/* NOTE(review): corrupted extraction of the size-oriented cost hook --
   return type, braces, most case labels and several statements are
   missing; each logical line is split across multiple physical lines
   (leading integers are original-file line numbers fused into the
   text).  It apparently delegates Thumb-1 to thumb1_size_rtx_costs and
   falls back to arm_rtx_costs_1 for some codes.  Verify against the
   upstream file.  */
8884 /* RTX costs when optimizing for size. */
8886 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8889 enum machine_mode mode
= GET_MODE (x
);
8892 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
8896 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8900 /* A memory access costs 1 insn if the mode is small, or the address is
8901 a single register, otherwise it costs one insn per word. */
8902 if (REG_P (XEXP (x
, 0)))
8903 *total
= COSTS_N_INSNS (1);
8905 && GET_CODE (XEXP (x
, 0)) == PLUS
8906 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
8907 /* This will be split into two instructions.
8908 See arm.md:calculate_pic_address. */
8909 *total
= COSTS_N_INSNS (2);
8911 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8918 /* Needs a libcall, so it costs about this. */
8919 *total
= COSTS_N_INSNS (2);
8923 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
8925 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8933 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
8935 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8938 else if (mode
== SImode
)
8940 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8941 /* Slightly disparage register shifts, but not by much. */
8942 if (!CONST_INT_P (XEXP (x
, 1)))
8943 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, 1, false);
8947 /* Needs a libcall. */
8948 *total
= COSTS_N_INSNS (2);
8952 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8953 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8955 *total
= COSTS_N_INSNS (1);
8961 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
8962 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
8964 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
8965 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
8966 || subcode1
== ROTATE
|| subcode1
== ROTATERT
8967 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
8968 || subcode1
== ASHIFTRT
)
8970 /* It's just the cost of the two operands. */
8975 *total
= COSTS_N_INSNS (1);
8979 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8983 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8984 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8986 *total
= COSTS_N_INSNS (1);
8990 /* A shift as a part of ADD costs nothing. */
8991 if (GET_CODE (XEXP (x
, 0)) == MULT
8992 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8994 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
8995 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, false);
8996 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, false);
9001 case AND
: case XOR
: case IOR
:
9004 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9006 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
9007 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
9008 || (code
== AND
&& subcode
== NOT
))
9010 /* It's just the cost of the two operands. */
9016 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9020 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9024 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9025 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9027 *total
= COSTS_N_INSNS (1);
9033 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9042 if (cc_register (XEXP (x
, 0), VOIDmode
))
9045 *total
= COSTS_N_INSNS (1);
9049 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9050 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9051 *total
= COSTS_N_INSNS (1);
9053 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
/* Delegation to the common cost routine with speed == 0 (size).  */
9058 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
9061 if (const_ok_for_arm (INTVAL (x
)))
9062 /* A multiplication by a constant requires another instruction
9063 to load the constant to a register. */
9064 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
9066 else if (const_ok_for_arm (~INTVAL (x
)))
9067 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
9068 else if (const_ok_for_arm (-INTVAL (x
)))
9070 if (outer_code
== COMPARE
|| outer_code
== PLUS
9071 || outer_code
== MINUS
)
9074 *total
= COSTS_N_INSNS (1);
9077 *total
= COSTS_N_INSNS (2);
9083 *total
= COSTS_N_INSNS (2);
9087 *total
= COSTS_N_INSNS (4);
9092 && TARGET_HARD_FLOAT
9093 && outer_code
== SET
9094 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
9095 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
9096 *total
= COSTS_N_INSNS (1);
9098 *total
= COSTS_N_INSNS (4);
9103 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9104 cost of these slightly. */
9105 *total
= COSTS_N_INSNS (1) + 1;
9112 if (mode
!= VOIDmode
)
9113 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9115 *total
= COSTS_N_INSNS (4); /* How knows? */
9120 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9121 operand, then return the operand that is being shifted. If the shift
9122 is not by a constant, then set SHIFT_REG to point to the operand.
9123 Return NULL if OP is not a shifter operand. */
9125 shifter_op_p (rtx op
, rtx
*shift_reg
)
9127 enum rtx_code code
= GET_CODE (op
);
9129 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9130 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9131 return XEXP (op
, 0);
9132 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9133 return XEXP (op
, 0);
9134 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9135 || code
== ASHIFTRT
)
9137 if (!CONST_INT_P (XEXP (op
, 1)))
9138 *shift_reg
= XEXP (op
, 1);
9139 return XEXP (op
, 0);
/* arm_unspec_cost: cost UNSPEC RTXs (unaligned load/store, float
   rounding, ...) into *COST using the current tuning's cpu_cost_table.
   NOTE(review): corrupted extraction -- return type, braces, `return'
   statements and several case labels are missing; each logical line is
   split across multiple physical lines (leading integers are
   original-file line numbers fused into the text).  Verify against the
   upstream file.  */
9146 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9148 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9149 gcc_assert (GET_CODE (x
) == UNSPEC
);
9151 switch (XINT (x
, 1))
9153 case UNSPEC_UNALIGNED_LOAD
:
9154 /* We can only do unaligned loads into the integer unit, and we can't
9156 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9158 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9159 + extra_cost
->ldst
.load_unaligned
);
9162 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9163 ADDR_SPACE_GENERIC
, speed_p
);
9167 case UNSPEC_UNALIGNED_STORE
:
9168 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9170 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9171 + extra_cost
->ldst
.store_unaligned
);
9173 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), UNSPEC
, 0, speed_p
);
9175 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9176 ADDR_SPACE_GENERIC
, speed_p
);
/* Remaining cases (labels missing): a rounding op costed via the FP
   table, then a default of two insns, apparently.  */
9186 *cost
= COSTS_N_INSNS (1);
9188 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9192 *cost
= COSTS_N_INSNS (2);
9198 /* Cost of a libcall. We assume one insn per argument, an amount for the
9199 call (one insn for -Os) and then one for processing the result. */
9200 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
/* Cost a narrow-mode shift-and-arith operation whose shifter operand is
   XEXP (x, IDX); expects `x', `cost', `speed_p', `extra_cost',
   `shift_op' and `shift_reg' in the expansion context.
   NOTE(review): this macro is missing several backslash-continued lines
   in this extraction (the condition/else structure around 9207-9216 is
   incomplete) -- do not use without checking the upstream file.  */
9202 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9205 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9206 if (shift_op != NULL \
9207 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9212 *cost += extra_cost->alu.arith_shift_reg; \
9213 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9216 *cost += extra_cost->alu.arith_shift; \
9218 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9219 + rtx_cost (XEXP (x, 1 - IDX), \
9226 /* RTX costs. Make an estimate of the cost of executing the operation
9227 X, which is contained with an operation with code OUTER_CODE.
9228 SPEED_P indicates whether the cost desired is the performance cost,
9229 or the size cost. The estimate is stored in COST and the return
9230 value is TRUE if the cost calculation is final, or FALSE if the
9231 caller should recurse through the operands of X to add additional
9234 We currently make no attempt to model the size savings of Thumb-2
9235 16-bit instructions. At the normal points in compilation where
9236 this code is called we have no measure of whether the condition
9237 flags are live or not, and thus no realistic way to determine what
9238 the size will eventually be. */
9240 arm_new_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9241 const struct cpu_cost_table
*extra_cost
,
9242 int *cost
, bool speed_p
)
9244 enum machine_mode mode
= GET_MODE (x
);
9249 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9251 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9259 /* SET RTXs don't have a mode so we get it from the destination. */
9260 mode
= GET_MODE (SET_DEST (x
));
9262 if (REG_P (SET_SRC (x
))
9263 && REG_P (SET_DEST (x
)))
9265 /* Assume that most copies can be done with a single insn,
9266 unless we don't have HW FP, in which case everything
9267 larger than word mode will require two insns. */
9268 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9269 && GET_MODE_SIZE (mode
) > 4)
9272 /* Conditional register moves can be encoded
9273 in 16 bits in Thumb mode. */
9274 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9280 if (CONST_INT_P (SET_SRC (x
)))
9282 /* Handle CONST_INT here, since the value doesn't have a mode
9283 and we would otherwise be unable to work out the true cost. */
9284 *cost
= rtx_cost (SET_DEST (x
), SET
, 0, speed_p
);
9286 /* Slightly lower the cost of setting a core reg to a constant.
9287 This helps break up chains and allows for better scheduling. */
9288 if (REG_P (SET_DEST (x
))
9289 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9292 /* Immediate moves with an immediate in the range [0, 255] can be
9293 encoded in 16 bits in Thumb mode. */
9294 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9295 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9297 goto const_int_cost
;
9303 /* A memory access costs 1 insn if the mode is small, or the address is
9304 a single register, otherwise it costs one insn per word. */
9305 if (REG_P (XEXP (x
, 0)))
9306 *cost
= COSTS_N_INSNS (1);
9308 && GET_CODE (XEXP (x
, 0)) == PLUS
9309 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9310 /* This will be split into two instructions.
9311 See arm.md:calculate_pic_address. */
9312 *cost
= COSTS_N_INSNS (2);
9314 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9316 /* For speed optimizations, add the costs of the address and
9317 accessing memory. */
9320 *cost
+= (extra_cost
->ldst
.load
9321 + arm_address_cost (XEXP (x
, 0), mode
,
9322 ADDR_SPACE_GENERIC
, speed_p
));
9324 *cost
+= extra_cost
->ldst
.load
;
9330 /* Calculations of LDM costs are complex. We assume an initial cost
9331 (ldm_1st) which will load the number of registers mentioned in
9332 ldm_regs_per_insn_1st registers; then each additional
9333 ldm_regs_per_insn_subsequent registers cost one more insn. The
9334 formula for N regs is thus:
9336 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9337 + ldm_regs_per_insn_subsequent - 1)
9338 / ldm_regs_per_insn_subsequent).
9340 Additional costs may also be added for addressing. A similar
9341 formula is used for STM. */
9343 bool is_ldm
= load_multiple_operation (x
, SImode
);
9344 bool is_stm
= store_multiple_operation (x
, SImode
);
9346 *cost
= COSTS_N_INSNS (1);
9348 if (is_ldm
|| is_stm
)
9352 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9353 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9354 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9355 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9356 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9357 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9358 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9360 *cost
+= regs_per_insn_1st
9361 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9362 + regs_per_insn_sub
- 1)
9363 / regs_per_insn_sub
);
9372 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9373 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9374 *cost
= COSTS_N_INSNS (speed_p
9375 ? extra_cost
->fp
[mode
!= SFmode
].div
: 1);
9376 else if (mode
== SImode
&& TARGET_IDIV
)
9377 *cost
= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 1);
9379 *cost
= LIBCALL_COST (2);
9380 return false; /* All arguments must be in registers. */
9384 *cost
= LIBCALL_COST (2);
9385 return false; /* All arguments must be in registers. */
9388 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9390 *cost
= (COSTS_N_INSNS (2)
9391 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9393 *cost
+= extra_cost
->alu
.shift_reg
;
9401 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9403 *cost
= (COSTS_N_INSNS (3)
9404 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9406 *cost
+= 2 * extra_cost
->alu
.shift
;
9409 else if (mode
== SImode
)
9411 *cost
= (COSTS_N_INSNS (1)
9412 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9413 /* Slightly disparage register shifts at -Os, but not by much. */
9414 if (!CONST_INT_P (XEXP (x
, 1)))
9415 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9416 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9419 else if (GET_MODE_CLASS (mode
) == MODE_INT
9420 && GET_MODE_SIZE (mode
) < 4)
9424 *cost
= (COSTS_N_INSNS (1)
9425 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9426 /* Slightly disparage register shifts at -Os, but not by
9428 if (!CONST_INT_P (XEXP (x
, 1)))
9429 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9430 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9432 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9434 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9436 /* Can use SBFX/UBFX. */
9437 *cost
= COSTS_N_INSNS (1);
9439 *cost
+= extra_cost
->alu
.bfx
;
9440 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9444 *cost
= COSTS_N_INSNS (2);
9445 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9448 if (CONST_INT_P (XEXP (x
, 1)))
9449 *cost
+= 2 * extra_cost
->alu
.shift
;
9451 *cost
+= (extra_cost
->alu
.shift
9452 + extra_cost
->alu
.shift_reg
);
9455 /* Slightly disparage register shifts. */
9456 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9461 *cost
= COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x
, 1)));
9462 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9465 if (CONST_INT_P (XEXP (x
, 1)))
9466 *cost
+= (2 * extra_cost
->alu
.shift
9467 + extra_cost
->alu
.log_shift
);
9469 *cost
+= (extra_cost
->alu
.shift
9470 + extra_cost
->alu
.shift_reg
9471 + extra_cost
->alu
.log_shift_reg
);
9477 *cost
= LIBCALL_COST (2);
9485 *cost
= COSTS_N_INSNS (1);
9487 *cost
+= extra_cost
->alu
.rev
;
9494 /* No rev instruction available. Look at arm_legacy_rev
9495 and thumb_legacy_rev for the form of RTL used then. */
9498 *cost
= COSTS_N_INSNS (10);
9502 *cost
+= 6 * extra_cost
->alu
.shift
;
9503 *cost
+= 3 * extra_cost
->alu
.logical
;
9508 *cost
= COSTS_N_INSNS (5);
9512 *cost
+= 2 * extra_cost
->alu
.shift
;
9513 *cost
+= extra_cost
->alu
.arith_shift
;
9514 *cost
+= 2 * extra_cost
->alu
.logical
;
9522 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9523 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9525 *cost
= COSTS_N_INSNS (1);
9526 if (GET_CODE (XEXP (x
, 0)) == MULT
9527 || GET_CODE (XEXP (x
, 1)) == MULT
)
9529 rtx mul_op0
, mul_op1
, sub_op
;
9532 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9534 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9536 mul_op0
= XEXP (XEXP (x
, 0), 0);
9537 mul_op1
= XEXP (XEXP (x
, 0), 1);
9538 sub_op
= XEXP (x
, 1);
9542 mul_op0
= XEXP (XEXP (x
, 1), 0);
9543 mul_op1
= XEXP (XEXP (x
, 1), 1);
9544 sub_op
= XEXP (x
, 0);
9547 /* The first operand of the multiply may be optionally
9549 if (GET_CODE (mul_op0
) == NEG
)
9550 mul_op0
= XEXP (mul_op0
, 0);
9552 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9553 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9554 + rtx_cost (sub_op
, code
, 0, speed_p
));
9560 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9566 rtx shift_by_reg
= NULL
;
9570 *cost
= COSTS_N_INSNS (1);
9572 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9573 if (shift_op
== NULL
)
9575 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9576 non_shift_op
= XEXP (x
, 0);
9579 non_shift_op
= XEXP (x
, 1);
9581 if (shift_op
!= NULL
)
9583 if (shift_by_reg
!= NULL
)
9586 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9587 *cost
+= rtx_cost (shift_by_reg
, code
, 0, speed_p
);
9590 *cost
+= extra_cost
->alu
.arith_shift
;
9592 *cost
+= (rtx_cost (shift_op
, code
, 0, speed_p
)
9593 + rtx_cost (non_shift_op
, code
, 0, speed_p
));
9598 && GET_CODE (XEXP (x
, 1)) == MULT
)
9602 *cost
+= extra_cost
->mult
[0].add
;
9603 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9604 + rtx_cost (XEXP (XEXP (x
, 1), 0), MULT
, 0, speed_p
)
9605 + rtx_cost (XEXP (XEXP (x
, 1), 1), MULT
, 1, speed_p
));
9609 if (CONST_INT_P (XEXP (x
, 0)))
9611 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9612 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9614 *cost
= COSTS_N_INSNS (insns
);
9616 *cost
+= insns
* extra_cost
->alu
.arith
;
9617 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9624 if (GET_MODE_CLASS (mode
) == MODE_INT
9625 && GET_MODE_SIZE (mode
) < 4)
9627 rtx shift_op
, shift_reg
;
9630 /* We check both sides of the MINUS for shifter operands since,
9631 unlike PLUS, it's not commutative. */
9633 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9634 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9636 /* Slightly disparage, as we might need to widen the result. */
9637 *cost
= 1 + COSTS_N_INSNS (1);
9639 *cost
+= extra_cost
->alu
.arith
;
9641 if (CONST_INT_P (XEXP (x
, 0)))
9643 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9652 *cost
= COSTS_N_INSNS (2);
9654 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9656 rtx op1
= XEXP (x
, 1);
9659 *cost
+= 2 * extra_cost
->alu
.arith
;
9661 if (GET_CODE (op1
) == ZERO_EXTEND
)
9662 *cost
+= rtx_cost (XEXP (op1
, 0), ZERO_EXTEND
, 0, speed_p
);
9664 *cost
+= rtx_cost (op1
, MINUS
, 1, speed_p
);
9665 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
,
9669 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9672 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9673 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), SIGN_EXTEND
,
9675 + rtx_cost (XEXP (x
, 1), MINUS
, 1, speed_p
));
9678 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9679 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9682 *cost
+= (extra_cost
->alu
.arith
9683 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9684 ? extra_cost
->alu
.arith
9685 : extra_cost
->alu
.arith_shift
));
9686 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9687 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9688 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9693 *cost
+= 2 * extra_cost
->alu
.arith
;
9699 *cost
= LIBCALL_COST (2);
9703 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9704 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9706 *cost
= COSTS_N_INSNS (1);
9707 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9709 rtx mul_op0
, mul_op1
, add_op
;
9712 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9714 mul_op0
= XEXP (XEXP (x
, 0), 0);
9715 mul_op1
= XEXP (XEXP (x
, 0), 1);
9716 add_op
= XEXP (x
, 1);
9718 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9719 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9720 + rtx_cost (add_op
, code
, 0, speed_p
));
9726 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9729 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9731 *cost
= LIBCALL_COST (2);
9735 /* Narrow modes can be synthesized in SImode, but the range
9736 of useful sub-operations is limited. Check for shift operations
9737 on one of the operands. Only left shifts can be used in the
9739 if (GET_MODE_CLASS (mode
) == MODE_INT
9740 && GET_MODE_SIZE (mode
) < 4)
9742 rtx shift_op
, shift_reg
;
9745 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9747 if (CONST_INT_P (XEXP (x
, 1)))
9749 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9750 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9752 *cost
= COSTS_N_INSNS (insns
);
9754 *cost
+= insns
* extra_cost
->alu
.arith
;
9755 /* Slightly penalize a narrow operation as the result may
9757 *cost
+= 1 + rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9761 /* Slightly penalize a narrow operation as the result may
9763 *cost
= 1 + COSTS_N_INSNS (1);
9765 *cost
+= extra_cost
->alu
.arith
;
9772 rtx shift_op
, shift_reg
;
9774 *cost
= COSTS_N_INSNS (1);
9776 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9777 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9779 /* UXTA[BH] or SXTA[BH]. */
9781 *cost
+= extra_cost
->alu
.extend_arith
;
9782 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9784 + rtx_cost (XEXP (x
, 1), PLUS
, 0, speed_p
));
9789 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9790 if (shift_op
!= NULL
)
9795 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9796 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9799 *cost
+= extra_cost
->alu
.arith_shift
;
9801 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9802 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9805 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9807 rtx mul_op
= XEXP (x
, 0);
9809 *cost
= COSTS_N_INSNS (1);
9811 if (TARGET_DSP_MULTIPLY
9812 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9813 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9814 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9815 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9816 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9817 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9818 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9819 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9820 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9821 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9822 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9823 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9828 *cost
+= extra_cost
->mult
[0].extend_add
;
9829 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0),
9830 SIGN_EXTEND
, 0, speed_p
)
9831 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0),
9832 SIGN_EXTEND
, 0, speed_p
)
9833 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9838 *cost
+= extra_cost
->mult
[0].add
;
9839 *cost
+= (rtx_cost (XEXP (mul_op
, 0), MULT
, 0, speed_p
)
9840 + rtx_cost (XEXP (mul_op
, 1), MULT
, 1, speed_p
)
9841 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9844 if (CONST_INT_P (XEXP (x
, 1)))
9846 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9847 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9849 *cost
= COSTS_N_INSNS (insns
);
9851 *cost
+= insns
* extra_cost
->alu
.arith
;
9852 *cost
+= rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9861 && GET_CODE (XEXP (x
, 0)) == MULT
9862 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9863 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9864 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9865 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9867 *cost
= COSTS_N_INSNS (1);
9869 *cost
+= extra_cost
->mult
[1].extend_add
;
9870 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
9871 ZERO_EXTEND
, 0, speed_p
)
9872 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0),
9873 ZERO_EXTEND
, 0, speed_p
)
9874 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9878 *cost
= COSTS_N_INSNS (2);
9880 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9881 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9884 *cost
+= (extra_cost
->alu
.arith
9885 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9886 ? extra_cost
->alu
.arith
9887 : extra_cost
->alu
.arith_shift
));
9889 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9891 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9896 *cost
+= 2 * extra_cost
->alu
.arith
;
9901 *cost
= LIBCALL_COST (2);
9904 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
9906 *cost
= COSTS_N_INSNS (1);
9908 *cost
+= extra_cost
->alu
.rev
;
9916 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9917 rtx op0
= XEXP (x
, 0);
9918 rtx shift_op
, shift_reg
;
9920 *cost
= COSTS_N_INSNS (1);
9924 || (code
== IOR
&& TARGET_THUMB2
)))
9925 op0
= XEXP (op0
, 0);
9928 shift_op
= shifter_op_p (op0
, &shift_reg
);
9929 if (shift_op
!= NULL
)
9934 *cost
+= extra_cost
->alu
.log_shift_reg
;
9935 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9938 *cost
+= extra_cost
->alu
.log_shift
;
9940 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9941 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9945 if (CONST_INT_P (XEXP (x
, 1)))
9947 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9948 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9951 *cost
= COSTS_N_INSNS (insns
);
9953 *cost
+= insns
* extra_cost
->alu
.logical
;
9954 *cost
+= rtx_cost (op0
, code
, 0, speed_p
);
9959 *cost
+= extra_cost
->alu
.logical
;
9960 *cost
+= (rtx_cost (op0
, code
, 0, speed_p
)
9961 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9967 rtx op0
= XEXP (x
, 0);
9968 enum rtx_code subcode
= GET_CODE (op0
);
9970 *cost
= COSTS_N_INSNS (2);
9974 || (code
== IOR
&& TARGET_THUMB2
)))
9975 op0
= XEXP (op0
, 0);
9977 if (GET_CODE (op0
) == ZERO_EXTEND
)
9980 *cost
+= 2 * extra_cost
->alu
.logical
;
9982 *cost
+= (rtx_cost (XEXP (op0
, 0), ZERO_EXTEND
, 0, speed_p
)
9983 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
9986 else if (GET_CODE (op0
) == SIGN_EXTEND
)
9989 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
9991 *cost
+= (rtx_cost (XEXP (op0
, 0), SIGN_EXTEND
, 0, speed_p
)
9992 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
9997 *cost
+= 2 * extra_cost
->alu
.logical
;
10003 *cost
= LIBCALL_COST (2);
10007 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10008 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10010 rtx op0
= XEXP (x
, 0);
10012 *cost
= COSTS_N_INSNS (1);
10014 if (GET_CODE (op0
) == NEG
)
10015 op0
= XEXP (op0
, 0);
10018 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10020 *cost
+= (rtx_cost (op0
, MULT
, 0, speed_p
)
10021 + rtx_cost (XEXP (x
, 1), MULT
, 1, speed_p
));
10024 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10026 *cost
= LIBCALL_COST (2);
10030 if (mode
== SImode
)
10032 *cost
= COSTS_N_INSNS (1);
10033 if (TARGET_DSP_MULTIPLY
10034 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10035 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10036 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10037 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10038 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10039 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10040 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10041 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10042 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10043 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10044 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10045 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10048 /* SMUL[TB][TB]. */
10050 *cost
+= extra_cost
->mult
[0].extend
;
10051 *cost
+= (rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed_p
)
10052 + rtx_cost (XEXP (x
, 1), SIGN_EXTEND
, 0, speed_p
));
10056 *cost
+= extra_cost
->mult
[0].simple
;
10060 if (mode
== DImode
)
10063 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10064 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10065 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10066 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10068 *cost
= COSTS_N_INSNS (1);
10070 *cost
+= extra_cost
->mult
[1].extend
;
10071 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0),
10072 ZERO_EXTEND
, 0, speed_p
)
10073 + rtx_cost (XEXP (XEXP (x
, 1), 0),
10074 ZERO_EXTEND
, 0, speed_p
));
10078 *cost
= LIBCALL_COST (2);
10083 *cost
= LIBCALL_COST (2);
10087 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10088 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10090 *cost
= COSTS_N_INSNS (1);
10092 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10096 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10098 *cost
= LIBCALL_COST (1);
10102 if (mode
== SImode
)
10104 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10106 *cost
= COSTS_N_INSNS (2);
10107 /* Assume the non-flag-changing variant. */
10109 *cost
+= (extra_cost
->alu
.log_shift
10110 + extra_cost
->alu
.arith_shift
);
10111 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ABS
, 0, speed_p
);
10115 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10116 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10118 *cost
= COSTS_N_INSNS (2);
10119 /* No extra cost for MOV imm and MVN imm. */
10120 /* If the comparison op is using the flags, there's no further
10121 cost, otherwise we need to add the cost of the comparison. */
10122 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10123 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10124 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10126 *cost
+= (COSTS_N_INSNS (1)
10127 + rtx_cost (XEXP (XEXP (x
, 0), 0), COMPARE
, 0,
10129 + rtx_cost (XEXP (XEXP (x
, 0), 1), COMPARE
, 1,
10132 *cost
+= extra_cost
->alu
.arith
;
10136 *cost
= COSTS_N_INSNS (1);
10138 *cost
+= extra_cost
->alu
.arith
;
10142 if (GET_MODE_CLASS (mode
) == MODE_INT
10143 && GET_MODE_SIZE (mode
) < 4)
10145 /* Slightly disparage, as we might need an extend operation. */
10146 *cost
= 1 + COSTS_N_INSNS (1);
10148 *cost
+= extra_cost
->alu
.arith
;
10152 if (mode
== DImode
)
10154 *cost
= COSTS_N_INSNS (2);
10156 *cost
+= 2 * extra_cost
->alu
.arith
;
10161 *cost
= LIBCALL_COST (1);
10165 if (mode
== SImode
)
10168 rtx shift_reg
= NULL
;
10170 *cost
= COSTS_N_INSNS (1);
10171 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10175 if (shift_reg
!= NULL
)
10178 *cost
+= extra_cost
->alu
.log_shift_reg
;
10179 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10182 *cost
+= extra_cost
->alu
.log_shift
;
10183 *cost
+= rtx_cost (shift_op
, ASHIFT
, 0, speed_p
);
10188 *cost
+= extra_cost
->alu
.logical
;
10191 if (mode
== DImode
)
10193 *cost
= COSTS_N_INSNS (2);
10199 *cost
+= LIBCALL_COST (1);
10204 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10206 *cost
= COSTS_N_INSNS (4);
10209 int op1cost
= rtx_cost (XEXP (x
, 1), SET
, 1, speed_p
);
10210 int op2cost
= rtx_cost (XEXP (x
, 2), SET
, 1, speed_p
);
10212 *cost
= rtx_cost (XEXP (x
, 0), IF_THEN_ELSE
, 0, speed_p
);
10213 /* Assume that if one arm of the if_then_else is a register,
10214 that it will be tied with the result and eliminate the
10215 conditional insn. */
10216 if (REG_P (XEXP (x
, 1)))
10218 else if (REG_P (XEXP (x
, 2)))
10224 if (extra_cost
->alu
.non_exec_costs_exec
)
10225 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10227 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10230 *cost
+= op1cost
+ op2cost
;
10236 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10240 enum machine_mode op0mode
;
10241 /* We'll mostly assume that the cost of a compare is the cost of the
10242 LHS. However, there are some notable exceptions. */
10244 /* Floating point compares are never done as side-effects. */
10245 op0mode
= GET_MODE (XEXP (x
, 0));
10246 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10247 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10249 *cost
= COSTS_N_INSNS (1);
10251 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10253 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10255 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10261 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10263 *cost
= LIBCALL_COST (2);
10267 /* DImode compares normally take two insns. */
10268 if (op0mode
== DImode
)
10270 *cost
= COSTS_N_INSNS (2);
10272 *cost
+= 2 * extra_cost
->alu
.arith
;
10276 if (op0mode
== SImode
)
10281 if (XEXP (x
, 1) == const0_rtx
10282 && !(REG_P (XEXP (x
, 0))
10283 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10284 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10286 *cost
= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10288 /* Multiply operations that set the flags are often
10289 significantly more expensive. */
10291 && GET_CODE (XEXP (x
, 0)) == MULT
10292 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10293 *cost
+= extra_cost
->mult
[0].flag_setting
;
10296 && GET_CODE (XEXP (x
, 0)) == PLUS
10297 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10298 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10300 *cost
+= extra_cost
->mult
[0].flag_setting
;
10305 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10306 if (shift_op
!= NULL
)
10308 *cost
= COSTS_N_INSNS (1);
10309 if (shift_reg
!= NULL
)
10311 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10313 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10316 *cost
+= extra_cost
->alu
.arith_shift
;
10317 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10318 + rtx_cost (XEXP (x
, 1), COMPARE
, 1, speed_p
));
10322 *cost
= COSTS_N_INSNS (1);
10324 *cost
+= extra_cost
->alu
.arith
;
10325 if (CONST_INT_P (XEXP (x
, 1))
10326 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10328 *cost
+= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10336 *cost
= LIBCALL_COST (2);
10359 if (outer_code
== SET
)
10361 /* Is it a store-flag operation? */
10362 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10363 && XEXP (x
, 1) == const0_rtx
)
10365 /* Thumb also needs an IT insn. */
10366 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10369 if (XEXP (x
, 1) == const0_rtx
)
10374 /* LSR Rd, Rn, #31. */
10375 *cost
= COSTS_N_INSNS (1);
10377 *cost
+= extra_cost
->alu
.shift
;
10387 *cost
= COSTS_N_INSNS (2);
10391 /* RSBS T1, Rn, Rn, LSR #31
10393 *cost
= COSTS_N_INSNS (2);
10395 *cost
+= extra_cost
->alu
.arith_shift
;
10399 /* RSB Rd, Rn, Rn, ASR #1
10400 LSR Rd, Rd, #31. */
10401 *cost
= COSTS_N_INSNS (2);
10403 *cost
+= (extra_cost
->alu
.arith_shift
10404 + extra_cost
->alu
.shift
);
10410 *cost
= COSTS_N_INSNS (2);
10412 *cost
+= extra_cost
->alu
.shift
;
10416 /* Remaining cases are either meaningless or would take
10417 three insns anyway. */
10418 *cost
= COSTS_N_INSNS (3);
10421 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10426 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 4 : 3);
10427 if (CONST_INT_P (XEXP (x
, 1))
10428 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10430 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10437 /* Not directly inside a set. If it involves the condition code
10438 register it must be the condition for a branch, cond_exec or
10439 I_T_E operation. Since the comparison is performed elsewhere
10440 this is just the control part which has no additional
10442 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10443 && XEXP (x
, 1) == const0_rtx
)
10451 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10452 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10454 *cost
= COSTS_N_INSNS (1);
10456 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10460 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10462 *cost
= LIBCALL_COST (1);
10466 if (mode
== SImode
)
10468 *cost
= COSTS_N_INSNS (1);
10470 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10474 *cost
= LIBCALL_COST (1);
10478 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10479 && MEM_P (XEXP (x
, 0)))
10481 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10483 if (mode
== DImode
)
10484 *cost
+= COSTS_N_INSNS (1);
10489 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10490 *cost
+= extra_cost
->ldst
.load
;
10492 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10494 if (mode
== DImode
)
10495 *cost
+= extra_cost
->alu
.shift
;
10500 /* Widening from less than 32-bits requires an extend operation. */
10501 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10503 /* We have SXTB/SXTH. */
10504 *cost
= COSTS_N_INSNS (1);
10505 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10507 *cost
+= extra_cost
->alu
.extend
;
10509 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10511 /* Needs two shifts. */
10512 *cost
= COSTS_N_INSNS (2);
10513 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10515 *cost
+= 2 * extra_cost
->alu
.shift
;
10518 /* Widening beyond 32-bits requires one more insn. */
10519 if (mode
== DImode
)
10521 *cost
+= COSTS_N_INSNS (1);
10523 *cost
+= extra_cost
->alu
.shift
;
10530 || GET_MODE (XEXP (x
, 0)) == SImode
10531 || GET_MODE (XEXP (x
, 0)) == QImode
)
10532 && MEM_P (XEXP (x
, 0)))
10534 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10536 if (mode
== DImode
)
10537 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10542 /* Widening from less than 32-bits requires an extend operation. */
10543 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10545 /* UXTB can be a shorter instruction in Thumb2, but it might
10546 be slower than the AND Rd, Rn, #255 alternative. When
10547 optimizing for speed it should never be slower to use
10548 AND, and we don't really model 16-bit vs 32-bit insns
10550 *cost
= COSTS_N_INSNS (1);
10552 *cost
+= extra_cost
->alu
.logical
;
10554 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10556 /* We have UXTB/UXTH. */
10557 *cost
= COSTS_N_INSNS (1);
10558 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10560 *cost
+= extra_cost
->alu
.extend
;
10562 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10564 /* Needs two shifts. It's marginally preferable to use
10565 shifts rather than two BIC instructions as the second
10566 shift may merge with a subsequent insn as a shifter
10568 *cost
= COSTS_N_INSNS (2);
10569 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10571 *cost
+= 2 * extra_cost
->alu
.shift
;
10573 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10574 *cost
= COSTS_N_INSNS (1);
10576 /* Widening beyond 32-bits requires one more insn. */
10577 if (mode
== DImode
)
10579 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10586 /* CONST_INT has no mode, so we cannot tell for sure how many
10587 insns are really going to be needed. The best we can do is
10588 look at the value passed. If it fits in SImode, then assume
10589 that's the mode it will be used for. Otherwise assume it
10590 will be used in DImode. */
10591 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10596 /* Avoid blowing up in arm_gen_constant (). */
10597 if (!(outer_code
== PLUS
10598 || outer_code
== AND
10599 || outer_code
== IOR
10600 || outer_code
== XOR
10601 || outer_code
== MINUS
))
10605 if (mode
== SImode
)
10607 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10608 INTVAL (x
), NULL
, NULL
,
10614 *cost
+= COSTS_N_INSNS (arm_gen_constant
10615 (outer_code
, SImode
, NULL
,
10616 trunc_int_for_mode (INTVAL (x
), SImode
),
10618 + arm_gen_constant (outer_code
, SImode
, NULL
,
10619 INTVAL (x
) >> 32, NULL
,
10631 if (arm_arch_thumb2
&& !flag_pic
)
10632 *cost
= COSTS_N_INSNS (2);
10634 *cost
= COSTS_N_INSNS (1) + extra_cost
->ldst
.load
;
10637 *cost
= COSTS_N_INSNS (2);
10641 *cost
+= COSTS_N_INSNS (1);
10643 *cost
+= extra_cost
->alu
.arith
;
10649 *cost
= COSTS_N_INSNS (4);
10654 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10655 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10657 if (vfp3_const_double_rtx (x
))
10659 *cost
= COSTS_N_INSNS (1);
10661 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10667 *cost
= COSTS_N_INSNS (1);
10668 if (mode
== DFmode
)
10669 *cost
+= extra_cost
->ldst
.loadd
;
10671 *cost
+= extra_cost
->ldst
.loadf
;
10674 *cost
= COSTS_N_INSNS (2 + (mode
== DFmode
));
10678 *cost
= COSTS_N_INSNS (4);
10684 && TARGET_HARD_FLOAT
10685 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10686 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10687 *cost
= COSTS_N_INSNS (1);
10689 *cost
= COSTS_N_INSNS (4);
10694 *cost
= COSTS_N_INSNS (1);
10695 /* When optimizing for size, we prefer constant pool entries to
10696 MOVW/MOVT pairs, so bump the cost of these slightly. */
10702 *cost
= COSTS_N_INSNS (1);
10704 *cost
+= extra_cost
->alu
.clz
;
10708 if (XEXP (x
, 1) == const0_rtx
)
10710 *cost
= COSTS_N_INSNS (1);
10712 *cost
+= extra_cost
->alu
.log_shift
;
10713 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10716 /* Fall through. */
10720 *cost
= COSTS_N_INSNS (2);
10724 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10725 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10726 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10727 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10728 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10729 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10730 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10731 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10734 *cost
= COSTS_N_INSNS (1);
10736 *cost
+= extra_cost
->mult
[1].extend
;
10737 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), ZERO_EXTEND
, 0,
10739 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), ZERO_EXTEND
,
10743 *cost
= LIBCALL_COST (1);
10747 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10750 /* Reading the PC is like reading any other register. Writing it
10751 is more expensive, but we take that into account elsewhere. */
10756 /* TODO: Simple zero_extract of bottom bits using AND. */
10757 /* Fall through. */
10761 && CONST_INT_P (XEXP (x
, 1))
10762 && CONST_INT_P (XEXP (x
, 2)))
10764 *cost
= COSTS_N_INSNS (1);
10766 *cost
+= extra_cost
->alu
.bfx
;
10767 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10770 /* Without UBFX/SBFX, need to resort to shift operations. */
10771 *cost
= COSTS_N_INSNS (2);
10773 *cost
+= 2 * extra_cost
->alu
.shift
;
10774 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed_p
);
10778 if (TARGET_HARD_FLOAT
)
10780 *cost
= COSTS_N_INSNS (1);
10782 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10783 if (!TARGET_FPU_ARMV8
10784 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10786 /* Pre v8, widening HF->DF is a two-step process, first
10787 widening to SFmode. */
10788 *cost
+= COSTS_N_INSNS (1);
10790 *cost
+= extra_cost
->fp
[0].widen
;
10792 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10796 *cost
= LIBCALL_COST (1);
10799 case FLOAT_TRUNCATE
:
10800 if (TARGET_HARD_FLOAT
)
10802 *cost
= COSTS_N_INSNS (1);
10804 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10805 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10807 /* Vector modes? */
10809 *cost
= LIBCALL_COST (1);
10813 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10815 rtx op0
= XEXP (x
, 0);
10816 rtx op1
= XEXP (x
, 1);
10817 rtx op2
= XEXP (x
, 2);
10819 *cost
= COSTS_N_INSNS (1);
10821 /* vfms or vfnma. */
10822 if (GET_CODE (op0
) == NEG
)
10823 op0
= XEXP (op0
, 0);
10825 /* vfnms or vfnma. */
10826 if (GET_CODE (op2
) == NEG
)
10827 op2
= XEXP (op2
, 0);
10829 *cost
+= rtx_cost (op0
, FMA
, 0, speed_p
);
10830 *cost
+= rtx_cost (op1
, FMA
, 1, speed_p
);
10831 *cost
+= rtx_cost (op2
, FMA
, 2, speed_p
);
10834 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
10839 *cost
= LIBCALL_COST (3);
10844 if (TARGET_HARD_FLOAT
)
10846 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10848 *cost
= COSTS_N_INSNS (1);
10850 *cost
+= extra_cost
->fp
[GET_MODE (XEXP (x
, 0)) == DFmode
].toint
;
10851 /* Strip of the 'cost' of rounding towards zero. */
10852 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10853 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed_p
);
10855 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10856 /* ??? Increase the cost to deal with transferring from
10857 FP -> CORE registers? */
10860 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10861 && TARGET_FPU_ARMV8
)
10863 *cost
= COSTS_N_INSNS (1);
10865 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10868 /* Vector costs? */
10870 *cost
= LIBCALL_COST (1);
10874 case UNSIGNED_FLOAT
:
10875 if (TARGET_HARD_FLOAT
)
10877 /* ??? Increase the cost to deal with transferring from CORE
10878 -> FP registers? */
10879 *cost
= COSTS_N_INSNS (1);
10881 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10884 *cost
= LIBCALL_COST (1);
10888 *cost
= COSTS_N_INSNS (1);
10893 /* Just a guess. Guess number of instructions in the asm
10894 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10895 though (see PR60663). */
10896 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
10897 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
10899 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
10903 if (mode
!= VOIDmode
)
10904 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10906 *cost
= COSTS_N_INSNS (4); /* Who knows? */
10911 #undef HANDLE_NARROW_SHIFT_ARITH
10913 /* RTX costs when optimizing for size. */
10915 arm_rtx_costs (rtx x
, int code
, int outer_code
, int opno ATTRIBUTE_UNUSED
,
10916 int *total
, bool speed
)
10920 if (TARGET_OLD_RTX_COSTS
10921 || (!current_tune
->insn_extra_cost
&& !TARGET_NEW_GENERIC_COSTS
))
10923 /* Old way. (Deprecated.) */
10925 result
= arm_size_rtx_costs (x
, (enum rtx_code
) code
,
10926 (enum rtx_code
) outer_code
, total
);
10928 result
= current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
10929 (enum rtx_code
) outer_code
, total
,
10935 if (current_tune
->insn_extra_cost
)
10936 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
10937 (enum rtx_code
) outer_code
,
10938 current_tune
->insn_extra_cost
,
10940 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10941 && current_tune->insn_extra_cost != NULL */
10943 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
10944 (enum rtx_code
) outer_code
,
10945 &generic_extra_costs
, total
, speed
);
10948 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
10950 print_rtl_single (dump_file
, x
);
10951 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
10952 *total
, result
? "final" : "partial");
10957 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10958 supported on any "slowmul" cores, so it can be ignored. */
10961 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10962 int *total
, bool speed
)
10964 enum machine_mode mode
= GET_MODE (x
);
10968 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
10975 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10978 *total
= COSTS_N_INSNS (20);
10982 if (CONST_INT_P (XEXP (x
, 1)))
10984 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
10985 & (unsigned HOST_WIDE_INT
) 0xffffffff);
10986 int cost
, const_ok
= const_ok_for_arm (i
);
10987 int j
, booth_unit_size
;
10989 /* Tune as appropriate. */
10990 cost
= const_ok
? 4 : 8;
10991 booth_unit_size
= 2;
10992 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
10994 i
>>= booth_unit_size
;
10998 *total
= COSTS_N_INSNS (cost
);
10999 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
11003 *total
= COSTS_N_INSNS (20);
11007 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
11012 /* RTX cost for cores with a fast multiply unit (M variants). */
11015 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11016 int *total
, bool speed
)
11018 enum machine_mode mode
= GET_MODE (x
);
11022 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11026 /* ??? should thumb2 use different costs? */
11030 /* There is no point basing this on the tuning, since it is always the
11031 fast variant if it exists at all. */
11033 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11034 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11035 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11037 *total
= COSTS_N_INSNS(2);
11042 if (mode
== DImode
)
11044 *total
= COSTS_N_INSNS (5);
11048 if (CONST_INT_P (XEXP (x
, 1)))
11050 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11051 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11052 int cost
, const_ok
= const_ok_for_arm (i
);
11053 int j
, booth_unit_size
;
11055 /* Tune as appropriate. */
11056 cost
= const_ok
? 4 : 8;
11057 booth_unit_size
= 8;
11058 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11060 i
>>= booth_unit_size
;
11064 *total
= COSTS_N_INSNS(cost
);
11068 if (mode
== SImode
)
11070 *total
= COSTS_N_INSNS (4);
11074 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11076 if (TARGET_HARD_FLOAT
11078 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11080 *total
= COSTS_N_INSNS (1);
11085 /* Requires a lib call */
11086 *total
= COSTS_N_INSNS (20);
11090 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11095 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11096 so it can be ignored. */
11099 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11100 int *total
, bool speed
)
11102 enum machine_mode mode
= GET_MODE (x
);
11106 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11113 if (GET_CODE (XEXP (x
, 0)) != MULT
)
11114 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11116 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11117 will stall until the multiplication is complete. */
11118 *total
= COSTS_N_INSNS (3);
11122 /* There is no point basing this on the tuning, since it is always the
11123 fast variant if it exists at all. */
11125 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11126 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11127 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11129 *total
= COSTS_N_INSNS (2);
11134 if (mode
== DImode
)
11136 *total
= COSTS_N_INSNS (5);
11140 if (CONST_INT_P (XEXP (x
, 1)))
11142 /* If operand 1 is a constant we can more accurately
11143 calculate the cost of the multiply. The multiplier can
11144 retire 15 bits on the first cycle and a further 12 on the
11145 second. We do, of course, have to load the constant into
11146 a register first. */
11147 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
11148 /* There's a general overhead of one cycle. */
11150 unsigned HOST_WIDE_INT masked_const
;
11152 if (i
& 0x80000000)
11155 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
11157 masked_const
= i
& 0xffff8000;
11158 if (masked_const
!= 0)
11161 masked_const
= i
& 0xf8000000;
11162 if (masked_const
!= 0)
11165 *total
= COSTS_N_INSNS (cost
);
11169 if (mode
== SImode
)
11171 *total
= COSTS_N_INSNS (3);
11175 /* Requires a lib call */
11176 *total
= COSTS_N_INSNS (20);
11180 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11185 /* RTX costs for 9e (and later) cores. */
11188 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11189 int *total
, bool speed
)
11191 enum machine_mode mode
= GET_MODE (x
);
11198 *total
= COSTS_N_INSNS (3);
11202 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11210 /* There is no point basing this on the tuning, since it is always the
11211 fast variant if it exists at all. */
11213 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11214 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11215 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11217 *total
= COSTS_N_INSNS (2);
11222 if (mode
== DImode
)
11224 *total
= COSTS_N_INSNS (5);
11228 if (mode
== SImode
)
11230 *total
= COSTS_N_INSNS (2);
11234 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11236 if (TARGET_HARD_FLOAT
11238 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11240 *total
= COSTS_N_INSNS (1);
11245 *total
= COSTS_N_INSNS (20);
11249 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11252 /* All address computations that can be done are free, but rtx cost returns
11253 the same for practically all of them. So we weight the different types
11254 of address here in the order (most pref first):
11255 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11257 arm_arm_address_cost (rtx x
)
11259 enum rtx_code c
= GET_CODE (x
);
11261 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
11263 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
11268 if (CONST_INT_P (XEXP (x
, 1)))
11271 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
11281 arm_thumb_address_cost (rtx x
)
11283 enum rtx_code c
= GET_CODE (x
);
11288 && REG_P (XEXP (x
, 0))
11289 && CONST_INT_P (XEXP (x
, 1)))
11296 arm_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
11297 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11299 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11302 /* Adjust cost hook for XScale. */
11304 xscale_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11306 /* Some true dependencies can have a higher cost depending
11307 on precisely how certain input operands are used. */
11308 if (REG_NOTE_KIND(link
) == 0
11309 && recog_memoized (insn
) >= 0
11310 && recog_memoized (dep
) >= 0)
11312 int shift_opnum
= get_attr_shift (insn
);
11313 enum attr_type attr_type
= get_attr_type (dep
);
11315 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11316 operand for INSN. If we have a shifted input operand and the
11317 instruction we depend on is another ALU instruction, then we may
11318 have to account for an additional stall. */
11319 if (shift_opnum
!= 0
11320 && (attr_type
== TYPE_ALU_SHIFT_IMM
11321 || attr_type
== TYPE_ALUS_SHIFT_IMM
11322 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11323 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11324 || attr_type
== TYPE_ALU_SHIFT_REG
11325 || attr_type
== TYPE_ALUS_SHIFT_REG
11326 || attr_type
== TYPE_LOGIC_SHIFT_REG
11327 || attr_type
== TYPE_LOGICS_SHIFT_REG
11328 || attr_type
== TYPE_MOV_SHIFT
11329 || attr_type
== TYPE_MVN_SHIFT
11330 || attr_type
== TYPE_MOV_SHIFT_REG
11331 || attr_type
== TYPE_MVN_SHIFT_REG
))
11333 rtx shifted_operand
;
11336 /* Get the shifted operand. */
11337 extract_insn (insn
);
11338 shifted_operand
= recog_data
.operand
[shift_opnum
];
11340 /* Iterate over all the operands in DEP. If we write an operand
11341 that overlaps with SHIFTED_OPERAND, then we have increase the
11342 cost of this dependency. */
11343 extract_insn (dep
);
11344 preprocess_constraints (dep
);
11345 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11347 /* We can ignore strict inputs. */
11348 if (recog_data
.operand_type
[opno
] == OP_IN
)
11351 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11363 /* Adjust cost hook for Cortex A9. */
11365 cortex_a9_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11367 switch (REG_NOTE_KIND (link
))
11374 case REG_DEP_OUTPUT
:
11375 if (recog_memoized (insn
) >= 0
11376 && recog_memoized (dep
) >= 0)
11378 if (GET_CODE (PATTERN (insn
)) == SET
)
11381 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11383 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11385 enum attr_type attr_type_insn
= get_attr_type (insn
);
11386 enum attr_type attr_type_dep
= get_attr_type (dep
);
11388 /* By default all dependencies of the form
11391 have an extra latency of 1 cycle because
11392 of the input and output dependency in this
11393 case. However this gets modeled as an true
11394 dependency and hence all these checks. */
11395 if (REG_P (SET_DEST (PATTERN (insn
)))
11396 && REG_P (SET_DEST (PATTERN (dep
)))
11397 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
11398 SET_DEST (PATTERN (dep
))))
11400 /* FMACS is a special case where the dependent
11401 instruction can be issued 3 cycles before
11402 the normal latency in case of an output
11404 if ((attr_type_insn
== TYPE_FMACS
11405 || attr_type_insn
== TYPE_FMACD
)
11406 && (attr_type_dep
== TYPE_FMACS
11407 || attr_type_dep
== TYPE_FMACD
))
11409 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11410 *cost
= insn_default_latency (dep
) - 3;
11412 *cost
= insn_default_latency (dep
);
11417 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11418 *cost
= insn_default_latency (dep
) + 1;
11420 *cost
= insn_default_latency (dep
);
11430 gcc_unreachable ();
11436 /* Adjust cost hook for FA726TE. */
11438 fa726te_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11440 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11441 have penalty of 3. */
11442 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
11443 && recog_memoized (insn
) >= 0
11444 && recog_memoized (dep
) >= 0
11445 && get_attr_conds (dep
) == CONDS_SET
)
11447 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11448 if (get_attr_conds (insn
) == CONDS_USE
11449 && get_attr_type (insn
) != TYPE_BRANCH
)
11455 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11456 || get_attr_conds (insn
) == CONDS_USE
)
11466 /* Implement TARGET_REGISTER_MOVE_COST.
11468 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11469 it is typically more expensive than a single memory access. We set
11470 the cost to less than two memory accesses so that floating
11471 point to integer conversion does not go through memory. */
11474 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED
,
11475 reg_class_t from
, reg_class_t to
)
11479 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11480 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11482 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11483 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11485 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11492 if (from
== HI_REGS
|| to
== HI_REGS
)
11499 /* Implement TARGET_MEMORY_MOVE_COST. */
11502 arm_memory_move_cost (enum machine_mode mode
, reg_class_t rclass
,
11503 bool in ATTRIBUTE_UNUSED
)
11509 if (GET_MODE_SIZE (mode
) < 4)
11512 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11516 /* Vectorizer cost model implementation. */
11518 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11520 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11522 int misalign ATTRIBUTE_UNUSED
)
11526 switch (type_of_cost
)
11529 return current_tune
->vec_costs
->scalar_stmt_cost
;
11532 return current_tune
->vec_costs
->scalar_load_cost
;
11535 return current_tune
->vec_costs
->scalar_store_cost
;
11538 return current_tune
->vec_costs
->vec_stmt_cost
;
11541 return current_tune
->vec_costs
->vec_align_load_cost
;
11544 return current_tune
->vec_costs
->vec_store_cost
;
11546 case vec_to_scalar
:
11547 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11549 case scalar_to_vec
:
11550 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11552 case unaligned_load
:
11553 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11555 case unaligned_store
:
11556 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11558 case cond_branch_taken
:
11559 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11561 case cond_branch_not_taken
:
11562 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11565 case vec_promote_demote
:
11566 return current_tune
->vec_costs
->vec_stmt_cost
;
11568 case vec_construct
:
11569 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11570 return elements
/ 2 + 1;
11573 gcc_unreachable ();
11577 /* Implement targetm.vectorize.add_stmt_cost. */
11580 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11581 struct _stmt_vec_info
*stmt_info
, int misalign
,
11582 enum vect_cost_model_location where
)
11584 unsigned *cost
= (unsigned *) data
;
11585 unsigned retval
= 0;
11587 if (flag_vect_cost_model
)
11589 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11590 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11592 /* Statements in an inner loop relative to the loop being
11593 vectorized are weighted more heavily. The value here is
11594 arbitrary and could potentially be improved with analysis. */
11595 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11596 count
*= 50; /* FIXME. */
11598 retval
= (unsigned) (count
* stmt_cost
);
11599 cost
[where
] += retval
;
11605 /* Return true if and only if this insn can dual-issue only as older. */
11607 cortexa7_older_only (rtx insn
)
11609 if (recog_memoized (insn
) < 0)
11612 switch (get_attr_type (insn
))
11615 case TYPE_ALUS_REG
:
11616 case TYPE_LOGIC_REG
:
11617 case TYPE_LOGICS_REG
:
11619 case TYPE_ADCS_REG
:
11624 case TYPE_SHIFT_IMM
:
11625 case TYPE_SHIFT_REG
:
11626 case TYPE_LOAD_BYTE
:
11629 case TYPE_FFARITHS
:
11631 case TYPE_FFARITHD
:
11649 case TYPE_F_STORES
:
11656 /* Return true if and only if this insn can dual-issue as younger. */
11658 cortexa7_younger (FILE *file
, int verbose
, rtx insn
)
11660 if (recog_memoized (insn
) < 0)
11663 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11667 switch (get_attr_type (insn
))
11670 case TYPE_ALUS_IMM
:
11671 case TYPE_LOGIC_IMM
:
11672 case TYPE_LOGICS_IMM
:
11677 case TYPE_MOV_SHIFT
:
11678 case TYPE_MOV_SHIFT_REG
:
11688 /* Look for an instruction that can dual issue only as an older
11689 instruction, and move it in front of any instructions that can
11690 dual-issue as younger, while preserving the relative order of all
11691 other instructions in the ready list. This is a hueuristic to help
11692 dual-issue in later cycles, by postponing issue of more flexible
11693 instructions. This heuristic may affect dual issue opportunities
11694 in the current cycle. */
11696 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx
*ready
, int *n_readyp
,
11700 int first_older_only
= -1, first_younger
= -1;
11704 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11708 /* Traverse the ready list from the head (the instruction to issue
11709 first), and looking for the first instruction that can issue as
11710 younger and the first instruction that can dual-issue only as
11712 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11714 rtx insn
= ready
[i
];
11715 if (cortexa7_older_only (insn
))
11717 first_older_only
= i
;
11719 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11722 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11726 /* Nothing to reorder because either no younger insn found or insn
11727 that can dual-issue only as older appears before any insn that
11728 can dual-issue as younger. */
11729 if (first_younger
== -1)
11732 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11736 /* Nothing to reorder because no older-only insn in the ready list. */
11737 if (first_older_only
== -1)
11740 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11744 /* Move first_older_only insn before first_younger. */
11746 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11747 INSN_UID(ready
[first_older_only
]),
11748 INSN_UID(ready
[first_younger
]));
11749 rtx first_older_only_insn
= ready
[first_older_only
];
11750 for (i
= first_older_only
; i
< first_younger
; i
++)
11752 ready
[i
] = ready
[i
+1];
11755 ready
[i
] = first_older_only_insn
;
11759 /* Implement TARGET_SCHED_REORDER. */
11761 arm_sched_reorder (FILE *file
, int verbose
, rtx
*ready
, int *n_readyp
,
11767 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11770 /* Do nothing for other cores. */
11774 return arm_issue_rate ();
11777 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11778 It corrects the value of COST based on the relationship between
11779 INSN and DEP through the dependence LINK. It returns the new
11780 value. There is a per-core adjust_cost hook to adjust scheduler costs
11781 and the per-core hook can choose to completely override the generic
11782 adjust_cost function. Only put bits of code into arm_adjust_cost that
11783 are common across all cores. */
11785 arm_adjust_cost (rtx insn
, rtx link
, rtx dep
, int cost
)
11789 /* When generating Thumb-1 code, we want to place flag-setting operations
11790 close to a conditional branch which depends on them, so that we can
11791 omit the comparison. */
11793 && REG_NOTE_KIND (link
) == 0
11794 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11795 && recog_memoized (dep
) >= 0
11796 && get_attr_conds (dep
) == CONDS_SET
)
11799 if (current_tune
->sched_adjust_cost
!= NULL
)
11801 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
11805 /* XXX Is this strictly true? */
11806 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
11807 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11810 /* Call insns don't incur a stall, even if they follow a load. */
11811 if (REG_NOTE_KIND (link
) == 0
11815 if ((i_pat
= single_set (insn
)) != NULL
11816 && MEM_P (SET_SRC (i_pat
))
11817 && (d_pat
= single_set (dep
)) != NULL
11818 && MEM_P (SET_DEST (d_pat
)))
11820 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11821 /* This is a load after a store, there is no conflict if the load reads
11822 from a cached area. Assume that loads from the stack, and from the
11823 constant pool are cached, and that others will miss. This is a
11826 if ((GET_CODE (src_mem
) == SYMBOL_REF
11827 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11828 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11829 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11830 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11838 arm_max_conditional_execute (void)
11840 return max_insns_skipped
;
11844 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11847 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11849 return (optimize
> 0) ? 2 : 0;
11853 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11855 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11858 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11859 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11860 sequences of non-executed instructions in IT blocks probably take the same
11861 amount of time as executed instructions (and the IT instruction itself takes
11862 space in icache). This function was experimentally determined to give good
11863 results on a popular embedded benchmark. */
11866 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11868 return (TARGET_32BIT
&& speed_p
) ? 1
11869 : arm_default_branch_cost (speed_p
, predictable_p
);
11872 static bool fp_consts_inited
= false;
11874 static REAL_VALUE_TYPE value_fp0
;
11877 init_fp_table (void)
11881 r
= REAL_VALUE_ATOF ("0", DFmode
);
11883 fp_consts_inited
= true;
11886 /* Return TRUE if rtx X is a valid immediate FP constant. */
11888 arm_const_double_rtx (rtx x
)
11892 if (!fp_consts_inited
)
11895 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
11896 if (REAL_VALUE_MINUS_ZERO (r
))
11899 if (REAL_VALUES_EQUAL (r
, value_fp0
))
11905 /* VFPv3 has a fairly wide range of representable immediates, formed from
11906 "quarter-precision" floating-point values. These can be evaluated using this
11907 formula (with ^ for exponentiation):
11911 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11912 16 <= n <= 31 and 0 <= r <= 7.
11914 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11916 - A (most-significant) is the sign bit.
11917 - BCD are the exponent (encoded as r XOR 3).
11918 - EFGH are the mantissa (encoded as n - 16).
11921 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11922 fconst[sd] instruction, or -1 if X isn't suitable. */
11924 vfp3_const_double_index (rtx x
)
11926 REAL_VALUE_TYPE r
, m
;
11927 int sign
, exponent
;
11928 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11929 unsigned HOST_WIDE_INT mask
;
11930 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11933 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11936 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
11938 /* We can't represent these things, so detect them first. */
11939 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11942 /* Extract sign, exponent and mantissa. */
11943 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11944 r
= real_value_abs (&r
);
11945 exponent
= REAL_EXP (&r
);
11946 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11947 highest (sign) bit, with a fixed binary point at bit point_pos.
11948 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11949 bits for the mantissa, this may fail (low bits would be lost). */
11950 real_ldexp (&m
, &r
, point_pos
- exponent
);
11951 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
11952 mantissa
= w
.elt (0);
11953 mant_hi
= w
.elt (1);
11955 /* If there are bits set in the low part of the mantissa, we can't
11956 represent this value. */
11960 /* Now make it so that mantissa contains the most-significant bits, and move
11961 the point_pos to indicate that the least-significant bits have been
11963 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11964 mantissa
= mant_hi
;
11966 /* We can permit four significant bits of mantissa only, plus a high bit
11967 which is always 1. */
11968 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
11969 if ((mantissa
& mask
) != 0)
11972 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11973 mantissa
>>= point_pos
- 5;
11975 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11976 floating-point immediate zero with Neon using an integer-zero load, but
11977 that case is handled elsewhere.) */
11981 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11983 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11984 normalized significands are in the range [1, 2). (Our mantissa is shifted
11985 left 4 places at this point relative to normalized IEEE754 values). GCC
11986 internally uses [0.5, 1) (see real.c), so the exponent returned from
11987 REAL_EXP must be altered. */
11988 exponent
= 5 - exponent
;
11990 if (exponent
< 0 || exponent
> 7)
11993 /* Sign, mantissa and exponent are now in the correct form to plug into the
11994 formula described in the comment above. */
11995 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11998 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12000 vfp3_const_double_rtx (rtx x
)
12005 return vfp3_const_double_index (x
) != -1;
12008 /* Recognize immediates which can be used in various Neon instructions. Legal
12009 immediates are described by the following table (for VMVN variants, the
12010 bitwise inverse of the constant shown is recognized. In either case, VMOV
12011 is output and the correct instruction to use for a given constant is chosen
12012 by the assembler). The constant shown is replicated across all elements of
12013 the destination vector.
12015 insn elems variant constant (binary)
12016 ---- ----- ------- -----------------
12017 vmov i32 0 00000000 00000000 00000000 abcdefgh
12018 vmov i32 1 00000000 00000000 abcdefgh 00000000
12019 vmov i32 2 00000000 abcdefgh 00000000 00000000
12020 vmov i32 3 abcdefgh 00000000 00000000 00000000
12021 vmov i16 4 00000000 abcdefgh
12022 vmov i16 5 abcdefgh 00000000
12023 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12024 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12025 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12026 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12027 vmvn i16 10 00000000 abcdefgh
12028 vmvn i16 11 abcdefgh 00000000
12029 vmov i32 12 00000000 00000000 abcdefgh 11111111
12030 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12031 vmov i32 14 00000000 abcdefgh 11111111 11111111
12032 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12033 vmov i8 16 abcdefgh
12034 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12035 eeeeeeee ffffffff gggggggg hhhhhhhh
12036 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12037 vmov f32 19 00000000 00000000 00000000 00000000
12039 For case 18, B = !b. Representable values are exactly those accepted by
12040 vfp3_const_double_index, but are output as floating-point numbers rather
12043 For case 19, we will change it to vmov.i32 when assembling.
12045 Variants 0-5 (inclusive) may also be used as immediates for the second
12046 operand of VORR/VBIC instructions.
12048 The INVERSE argument causes the bitwise inverse of the given operand to be
12049 recognized instead (used for recognizing legal immediates for the VAND/VORN
12050 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12051 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12052 output, rather than the real insns vbic/vorr).
12054 INVERSE makes no difference to the recognition of float vectors.
12056 The return value is the variant of immediate as shown in the above table, or
12057 -1 if the given value doesn't match any of the listed patterns.
12060 neon_valid_immediate (rtx op
, enum machine_mode mode
, int inverse
,
12061 rtx
*modconst
, int *elementwidth
)
12063 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12065 for (i = 0; i < idx; i += (STRIDE)) \
12070 immtype = (CLASS); \
12071 elsize = (ELSIZE); \
12075 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
12076 unsigned int innersize
;
12077 unsigned char bytes
[16];
12078 int immtype
= -1, matches
;
12079 unsigned int invmask
= inverse
? 0xff : 0;
12080 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
12084 n_elts
= CONST_VECTOR_NUNITS (op
);
12085 innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12090 if (mode
== VOIDmode
)
12092 innersize
= GET_MODE_SIZE (mode
);
12095 /* Vectors of float constants. */
12096 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
12098 rtx el0
= CONST_VECTOR_ELT (op
, 0);
12099 REAL_VALUE_TYPE r0
;
12101 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
12104 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
12106 for (i
= 1; i
< n_elts
; i
++)
12108 rtx elt
= CONST_VECTOR_ELT (op
, i
);
12109 REAL_VALUE_TYPE re
;
12111 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
12113 if (!REAL_VALUES_EQUAL (r0
, re
))
12118 *modconst
= CONST_VECTOR_ELT (op
, 0);
12123 if (el0
== CONST0_RTX (GET_MODE (el0
)))
12129 /* Splat vector constant out into a byte vector. */
12130 for (i
= 0; i
< n_elts
; i
++)
12132 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
12133 unsigned HOST_WIDE_INT elpart
;
12134 unsigned int part
, parts
;
12136 if (CONST_INT_P (el
))
12138 elpart
= INTVAL (el
);
12141 else if (CONST_DOUBLE_P (el
))
12143 elpart
= CONST_DOUBLE_LOW (el
);
12147 gcc_unreachable ();
12149 for (part
= 0; part
< parts
; part
++)
12152 for (byte
= 0; byte
< innersize
; byte
++)
12154 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
12155 elpart
>>= BITS_PER_UNIT
;
12157 if (CONST_DOUBLE_P (el
))
12158 elpart
= CONST_DOUBLE_HIGH (el
);
12162 /* Sanity check. */
12163 gcc_assert (idx
== GET_MODE_SIZE (mode
));
12167 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
12168 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12170 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12171 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12173 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12174 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12176 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12177 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
12179 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
12181 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
12183 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
12184 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12186 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12187 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12189 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12190 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12192 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12193 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
12195 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
12197 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
12199 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12200 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12202 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12203 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12205 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12206 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12208 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12209 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12211 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
12213 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
12214 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
12222 *elementwidth
= elsize
;
12226 unsigned HOST_WIDE_INT imm
= 0;
12228 /* Un-invert bytes of recognized vector, if necessary. */
12230 for (i
= 0; i
< idx
; i
++)
12231 bytes
[i
] ^= invmask
;
12235 /* FIXME: Broken on 32-bit H_W_I hosts. */
12236 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
12238 for (i
= 0; i
< 8; i
++)
12239 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
12240 << (i
* BITS_PER_UNIT
);
12242 *modconst
= GEN_INT (imm
);
12246 unsigned HOST_WIDE_INT imm
= 0;
12248 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
12249 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
12251 *modconst
= GEN_INT (imm
);
12259 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12260 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12261 float elements), and a modified constant (whatever should be output for a
12262 VMOV) in *MODCONST. */
12265 neon_immediate_valid_for_move (rtx op
, enum machine_mode mode
,
12266 rtx
*modconst
, int *elementwidth
)
12270 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12276 *modconst
= tmpconst
;
12279 *elementwidth
= tmpwidth
;
12284 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12285 the immediate is valid, write a constant suitable for using as an operand
12286 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12287 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12290 neon_immediate_valid_for_logic (rtx op
, enum machine_mode mode
, int inverse
,
12291 rtx
*modconst
, int *elementwidth
)
12295 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12297 if (retval
< 0 || retval
> 5)
12301 *modconst
= tmpconst
;
12304 *elementwidth
= tmpwidth
;
12309 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12310 the immediate is valid, write a constant suitable for using as an operand
12311 to VSHR/VSHL to *MODCONST and the corresponding element width to
12312 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12313 because they have different limitations. */
12316 neon_immediate_valid_for_shift (rtx op
, enum machine_mode mode
,
12317 rtx
*modconst
, int *elementwidth
,
12320 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12321 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12322 unsigned HOST_WIDE_INT last_elt
= 0;
12323 unsigned HOST_WIDE_INT maxshift
;
12325 /* Split vector constant out into a byte vector. */
12326 for (i
= 0; i
< n_elts
; i
++)
12328 rtx el
= CONST_VECTOR_ELT (op
, i
);
12329 unsigned HOST_WIDE_INT elpart
;
12331 if (CONST_INT_P (el
))
12332 elpart
= INTVAL (el
);
12333 else if (CONST_DOUBLE_P (el
))
12336 gcc_unreachable ();
12338 if (i
!= 0 && elpart
!= last_elt
)
12344 /* Shift less than element size. */
12345 maxshift
= innersize
* 8;
12349 /* Left shift immediate value can be from 0 to <size>-1. */
12350 if (last_elt
>= maxshift
)
12355 /* Right shift immediate value can be from 1 to <size>. */
12356 if (last_elt
== 0 || last_elt
> maxshift
)
12361 *elementwidth
= innersize
* 8;
12364 *modconst
= CONST_VECTOR_ELT (op
, 0);
12369 /* Return a string suitable for output of Neon immediate logic operation
12373 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, enum machine_mode mode
,
12374 int inverse
, int quad
)
12376 int width
, is_valid
;
12377 static char templ
[40];
12379 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12381 gcc_assert (is_valid
!= 0);
12384 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12386 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12391 /* Return a string suitable for output of Neon immediate shift operation
12392 (VSHR or VSHL) MNEM. */
12395 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12396 enum machine_mode mode
, int quad
,
12399 int width
, is_valid
;
12400 static char templ
[40];
12402 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12403 gcc_assert (is_valid
!= 0);
12406 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12408 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12413 /* Output a sequence of pairwise operations to implement a reduction.
12414 NOTE: We do "too much work" here, because pairwise operations work on two
12415 registers-worth of operands in one go. Unfortunately we can't exploit those
12416 extra calculations to do the full operation in fewer steps, I don't think.
12417 Although all vector elements of the result but the first are ignored, we
12418 actually calculate the same result in each of the elements. An alternative
12419 such as initially loading a vector with zero to use as each of the second
12420 operands would use up an additional register and take an extra instruction,
12421 for no particular gain. */
12424 neon_pairwise_reduce (rtx op0
, rtx op1
, enum machine_mode mode
,
12425 rtx (*reduc
) (rtx
, rtx
, rtx
))
12427 enum machine_mode inner
= GET_MODE_INNER (mode
);
12428 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
12431 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12433 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12434 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12439 /* If VALS is a vector constant that can be loaded into a register
12440 using VDUP, generate instructions to do so and return an RTX to
12441 assign to the register. Otherwise return NULL_RTX. */
12444 neon_vdup_constant (rtx vals
)
12446 enum machine_mode mode
= GET_MODE (vals
);
12447 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
12448 int n_elts
= GET_MODE_NUNITS (mode
);
12449 bool all_same
= true;
12453 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12456 for (i
= 0; i
< n_elts
; ++i
)
12458 x
= XVECEXP (vals
, 0, i
);
12459 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12464 /* The elements are not all the same. We could handle repeating
12465 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12466 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12470 /* We can load this constant by using VDUP and a constant in a
12471 single ARM register. This will be cheaper than a vector
12474 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12475 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12478 /* Generate code to load VALS, which is a PARALLEL containing only
12479 constants (for vec_init) or CONST_VECTOR, efficiently into a
12480 register. Returns an RTX to copy into the register, or NULL_RTX
12481 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12484 neon_make_constant (rtx vals
)
12486 enum machine_mode mode
= GET_MODE (vals
);
12488 rtx const_vec
= NULL_RTX
;
12489 int n_elts
= GET_MODE_NUNITS (mode
);
12493 if (GET_CODE (vals
) == CONST_VECTOR
)
12495 else if (GET_CODE (vals
) == PARALLEL
)
12497 /* A CONST_VECTOR must contain only CONST_INTs and
12498 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12499 Only store valid constants in a CONST_VECTOR. */
12500 for (i
= 0; i
< n_elts
; ++i
)
12502 rtx x
= XVECEXP (vals
, 0, i
);
12503 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12506 if (n_const
== n_elts
)
12507 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12510 gcc_unreachable ();
12512 if (const_vec
!= NULL
12513 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12514 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12516 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12517 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12518 pipeline cycle; creating the constant takes one or two ARM
12519 pipeline cycles. */
12521 else if (const_vec
!= NULL_RTX
)
12522 /* Load from constant pool. On Cortex-A8 this takes two cycles
12523 (for either double or quad vectors). We can not take advantage
12524 of single-cycle VLD1 because we need a PC-relative addressing
12528 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12529 We can not construct an initializer. */
12533 /* Initialize vector TARGET to VALS. */
12536 neon_expand_vector_init (rtx target
, rtx vals
)
12538 enum machine_mode mode
= GET_MODE (target
);
12539 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
12540 int n_elts
= GET_MODE_NUNITS (mode
);
12541 int n_var
= 0, one_var
= -1;
12542 bool all_same
= true;
12546 for (i
= 0; i
< n_elts
; ++i
)
12548 x
= XVECEXP (vals
, 0, i
);
12549 if (!CONSTANT_P (x
))
12550 ++n_var
, one_var
= i
;
12552 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12558 rtx constant
= neon_make_constant (vals
);
12559 if (constant
!= NULL_RTX
)
12561 emit_move_insn (target
, constant
);
12566 /* Splat a single non-constant element if we can. */
12567 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12569 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12570 emit_insn (gen_rtx_SET (VOIDmode
, target
,
12571 gen_rtx_VEC_DUPLICATE (mode
, x
)));
12575 /* One field is non-constant. Load constant then overwrite varying
12576 field. This is more efficient than using the stack. */
12579 rtx copy
= copy_rtx (vals
);
12580 rtx index
= GEN_INT (one_var
);
12582 /* Load constant part of vector, substitute neighboring value for
12583 varying element. */
12584 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12585 neon_expand_vector_init (target
, copy
);
12587 /* Insert variable. */
12588 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12592 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12595 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12598 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12601 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12604 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12607 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12610 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12613 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12616 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12619 gcc_unreachable ();
12624 /* Construct the vector in memory one field at a time
12625 and load the whole vector. */
12626 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12627 for (i
= 0; i
< n_elts
; i
++)
12628 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12629 i
* GET_MODE_SIZE (inner_mode
)),
12630 XVECEXP (vals
, 0, i
));
12631 emit_move_insn (target
, mem
);
12634 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12635 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12636 reported source locations are bogus. */
12639 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12642 HOST_WIDE_INT lane
;
12644 gcc_assert (CONST_INT_P (operand
));
12646 lane
= INTVAL (operand
);
12648 if (lane
< low
|| lane
>= high
)
12652 /* Bounds-check lanes. */
12655 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12657 bounds_check (operand
, low
, high
, "lane out of range");
12660 /* Bounds-check constants. */
12663 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12665 bounds_check (operand
, low
, high
, "constant out of range");
12669 neon_element_bits (enum machine_mode mode
)
12671 if (mode
== DImode
)
12672 return GET_MODE_BITSIZE (mode
);
12674 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
12678 /* Predicates for `match_operand' and `match_operator'. */
12680 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12681 WB is true if full writeback address modes are allowed and is false
12682 if limited writeback address modes (POST_INC and PRE_DEC) are
12686 arm_coproc_mem_operand (rtx op
, bool wb
)
12690 /* Reject eliminable registers. */
12691 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12692 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12693 || reg_mentioned_p (arg_pointer_rtx
, op
)
12694 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12695 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12696 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12697 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12700 /* Constants are converted into offsets from labels. */
12704 ind
= XEXP (op
, 0);
12706 if (reload_completed
12707 && (GET_CODE (ind
) == LABEL_REF
12708 || (GET_CODE (ind
) == CONST
12709 && GET_CODE (XEXP (ind
, 0)) == PLUS
12710 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12711 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12714 /* Match: (mem (reg)). */
12716 return arm_address_register_rtx_p (ind
, 0);
12718 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12719 acceptable in any case (subject to verification by
12720 arm_address_register_rtx_p). We need WB to be true to accept
12721 PRE_INC and POST_DEC. */
12722 if (GET_CODE (ind
) == POST_INC
12723 || GET_CODE (ind
) == PRE_DEC
12725 && (GET_CODE (ind
) == PRE_INC
12726 || GET_CODE (ind
) == POST_DEC
)))
12727 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12730 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12731 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12732 && GET_CODE (XEXP (ind
, 1)) == PLUS
12733 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12734 ind
= XEXP (ind
, 1);
12739 if (GET_CODE (ind
) == PLUS
12740 && REG_P (XEXP (ind
, 0))
12741 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12742 && CONST_INT_P (XEXP (ind
, 1))
12743 && INTVAL (XEXP (ind
, 1)) > -1024
12744 && INTVAL (XEXP (ind
, 1)) < 1024
12745 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12751 /* Return TRUE if OP is a memory operand which we can load or store a vector
12752 to/from. TYPE is one of the following values:
12753 0 - Vector load/stor (vldr)
12754 1 - Core registers (ldm)
12755 2 - Element/structure loads (vld1)
12758 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12762 /* Reject eliminable registers. */
12763 if (! (reload_in_progress
|| reload_completed
)
12764 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12765 || reg_mentioned_p (arg_pointer_rtx
, op
)
12766 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12767 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12768 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12769 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12772 /* Constants are converted into offsets from labels. */
12776 ind
= XEXP (op
, 0);
12778 if (reload_completed
12779 && (GET_CODE (ind
) == LABEL_REF
12780 || (GET_CODE (ind
) == CONST
12781 && GET_CODE (XEXP (ind
, 0)) == PLUS
12782 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12783 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12786 /* Match: (mem (reg)). */
12788 return arm_address_register_rtx_p (ind
, 0);
12790 /* Allow post-increment with Neon registers. */
12791 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12792 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12793 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12795 /* FIXME: vld1 allows register post-modify. */
12801 && GET_CODE (ind
) == PLUS
12802 && REG_P (XEXP (ind
, 0))
12803 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12804 && CONST_INT_P (XEXP (ind
, 1))
12805 && INTVAL (XEXP (ind
, 1)) > -1024
12806 /* For quad modes, we restrict the constant offset to be slightly less
12807 than what the instruction format permits. We have no such constraint
12808 on double mode offsets. (This must match arm_legitimate_index_p.) */
12809 && (INTVAL (XEXP (ind
, 1))
12810 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12811 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12817 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12820 neon_struct_mem_operand (rtx op
)
12824 /* Reject eliminable registers. */
12825 if (! (reload_in_progress
|| reload_completed
)
12826 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12827 || reg_mentioned_p (arg_pointer_rtx
, op
)
12828 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12829 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12830 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12831 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12834 /* Constants are converted into offsets from labels. */
12838 ind
= XEXP (op
, 0);
12840 if (reload_completed
12841 && (GET_CODE (ind
) == LABEL_REF
12842 || (GET_CODE (ind
) == CONST
12843 && GET_CODE (XEXP (ind
, 0)) == PLUS
12844 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12845 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12848 /* Match: (mem (reg)). */
12850 return arm_address_register_rtx_p (ind
, 0);
12852 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12853 if (GET_CODE (ind
) == POST_INC
12854 || GET_CODE (ind
) == PRE_DEC
)
12855 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12860 /* Return true if X is a register that will be eliminated later on. */
12862 arm_eliminable_register (rtx x
)
12864 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12865 || REGNO (x
) == ARG_POINTER_REGNUM
12866 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12867 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12870 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12871 coprocessor registers. Otherwise return NO_REGS. */
12874 coproc_secondary_reload_class (enum machine_mode mode
, rtx x
, bool wb
)
12876 if (mode
== HFmode
)
12878 if (!TARGET_NEON_FP16
)
12879 return GENERAL_REGS
;
12880 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12882 return GENERAL_REGS
;
12885 /* The neon move patterns handle all legitimate vector and struct
12888 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12889 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12890 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12891 || VALID_NEON_STRUCT_MODE (mode
)))
12894 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12897 return GENERAL_REGS
;
12900 /* Values which must be returned in the most-significant end of the return
12904 arm_return_in_msb (const_tree valtype
)
12906 return (TARGET_AAPCS_BASED
12907 && BYTES_BIG_ENDIAN
12908 && (AGGREGATE_TYPE_P (valtype
)
12909 || TREE_CODE (valtype
) == COMPLEX_TYPE
12910 || FIXED_POINT_TYPE_P (valtype
)));
12913 /* Return TRUE if X references a SYMBOL_REF. */
12915 symbol_mentioned_p (rtx x
)
12920 if (GET_CODE (x
) == SYMBOL_REF
)
12923 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12924 are constant offsets, not symbols. */
12925 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12928 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12930 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12936 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12937 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12940 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12947 /* Return TRUE if X references a LABEL_REF. */
12949 label_mentioned_p (rtx x
)
12954 if (GET_CODE (x
) == LABEL_REF
)
12957 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12958 instruction, but they are constant offsets, not symbols. */
12959 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12962 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12963 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12969 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12970 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12973 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12981 tls_mentioned_p (rtx x
)
12983 switch (GET_CODE (x
))
12986 return tls_mentioned_p (XEXP (x
, 0));
12989 if (XINT (x
, 1) == UNSPEC_TLS
)
12997 /* Must not copy any rtx that uses a pc-relative address. */
13000 arm_note_pic_base (rtx
*x
, void *date ATTRIBUTE_UNUSED
)
13002 if (GET_CODE (*x
) == UNSPEC
13003 && (XINT (*x
, 1) == UNSPEC_PIC_BASE
13004 || XINT (*x
, 1) == UNSPEC_PIC_UNIFIED
))
13010 arm_cannot_copy_insn_p (rtx insn
)
13012 /* The tls call insn cannot be copied, as it is paired with a data
13014 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
13017 return for_each_rtx (&PATTERN (insn
), arm_note_pic_base
, NULL
);
13021 minmax_code (rtx x
)
13023 enum rtx_code code
= GET_CODE (x
);
13036 gcc_unreachable ();
13040 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13043 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
13044 int *mask
, bool *signed_sat
)
13046 /* The high bound must be a power of two minus one. */
13047 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
13051 /* The low bound is either zero (for usat) or one less than the
13052 negation of the high bound (for ssat). */
13053 if (INTVAL (lo_bound
) == 0)
13058 *signed_sat
= false;
13063 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
13068 *signed_sat
= true;
13076 /* Return 1 if memory locations are adjacent. */
13078 adjacent_mem_locations (rtx a
, rtx b
)
13080 /* We don't guarantee to preserve the order of these memory refs. */
13081 if (volatile_refs_p (a
) || volatile_refs_p (b
))
13084 if ((REG_P (XEXP (a
, 0))
13085 || (GET_CODE (XEXP (a
, 0)) == PLUS
13086 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
13087 && (REG_P (XEXP (b
, 0))
13088 || (GET_CODE (XEXP (b
, 0)) == PLUS
13089 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
13091 HOST_WIDE_INT val0
= 0, val1
= 0;
13095 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
13097 reg0
= XEXP (XEXP (a
, 0), 0);
13098 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
13101 reg0
= XEXP (a
, 0);
13103 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
13105 reg1
= XEXP (XEXP (b
, 0), 0);
13106 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
13109 reg1
= XEXP (b
, 0);
13111 /* Don't accept any offset that will require multiple
13112 instructions to handle, since this would cause the
13113 arith_adjacentmem pattern to output an overlong sequence. */
13114 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
13117 /* Don't allow an eliminable register: register elimination can make
13118 the offset too large. */
13119 if (arm_eliminable_register (reg0
))
13122 val_diff
= val1
- val0
;
13126 /* If the target has load delay slots, then there's no benefit
13127 to using an ldm instruction unless the offset is zero and
13128 we are optimizing for size. */
13129 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
13130 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
13131 && (val_diff
== 4 || val_diff
== -4));
13134 return ((REGNO (reg0
) == REGNO (reg1
))
13135 && (val_diff
== 4 || val_diff
== -4));
13141 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13142 for load operations, false for store operations. CONSECUTIVE is true
13143 if the register numbers in the operation must be consecutive in the register
13144 bank. RETURN_PC is true if value is to be loaded in PC.
13145 The pattern we are trying to match for load is:
13146 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13147 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13150 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13153 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13154 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13155 3. If consecutive is TRUE, then for kth register being loaded,
13156 REGNO (R_dk) = REGNO (R_d0) + k.
13157 The pattern for store is similar. */
13159 ldm_stm_operation_p (rtx op
, bool load
, enum machine_mode mode
,
13160 bool consecutive
, bool return_pc
)
13162 HOST_WIDE_INT count
= XVECLEN (op
, 0);
13163 rtx reg
, mem
, addr
;
13165 unsigned first_regno
;
13166 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
13168 bool addr_reg_in_reglist
= false;
13169 bool update
= false;
13174 /* If not in SImode, then registers must be consecutive
13175 (e.g., VLDM instructions for DFmode). */
13176 gcc_assert ((mode
== SImode
) || consecutive
);
13177 /* Setting return_pc for stores is illegal. */
13178 gcc_assert (!return_pc
|| load
);
13180 /* Set up the increments and the regs per val based on the mode. */
13181 reg_increment
= GET_MODE_SIZE (mode
);
13182 regs_per_val
= reg_increment
/ 4;
13183 offset_adj
= return_pc
? 1 : 0;
13186 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
13187 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
13190 /* Check if this is a write-back. */
13191 elt
= XVECEXP (op
, 0, offset_adj
);
13192 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
13198 /* The offset adjustment must be the number of registers being
13199 popped times the size of a single register. */
13200 if (!REG_P (SET_DEST (elt
))
13201 || !REG_P (XEXP (SET_SRC (elt
), 0))
13202 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
13203 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
13204 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
13205 ((count
- 1 - offset_adj
) * reg_increment
))
13209 i
= i
+ offset_adj
;
13210 base
= base
+ offset_adj
;
13211 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13212 success depends on the type: VLDM can do just one reg,
13213 LDM must do at least two. */
13214 if ((count
<= i
) && (mode
== SImode
))
13217 elt
= XVECEXP (op
, 0, i
- 1);
13218 if (GET_CODE (elt
) != SET
)
13223 reg
= SET_DEST (elt
);
13224 mem
= SET_SRC (elt
);
13228 reg
= SET_SRC (elt
);
13229 mem
= SET_DEST (elt
);
13232 if (!REG_P (reg
) || !MEM_P (mem
))
13235 regno
= REGNO (reg
);
13236 first_regno
= regno
;
13237 addr
= XEXP (mem
, 0);
13238 if (GET_CODE (addr
) == PLUS
)
13240 if (!CONST_INT_P (XEXP (addr
, 1)))
13243 offset
= INTVAL (XEXP (addr
, 1));
13244 addr
= XEXP (addr
, 0);
13250 /* Don't allow SP to be loaded unless it is also the base register. It
13251 guarantees that SP is reset correctly when an LDM instruction
13252 is interrupted. Otherwise, we might end up with a corrupt stack. */
13253 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13256 for (; i
< count
; i
++)
13258 elt
= XVECEXP (op
, 0, i
);
13259 if (GET_CODE (elt
) != SET
)
13264 reg
= SET_DEST (elt
);
13265 mem
= SET_SRC (elt
);
13269 reg
= SET_SRC (elt
);
13270 mem
= SET_DEST (elt
);
13274 || GET_MODE (reg
) != mode
13275 || REGNO (reg
) <= regno
13278 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
13279 /* Don't allow SP to be loaded unless it is also the base register. It
13280 guarantees that SP is reset correctly when an LDM instruction
13281 is interrupted. Otherwise, we might end up with a corrupt stack. */
13282 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13284 || GET_MODE (mem
) != mode
13285 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
13286 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
13287 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13288 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13289 offset
+ (i
- base
) * reg_increment
))
13290 && (!REG_P (XEXP (mem
, 0))
13291 || offset
+ (i
- base
) * reg_increment
!= 0)))
13294 regno
= REGNO (reg
);
13295 if (regno
== REGNO (addr
))
13296 addr_reg_in_reglist
= true;
13301 if (update
&& addr_reg_in_reglist
)
13304 /* For Thumb-1, address register is always modified - either by write-back
13305 or by explicit load. If the pattern does not describe an update,
13306 then the address register must be in the list of loaded registers. */
13308 return update
|| addr_reg_in_reglist
;
13314 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13315 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13316 instruction. ADD_OFFSET is nonzero if the base address register needs
13317 to be modified with an add instruction before we can use it. */
13320 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13321 int nops
, HOST_WIDE_INT add_offset
)
13323 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13324 if the offset isn't small enough. The reason 2 ldrs are faster
13325 is because these ARMs are able to do more than one cache access
13326 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13327 whilst the ARM8 has a double bandwidth cache. This means that
13328 these cores can do both an instruction fetch and a data fetch in
13329 a single cycle, so the trick of calculating the address into a
13330 scratch register (one of the result regs) and then doing a load
13331 multiple actually becomes slower (and no smaller in code size).
13332 That is the transformation
13334 ldr rd1, [rbase + offset]
13335 ldr rd2, [rbase + offset + 4]
13339 add rd1, rbase, offset
13340 ldmia rd1, {rd1, rd2}
13342 produces worse code -- '3 cycles + any stalls on rd2' instead of
13343 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13344 access per cycle, the first sequence could never complete in less
13345 than 6 cycles, whereas the ldm sequence would only take 5 and
13346 would make better use of sequential accesses if not hitting the
13349 We cheat here and test 'arm_ld_sched' which we currently know to
13350 only be true for the ARM8, ARM9 and StrongARM. If this ever
13351 changes, then the test below needs to be reworked. */
13352 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13355 /* XScale has load-store double instructions, but they have stricter
13356 alignment requirements than load-store multiple, so we cannot
13359 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13360 the pipeline until completion.
13368 An ldr instruction takes 1-3 cycles, but does not block the
13377 Best case ldr will always win. However, the more ldr instructions
13378 we issue, the less likely we are to be able to schedule them well.
13379 Using ldr instructions also increases code size.
13381 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13382 for counts of 3 or 4 regs. */
13383 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13388 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13389 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13390 an array ORDER which describes the sequence to use when accessing the
13391 offsets that produces an ascending order. In this sequence, each
13392 offset must be larger by exactly 4 than the previous one. ORDER[0]
13393 must have been filled in with the lowest offset by the caller.
13394 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13395 we use to verify that ORDER produces an ascending order of registers.
13396 Return true if it was possible to construct such an order, false if
13400 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13401 int *unsorted_regs
)
13404 for (i
= 1; i
< nops
; i
++)
13408 order
[i
] = order
[i
- 1];
13409 for (j
= 0; j
< nops
; j
++)
13410 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13412 /* We must find exactly one offset that is higher than the
13413 previous one by 4. */
13414 if (order
[i
] != order
[i
- 1])
13418 if (order
[i
] == order
[i
- 1])
13420 /* The register numbers must be ascending. */
13421 if (unsorted_regs
!= NULL
13422 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13428 /* Used to determine in a peephole whether a sequence of load
13429 instructions can be changed into a load-multiple instruction.
13430 NOPS is the number of separate load instructions we are examining. The
13431 first NOPS entries in OPERANDS are the destination registers, the
13432 next NOPS entries are memory operands. If this function is
13433 successful, *BASE is set to the common base register of the memory
13434 accesses; *LOAD_OFFSET is set to the first memory location's offset
13435 from that base register.
13436 REGS is an array filled in with the destination register numbers.
13437 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13438 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13439 the sequence of registers in REGS matches the loads from ascending memory
13440 locations, and the function verifies that the register numbers are
13441 themselves ascending. If CHECK_REGS is false, the register numbers
13442 are stored in the order they are found in the operands. */
13444 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13445 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13447 int unsorted_regs
[MAX_LDM_STM_OPS
];
13448 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13449 int order
[MAX_LDM_STM_OPS
];
13450 rtx base_reg_rtx
= NULL
;
13454 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13455 easily extended if required. */
13456 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13458 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13460 /* Loop over the operands and check that the memory references are
13461 suitable (i.e. immediate offsets from the same base register). At
13462 the same time, extract the target register, and the memory
13464 for (i
= 0; i
< nops
; i
++)
13469 /* Convert a subreg of a mem into the mem itself. */
13470 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13471 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13473 gcc_assert (MEM_P (operands
[nops
+ i
]));
13475 /* Don't reorder volatile memory references; it doesn't seem worth
13476 looking for the case where the order is ok anyway. */
13477 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13480 offset
= const0_rtx
;
13482 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13483 || (GET_CODE (reg
) == SUBREG
13484 && REG_P (reg
= SUBREG_REG (reg
))))
13485 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13486 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13487 || (GET_CODE (reg
) == SUBREG
13488 && REG_P (reg
= SUBREG_REG (reg
))))
13489 && (CONST_INT_P (offset
13490 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13494 base_reg
= REGNO (reg
);
13495 base_reg_rtx
= reg
;
13496 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13499 else if (base_reg
!= (int) REGNO (reg
))
13500 /* Not addressed from the same base register. */
13503 unsorted_regs
[i
] = (REG_P (operands
[i
])
13504 ? REGNO (operands
[i
])
13505 : REGNO (SUBREG_REG (operands
[i
])));
13507 /* If it isn't an integer register, or if it overwrites the
13508 base register but isn't the last insn in the list, then
13509 we can't do this. */
13510 if (unsorted_regs
[i
] < 0
13511 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13512 || unsorted_regs
[i
] > 14
13513 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13516 /* Don't allow SP to be loaded unless it is also the base
13517 register. It guarantees that SP is reset correctly when
13518 an LDM instruction is interrupted. Otherwise, we might
13519 end up with a corrupt stack. */
13520 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13523 unsorted_offsets
[i
] = INTVAL (offset
);
13524 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13528 /* Not a suitable memory address. */
13532 /* All the useful information has now been extracted from the
13533 operands into unsorted_regs and unsorted_offsets; additionally,
13534 order[0] has been set to the lowest offset in the list. Sort
13535 the offsets into order, verifying that they are adjacent, and
13536 check that the register numbers are ascending. */
13537 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13538 check_regs
? unsorted_regs
: NULL
))
13542 memcpy (saved_order
, order
, sizeof order
);
13548 for (i
= 0; i
< nops
; i
++)
13549 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13551 *load_offset
= unsorted_offsets
[order
[0]];
13555 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13558 if (unsorted_offsets
[order
[0]] == 0)
13559 ldm_case
= 1; /* ldmia */
13560 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13561 ldm_case
= 2; /* ldmib */
13562 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13563 ldm_case
= 3; /* ldmda */
13564 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13565 ldm_case
= 4; /* ldmdb */
13566 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13567 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13572 if (!multiple_operation_profitable_p (false, nops
,
13574 ? unsorted_offsets
[order
[0]] : 0))
13580 /* Used to determine in a peephole whether a sequence of store instructions can
13581 be changed into a store-multiple instruction.
13582 NOPS is the number of separate store instructions we are examining.
13583 NOPS_TOTAL is the total number of instructions recognized by the peephole
13585 The first NOPS entries in OPERANDS are the source registers, the next
13586 NOPS entries are memory operands. If this function is successful, *BASE is
13587 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13588 to the first memory location's offset from that base register. REGS is an
13589 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13590 likewise filled with the corresponding rtx's.
13591 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13592 numbers to an ascending order of stores.
13593 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13594 from ascending memory locations, and the function verifies that the register
13595 numbers are themselves ascending. If CHECK_REGS is false, the register
13596 numbers are stored in the order they are found in the operands. */
13598 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13599 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13600 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13602 int unsorted_regs
[MAX_LDM_STM_OPS
];
13603 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13604 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13605 int order
[MAX_LDM_STM_OPS
];
13607 rtx base_reg_rtx
= NULL
;
13610 /* Write back of base register is currently only supported for Thumb 1. */
13611 int base_writeback
= TARGET_THUMB1
;
13613 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13614 easily extended if required. */
13615 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13617 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13619 /* Loop over the operands and check that the memory references are
13620 suitable (i.e. immediate offsets from the same base register). At
13621 the same time, extract the target register, and the memory
13623 for (i
= 0; i
< nops
; i
++)
13628 /* Convert a subreg of a mem into the mem itself. */
13629 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13630 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13632 gcc_assert (MEM_P (operands
[nops
+ i
]));
13634 /* Don't reorder volatile memory references; it doesn't seem worth
13635 looking for the case where the order is ok anyway. */
13636 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13639 offset
= const0_rtx
;
13641 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13642 || (GET_CODE (reg
) == SUBREG
13643 && REG_P (reg
= SUBREG_REG (reg
))))
13644 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13645 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13646 || (GET_CODE (reg
) == SUBREG
13647 && REG_P (reg
= SUBREG_REG (reg
))))
13648 && (CONST_INT_P (offset
13649 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13651 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13652 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13653 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13657 base_reg
= REGNO (reg
);
13658 base_reg_rtx
= reg
;
13659 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13662 else if (base_reg
!= (int) REGNO (reg
))
13663 /* Not addressed from the same base register. */
13666 /* If it isn't an integer register, then we can't do this. */
13667 if (unsorted_regs
[i
] < 0
13668 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13669 /* The effects are unpredictable if the base register is
13670 both updated and stored. */
13671 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13672 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13673 || unsorted_regs
[i
] > 14)
13676 unsorted_offsets
[i
] = INTVAL (offset
);
13677 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13681 /* Not a suitable memory address. */
13685 /* All the useful information has now been extracted from the
13686 operands into unsorted_regs and unsorted_offsets; additionally,
13687 order[0] has been set to the lowest offset in the list. Sort
13688 the offsets into order, verifying that they are adjacent, and
13689 check that the register numbers are ascending. */
13690 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13691 check_regs
? unsorted_regs
: NULL
))
13695 memcpy (saved_order
, order
, sizeof order
);
13701 for (i
= 0; i
< nops
; i
++)
13703 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13705 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13708 *load_offset
= unsorted_offsets
[order
[0]];
13712 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13715 if (unsorted_offsets
[order
[0]] == 0)
13716 stm_case
= 1; /* stmia */
13717 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13718 stm_case
= 2; /* stmib */
13719 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13720 stm_case
= 3; /* stmda */
13721 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13722 stm_case
= 4; /* stmdb */
13726 if (!multiple_operation_profitable_p (false, nops
, 0))
13732 /* Routines for use in generating RTL. */
13734 /* Generate a load-multiple instruction. COUNT is the number of loads in
13735 the instruction; REGS and MEMS are arrays containing the operands.
13736 BASEREG is the base register to be used in addressing the memory operands.
13737 WBACK_OFFSET is nonzero if the instruction should update the base
13741 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13742 HOST_WIDE_INT wback_offset
)
13747 if (!multiple_operation_profitable_p (false, count
, 0))
13753 for (i
= 0; i
< count
; i
++)
13754 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13756 if (wback_offset
!= 0)
13757 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13759 seq
= get_insns ();
13765 result
= gen_rtx_PARALLEL (VOIDmode
,
13766 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13767 if (wback_offset
!= 0)
13769 XVECEXP (result
, 0, 0)
13770 = gen_rtx_SET (VOIDmode
, basereg
,
13771 plus_constant (Pmode
, basereg
, wback_offset
));
13776 for (j
= 0; i
< count
; i
++, j
++)
13777 XVECEXP (result
, 0, i
)
13778 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13783 /* Generate a store-multiple instruction. COUNT is the number of stores in
13784 the instruction; REGS and MEMS are arrays containing the operands.
13785 BASEREG is the base register to be used in addressing the memory operands.
13786 WBACK_OFFSET is nonzero if the instruction should update the base
13790 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13791 HOST_WIDE_INT wback_offset
)
13796 if (GET_CODE (basereg
) == PLUS
)
13797 basereg
= XEXP (basereg
, 0);
13799 if (!multiple_operation_profitable_p (false, count
, 0))
13805 for (i
= 0; i
< count
; i
++)
13806 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13808 if (wback_offset
!= 0)
13809 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13811 seq
= get_insns ();
13817 result
= gen_rtx_PARALLEL (VOIDmode
,
13818 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13819 if (wback_offset
!= 0)
13821 XVECEXP (result
, 0, 0)
13822 = gen_rtx_SET (VOIDmode
, basereg
,
13823 plus_constant (Pmode
, basereg
, wback_offset
));
13828 for (j
= 0; i
< count
; i
++, j
++)
13829 XVECEXP (result
, 0, i
)
13830 = gen_rtx_SET (VOIDmode
, mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13835 /* Generate either a load-multiple or a store-multiple instruction. This
13836 function can be used in situations where we can start with a single MEM
13837 rtx and adjust its address upwards.
13838 COUNT is the number of operations in the instruction, not counting a
13839 possible update of the base register. REGS is an array containing the
13841 BASEREG is the base register to be used in addressing the memory operands,
13842 which are constructed from BASEMEM.
13843 WRITE_BACK specifies whether the generated instruction should include an
13844 update of the base register.
13845 OFFSETP is used to pass an offset to and from this function; this offset
13846 is not used when constructing the address (instead BASEMEM should have an
13847 appropriate offset in its address), it is used only for setting
13848 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13851 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13852 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13854 rtx mems
[MAX_LDM_STM_OPS
];
13855 HOST_WIDE_INT offset
= *offsetp
;
13858 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13860 if (GET_CODE (basereg
) == PLUS
)
13861 basereg
= XEXP (basereg
, 0);
13863 for (i
= 0; i
< count
; i
++)
13865 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13866 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13874 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13875 write_back
? 4 * count
: 0);
13877 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13878 write_back
? 4 * count
: 0);
13882 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13883 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13885 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13890 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13891 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13893 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13897 /* Called from a peephole2 expander to turn a sequence of loads into an
13898 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13899 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13900 is true if we can reorder the registers because they are used commutatively
13902 Returns true iff we could generate a new instruction. */
13905 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13907 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13908 rtx mems
[MAX_LDM_STM_OPS
];
13909 int i
, j
, base_reg
;
13911 HOST_WIDE_INT offset
;
13912 int write_back
= FALSE
;
13916 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13917 &base_reg
, &offset
, !sort_regs
);
13923 for (i
= 0; i
< nops
- 1; i
++)
13924 for (j
= i
+ 1; j
< nops
; j
++)
13925 if (regs
[i
] > regs
[j
])
13931 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13935 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13936 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13942 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13943 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13945 if (!TARGET_THUMB1
)
13947 base_reg
= regs
[0];
13948 base_reg_rtx
= newbase
;
13952 for (i
= 0; i
< nops
; i
++)
13954 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13955 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13958 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13959 write_back
? offset
+ i
* 4 : 0));
13963 /* Called from a peephole2 expander to turn a sequence of stores into an
13964 STM instruction. OPERANDS are the operands found by the peephole matcher;
13965 NOPS indicates how many separate stores we are trying to combine.
13966 Returns true iff we could generate a new instruction. */
13969 gen_stm_seq (rtx
*operands
, int nops
)
13972 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13973 rtx mems
[MAX_LDM_STM_OPS
];
13976 HOST_WIDE_INT offset
;
13977 int write_back
= FALSE
;
13980 bool base_reg_dies
;
13982 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13983 mem_order
, &base_reg
, &offset
, true);
13988 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13990 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13993 gcc_assert (base_reg_dies
);
13999 gcc_assert (base_reg_dies
);
14000 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14004 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14006 for (i
= 0; i
< nops
; i
++)
14008 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14009 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14012 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14013 write_back
? offset
+ i
* 4 : 0));
14017 /* Called from a peephole2 expander to turn a sequence of stores that are
14018 preceded by constant loads into an STM instruction. OPERANDS are the
14019 operands found by the peephole matcher; NOPS indicates how many
14020 separate stores we are trying to combine; there are 2 * NOPS
14021 instructions in the peephole.
14022 Returns true iff we could generate a new instruction. */
14025 gen_const_stm_seq (rtx
*operands
, int nops
)
14027 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
14028 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14029 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
14030 rtx mems
[MAX_LDM_STM_OPS
];
14033 HOST_WIDE_INT offset
;
14034 int write_back
= FALSE
;
14037 bool base_reg_dies
;
14039 HARD_REG_SET allocated
;
14041 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
14042 mem_order
, &base_reg
, &offset
, false);
14047 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
14049 /* If the same register is used more than once, try to find a free
14051 CLEAR_HARD_REG_SET (allocated
);
14052 for (i
= 0; i
< nops
; i
++)
14054 for (j
= i
+ 1; j
< nops
; j
++)
14055 if (regs
[i
] == regs
[j
])
14057 rtx t
= peep2_find_free_register (0, nops
* 2,
14058 TARGET_THUMB1
? "l" : "r",
14059 SImode
, &allocated
);
14063 regs
[i
] = REGNO (t
);
14067 /* Compute an ordering that maps the register numbers to an ascending
14070 for (i
= 0; i
< nops
; i
++)
14071 if (regs
[i
] < regs
[reg_order
[0]])
14074 for (i
= 1; i
< nops
; i
++)
14076 int this_order
= reg_order
[i
- 1];
14077 for (j
= 0; j
< nops
; j
++)
14078 if (regs
[j
] > regs
[reg_order
[i
- 1]]
14079 && (this_order
== reg_order
[i
- 1]
14080 || regs
[j
] < regs
[this_order
]))
14082 reg_order
[i
] = this_order
;
14085 /* Ensure that registers that must be live after the instruction end
14086 up with the correct value. */
14087 for (i
= 0; i
< nops
; i
++)
14089 int this_order
= reg_order
[i
];
14090 if ((this_order
!= mem_order
[i
]
14091 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
14092 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
14096 /* Load the constants. */
14097 for (i
= 0; i
< nops
; i
++)
14099 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
14100 sorted_regs
[i
] = regs
[reg_order
[i
]];
14101 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
14104 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14106 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
14109 gcc_assert (base_reg_dies
);
14115 gcc_assert (base_reg_dies
);
14116 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14120 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14122 for (i
= 0; i
< nops
; i
++)
14124 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14125 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14128 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
14129 write_back
? offset
+ i
* 4 : 0));
14133 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14134 unaligned copies on processors which support unaligned semantics for those
14135 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14136 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14137 An interleave factor of 1 (the minimum) will perform no interleaving.
14138 Load/store multiple are used for aligned addresses where possible. */
14141 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
14142 HOST_WIDE_INT length
,
14143 unsigned int interleave_factor
)
14145 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
14146 int *regnos
= XALLOCAVEC (int, interleave_factor
);
14147 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
14148 HOST_WIDE_INT i
, j
;
14149 HOST_WIDE_INT remaining
= length
, words
;
14150 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
14152 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
14153 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
14154 HOST_WIDE_INT srcoffset
, dstoffset
;
14155 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
14158 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
14160 /* Use hard registers if we have aligned source or destination so we can use
14161 load/store multiple with contiguous registers. */
14162 if (dst_aligned
|| src_aligned
)
14163 for (i
= 0; i
< interleave_factor
; i
++)
14164 regs
[i
] = gen_rtx_REG (SImode
, i
);
14166 for (i
= 0; i
< interleave_factor
; i
++)
14167 regs
[i
] = gen_reg_rtx (SImode
);
14169 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
14170 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
14172 srcoffset
= dstoffset
= 0;
14174 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14175 For copying the last bytes we want to subtract this offset again. */
14176 src_autoinc
= dst_autoinc
= 0;
14178 for (i
= 0; i
< interleave_factor
; i
++)
14181 /* Copy BLOCK_SIZE_BYTES chunks. */
14183 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
14186 if (src_aligned
&& interleave_factor
> 1)
14188 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
14189 TRUE
, srcbase
, &srcoffset
));
14190 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14194 for (j
= 0; j
< interleave_factor
; j
++)
14196 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
14198 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14199 srcoffset
+ j
* UNITS_PER_WORD
);
14200 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14202 srcoffset
+= block_size_bytes
;
14206 if (dst_aligned
&& interleave_factor
> 1)
14208 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
14209 TRUE
, dstbase
, &dstoffset
));
14210 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14214 for (j
= 0; j
< interleave_factor
; j
++)
14216 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
14218 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14219 dstoffset
+ j
* UNITS_PER_WORD
);
14220 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14222 dstoffset
+= block_size_bytes
;
14225 remaining
-= block_size_bytes
;
14228 /* Copy any whole words left (note these aren't interleaved with any
14229 subsequent halfword/byte load/stores in the interests of simplicity). */
14231 words
= remaining
/ UNITS_PER_WORD
;
14233 gcc_assert (words
< interleave_factor
);
14235 if (src_aligned
&& words
> 1)
14237 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
14239 src_autoinc
+= UNITS_PER_WORD
* words
;
14243 for (j
= 0; j
< words
; j
++)
14245 addr
= plus_constant (Pmode
, src
,
14246 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
14247 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14248 srcoffset
+ j
* UNITS_PER_WORD
);
14249 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14251 srcoffset
+= words
* UNITS_PER_WORD
;
14254 if (dst_aligned
&& words
> 1)
14256 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14258 dst_autoinc
+= words
* UNITS_PER_WORD
;
14262 for (j
= 0; j
< words
; j
++)
14264 addr
= plus_constant (Pmode
, dst
,
14265 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14266 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14267 dstoffset
+ j
* UNITS_PER_WORD
);
14268 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14270 dstoffset
+= words
* UNITS_PER_WORD
;
14273 remaining
-= words
* UNITS_PER_WORD
;
14275 gcc_assert (remaining
< 4);
14277 /* Copy a halfword if necessary. */
14279 if (remaining
>= 2)
14281 halfword_tmp
= gen_reg_rtx (SImode
);
14283 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14284 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14285 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14287 /* Either write out immediately, or delay until we've loaded the last
14288 byte, depending on interleave factor. */
14289 if (interleave_factor
== 1)
14291 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14292 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14293 emit_insn (gen_unaligned_storehi (mem
,
14294 gen_lowpart (HImode
, halfword_tmp
)));
14295 halfword_tmp
= NULL
;
14303 gcc_assert (remaining
< 2);
14305 /* Copy last byte. */
14307 if ((remaining
& 1) != 0)
14309 byte_tmp
= gen_reg_rtx (SImode
);
14311 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14312 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14313 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14315 if (interleave_factor
== 1)
14317 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14318 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14319 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14328 /* Store last halfword if we haven't done so already. */
14332 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14333 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14334 emit_insn (gen_unaligned_storehi (mem
,
14335 gen_lowpart (HImode
, halfword_tmp
)));
14339 /* Likewise for last byte. */
14343 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14344 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14345 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14349 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14352 /* From mips_adjust_block_mem:
14354 Helper function for doing a loop-based block operation on memory
14355 reference MEM. Each iteration of the loop will operate on LENGTH
14358 Create a new base register for use within the loop and point it to
14359 the start of MEM. Create a new memory reference that uses this
14360 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14363 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14366 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14368 /* Although the new mem does not refer to a known location,
14369 it does keep up to LENGTH bytes of alignment. */
14370 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14371 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14374 /* From mips_block_move_loop:
14376 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14377 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14378 the memory regions do not overlap. */
14381 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14382 unsigned int interleave_factor
,
14383 HOST_WIDE_INT bytes_per_iter
)
14385 rtx label
, src_reg
, dest_reg
, final_src
, test
;
14386 HOST_WIDE_INT leftover
;
14388 leftover
= length
% bytes_per_iter
;
14389 length
-= leftover
;
14391 /* Create registers and memory references for use within the loop. */
14392 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14393 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14395 /* Calculate the value that SRC_REG should have after the last iteration of
14397 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14398 0, 0, OPTAB_WIDEN
);
14400 /* Emit the start of the loop. */
14401 label
= gen_label_rtx ();
14402 emit_label (label
);
14404 /* Emit the loop body. */
14405 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14406 interleave_factor
);
14408 /* Move on to the next block. */
14409 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14410 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14412 /* Emit the loop condition. */
14413 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14414 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14416 /* Mop up any left-over bytes. */
14418 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14421 /* Emit a block move when either the source or destination is unaligned (not
14422 aligned to a four-byte boundary). This may need further tuning depending on
14423 core type, optimize_size setting, etc. */
14426 arm_movmemqi_unaligned (rtx
*operands
)
14428 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14432 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14433 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14434 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14435 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14436 or dst_aligned though: allow more interleaving in those cases since the
14437 resulting code can be smaller. */
14438 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14439 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14442 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14443 interleave_factor
, bytes_per_iter
);
14445 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14446 interleave_factor
);
14450 /* Note that the loop created by arm_block_move_unaligned_loop may be
14451 subject to loop unrolling, which makes tuning this condition a little
14454 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14456 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14463 arm_gen_movmemqi (rtx
*operands
)
14465 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14466 HOST_WIDE_INT srcoffset
, dstoffset
;
14468 rtx src
, dst
, srcbase
, dstbase
;
14469 rtx part_bytes_reg
= NULL
;
14472 if (!CONST_INT_P (operands
[2])
14473 || !CONST_INT_P (operands
[3])
14474 || INTVAL (operands
[2]) > 64)
14477 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14478 return arm_movmemqi_unaligned (operands
);
14480 if (INTVAL (operands
[3]) & 3)
14483 dstbase
= operands
[0];
14484 srcbase
= operands
[1];
14486 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14487 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14489 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14490 out_words_to_go
= INTVAL (operands
[2]) / 4;
14491 last_bytes
= INTVAL (operands
[2]) & 3;
14492 dstoffset
= srcoffset
= 0;
14494 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14495 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14497 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14499 if (in_words_to_go
> 4)
14500 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14501 TRUE
, srcbase
, &srcoffset
));
14503 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14504 src
, FALSE
, srcbase
,
14507 if (out_words_to_go
)
14509 if (out_words_to_go
> 4)
14510 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14511 TRUE
, dstbase
, &dstoffset
));
14512 else if (out_words_to_go
!= 1)
14513 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14514 out_words_to_go
, dst
,
14517 dstbase
, &dstoffset
));
14520 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14521 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
14522 if (last_bytes
!= 0)
14524 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14530 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14531 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14534 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14535 if (out_words_to_go
)
14539 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14540 sreg
= copy_to_reg (mem
);
14542 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14543 emit_move_insn (mem
, sreg
);
14546 gcc_assert (!in_words_to_go
); /* Sanity check */
14549 if (in_words_to_go
)
14551 gcc_assert (in_words_to_go
> 0);
14553 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14554 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14557 gcc_assert (!last_bytes
|| part_bytes_reg
);
14559 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14561 rtx tmp
= gen_reg_rtx (SImode
);
14563 /* The bytes we want are in the top end of the word. */
14564 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14565 GEN_INT (8 * (4 - last_bytes
))));
14566 part_bytes_reg
= tmp
;
14570 mem
= adjust_automodify_address (dstbase
, QImode
,
14571 plus_constant (Pmode
, dst
,
14573 dstoffset
+ last_bytes
- 1);
14574 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14578 tmp
= gen_reg_rtx (SImode
);
14579 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14580 part_bytes_reg
= tmp
;
14587 if (last_bytes
> 1)
14589 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14590 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14594 rtx tmp
= gen_reg_rtx (SImode
);
14595 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14596 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14597 part_bytes_reg
= tmp
;
14604 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14605 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14612 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14615 next_consecutive_mem (rtx mem
)
14617 enum machine_mode mode
= GET_MODE (mem
);
14618 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14619 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14621 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14624 /* Copy using LDRD/STRD instructions whenever possible.
14625 Returns true upon success. */
14627 gen_movmem_ldrd_strd (rtx
*operands
)
14629 unsigned HOST_WIDE_INT len
;
14630 HOST_WIDE_INT align
;
14631 rtx src
, dst
, base
;
14633 bool src_aligned
, dst_aligned
;
14634 bool src_volatile
, dst_volatile
;
14636 gcc_assert (CONST_INT_P (operands
[2]));
14637 gcc_assert (CONST_INT_P (operands
[3]));
14639 len
= UINTVAL (operands
[2]);
14643 /* Maximum alignment we can assume for both src and dst buffers. */
14644 align
= INTVAL (operands
[3]);
14646 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14649 /* Place src and dst addresses in registers
14650 and update the corresponding mem rtx. */
14652 dst_volatile
= MEM_VOLATILE_P (dst
);
14653 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14654 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14655 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14658 src_volatile
= MEM_VOLATILE_P (src
);
14659 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14660 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14661 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14663 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14666 if (src_volatile
|| dst_volatile
)
14669 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14670 if (!(dst_aligned
|| src_aligned
))
14671 return arm_gen_movmemqi (operands
);
14673 src
= adjust_address (src
, DImode
, 0);
14674 dst
= adjust_address (dst
, DImode
, 0);
14678 reg0
= gen_reg_rtx (DImode
);
14680 emit_move_insn (reg0
, src
);
14682 emit_insn (gen_unaligned_loaddi (reg0
, src
));
14685 emit_move_insn (dst
, reg0
);
14687 emit_insn (gen_unaligned_storedi (dst
, reg0
));
14689 src
= next_consecutive_mem (src
);
14690 dst
= next_consecutive_mem (dst
);
14693 gcc_assert (len
< 8);
14696 /* More than a word but less than a double-word to copy. Copy a word. */
14697 reg0
= gen_reg_rtx (SImode
);
14698 src
= adjust_address (src
, SImode
, 0);
14699 dst
= adjust_address (dst
, SImode
, 0);
14701 emit_move_insn (reg0
, src
);
14703 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14706 emit_move_insn (dst
, reg0
);
14708 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14710 src
= next_consecutive_mem (src
);
14711 dst
= next_consecutive_mem (dst
);
14718 /* Copy the remaining bytes. */
14721 dst
= adjust_address (dst
, HImode
, 0);
14722 src
= adjust_address (src
, HImode
, 0);
14723 reg0
= gen_reg_rtx (SImode
);
14725 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14727 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14730 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14732 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14734 src
= next_consecutive_mem (src
);
14735 dst
= next_consecutive_mem (dst
);
14740 dst
= adjust_address (dst
, QImode
, 0);
14741 src
= adjust_address (src
, QImode
, 0);
14742 reg0
= gen_reg_rtx (QImode
);
14743 emit_move_insn (reg0
, src
);
14744 emit_move_insn (dst
, reg0
);
14748 /* Select a dominance comparison mode if possible for a test of the general
14749 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14750 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14751 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14752 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14753 In all cases OP will be either EQ or NE, but we don't need to know which
14754 here. If we are unable to support a dominance comparison we return
14755 CC mode. This will then fail to match for the RTL expressions that
14756 generate this call. */
14758 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14760 enum rtx_code cond1
, cond2
;
14763 /* Currently we will probably get the wrong result if the individual
14764 comparisons are not simple. This also ensures that it is safe to
14765 reverse a comparison if necessary. */
14766 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14768 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14772 /* The if_then_else variant of this tests the second condition if the
14773 first passes, but is true if the first fails. Reverse the first
14774 condition to get a true "inclusive-or" expression. */
14775 if (cond_or
== DOM_CC_NX_OR_Y
)
14776 cond1
= reverse_condition (cond1
);
14778 /* If the comparisons are not equal, and one doesn't dominate the other,
14779 then we can't do this. */
14781 && !comparison_dominates_p (cond1
, cond2
)
14782 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14787 enum rtx_code temp
= cond1
;
14795 if (cond_or
== DOM_CC_X_AND_Y
)
14800 case EQ
: return CC_DEQmode
;
14801 case LE
: return CC_DLEmode
;
14802 case LEU
: return CC_DLEUmode
;
14803 case GE
: return CC_DGEmode
;
14804 case GEU
: return CC_DGEUmode
;
14805 default: gcc_unreachable ();
14809 if (cond_or
== DOM_CC_X_AND_Y
)
14821 gcc_unreachable ();
14825 if (cond_or
== DOM_CC_X_AND_Y
)
14837 gcc_unreachable ();
14841 if (cond_or
== DOM_CC_X_AND_Y
)
14842 return CC_DLTUmode
;
14847 return CC_DLTUmode
;
14849 return CC_DLEUmode
;
14853 gcc_unreachable ();
14857 if (cond_or
== DOM_CC_X_AND_Y
)
14858 return CC_DGTUmode
;
14863 return CC_DGTUmode
;
14865 return CC_DGEUmode
;
14869 gcc_unreachable ();
14872 /* The remaining cases only occur when both comparisons are the
14875 gcc_assert (cond1
== cond2
);
14879 gcc_assert (cond1
== cond2
);
14883 gcc_assert (cond1
== cond2
);
14887 gcc_assert (cond1
== cond2
);
14888 return CC_DLEUmode
;
14891 gcc_assert (cond1
== cond2
);
14892 return CC_DGEUmode
;
14895 gcc_unreachable ();
14900 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14902 /* All floating point compares return CCFP if it is an equality
14903 comparison, and CCFPE otherwise. */
14904 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14927 gcc_unreachable ();
14931 /* A compare with a shifted operand. Because of canonicalization, the
14932 comparison will have to be swapped when we emit the assembler. */
14933 if (GET_MODE (y
) == SImode
14934 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14935 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14936 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14937 || GET_CODE (x
) == ROTATERT
))
14940 /* This operation is performed swapped, but since we only rely on the Z
14941 flag we don't need an additional mode. */
14942 if (GET_MODE (y
) == SImode
14943 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14944 && GET_CODE (x
) == NEG
14945 && (op
== EQ
|| op
== NE
))
14948 /* This is a special case that is used by combine to allow a
14949 comparison of a shifted byte load to be split into a zero-extend
14950 followed by a comparison of the shifted integer (only valid for
14951 equalities and unsigned inequalities). */
14952 if (GET_MODE (x
) == SImode
14953 && GET_CODE (x
) == ASHIFT
14954 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14955 && GET_CODE (XEXP (x
, 0)) == SUBREG
14956 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14957 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14958 && (op
== EQ
|| op
== NE
14959 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14960 && CONST_INT_P (y
))
14963 /* A construct for a conditional compare, if the false arm contains
14964 0, then both conditions must be true, otherwise either condition
14965 must be true. Not all conditions are possible, so CCmode is
14966 returned if it can't be done. */
14967 if (GET_CODE (x
) == IF_THEN_ELSE
14968 && (XEXP (x
, 2) == const0_rtx
14969 || XEXP (x
, 2) == const1_rtx
)
14970 && COMPARISON_P (XEXP (x
, 0))
14971 && COMPARISON_P (XEXP (x
, 1)))
14972 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14973 INTVAL (XEXP (x
, 2)));
14975 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14976 if (GET_CODE (x
) == AND
14977 && (op
== EQ
|| op
== NE
)
14978 && COMPARISON_P (XEXP (x
, 0))
14979 && COMPARISON_P (XEXP (x
, 1)))
14980 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14983 if (GET_CODE (x
) == IOR
14984 && (op
== EQ
|| op
== NE
)
14985 && COMPARISON_P (XEXP (x
, 0))
14986 && COMPARISON_P (XEXP (x
, 1)))
14987 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14990 /* An operation (on Thumb) where we want to test for a single bit.
14991 This is done by shifting that bit up into the top bit of a
14992 scratch register; we can then branch on the sign bit. */
14994 && GET_MODE (x
) == SImode
14995 && (op
== EQ
|| op
== NE
)
14996 && GET_CODE (x
) == ZERO_EXTRACT
14997 && XEXP (x
, 1) == const1_rtx
)
15000 /* An operation that sets the condition codes as a side-effect, the
15001 V flag is not set correctly, so we can only use comparisons where
15002 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15004 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15005 if (GET_MODE (x
) == SImode
15007 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
15008 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
15009 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
15010 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
15011 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
15012 || GET_CODE (x
) == LSHIFTRT
15013 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15014 || GET_CODE (x
) == ROTATERT
15015 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
15016 return CC_NOOVmode
;
15018 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
15021 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
15022 && GET_CODE (x
) == PLUS
15023 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
15026 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
15032 /* A DImode comparison against zero can be implemented by
15033 or'ing the two halves together. */
15034 if (y
== const0_rtx
)
15037 /* We can do an equality test in three Thumb instructions. */
15047 /* DImode unsigned comparisons can be implemented by cmp +
15048 cmpeq without a scratch register. Not worth doing in
15059 /* DImode signed and unsigned comparisons can be implemented
15060 by cmp + sbcs with a scratch register, but that does not
15061 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15062 gcc_assert (op
!= EQ
&& op
!= NE
);
15066 gcc_unreachable ();
15070 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
15071 return GET_MODE (x
);
15076 /* X and Y are two things to compare using CODE. Emit the compare insn and
15077 return the rtx for register 0 in the proper mode. FP means this is a
15078 floating point compare: I don't think that it is needed on the arm. */
15080 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
15082 enum machine_mode mode
;
15084 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
15086 /* We might have X as a constant, Y as a register because of the predicates
15087 used for cmpdi. If so, force X to a register here. */
15088 if (dimode_comparison
&& !REG_P (x
))
15089 x
= force_reg (DImode
, x
);
15091 mode
= SELECT_CC_MODE (code
, x
, y
);
15092 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
15094 if (dimode_comparison
15095 && mode
!= CC_CZmode
)
15099 /* To compare two non-zero values for equality, XOR them and
15100 then compare against zero. Not used for ARM mode; there
15101 CC_CZmode is cheaper. */
15102 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
15104 gcc_assert (!reload_completed
);
15105 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
15109 /* A scratch register is required. */
15110 if (reload_completed
)
15111 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
15113 scratch
= gen_rtx_SCRATCH (SImode
);
15115 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
15116 set
= gen_rtx_SET (VOIDmode
, cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15117 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
15120 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15125 /* Generate a sequence of insns that will generate the correct return
15126 address mask depending on the physical architecture that the program
15129 arm_gen_return_addr_mask (void)
15131 rtx reg
= gen_reg_rtx (Pmode
);
15133 emit_insn (gen_return_addr_mask (reg
));
15138 arm_reload_in_hi (rtx
*operands
)
15140 rtx ref
= operands
[1];
15142 HOST_WIDE_INT offset
= 0;
15144 if (GET_CODE (ref
) == SUBREG
)
15146 offset
= SUBREG_BYTE (ref
);
15147 ref
= SUBREG_REG (ref
);
15152 /* We have a pseudo which has been spilt onto the stack; there
15153 are two cases here: the first where there is a simple
15154 stack-slot replacement and a second where the stack-slot is
15155 out of range, or is used as a subreg. */
15156 if (reg_equiv_mem (REGNO (ref
)))
15158 ref
= reg_equiv_mem (REGNO (ref
));
15159 base
= find_replacement (&XEXP (ref
, 0));
15162 /* The slot is out of range, or was dressed up in a SUBREG. */
15163 base
= reg_equiv_address (REGNO (ref
));
15166 base
= find_replacement (&XEXP (ref
, 0));
15168 /* Handle the case where the address is too complex to be offset by 1. */
15169 if (GET_CODE (base
) == MINUS
15170 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15172 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15174 emit_set_insn (base_plus
, base
);
15177 else if (GET_CODE (base
) == PLUS
)
15179 /* The addend must be CONST_INT, or we would have dealt with it above. */
15180 HOST_WIDE_INT hi
, lo
;
15182 offset
+= INTVAL (XEXP (base
, 1));
15183 base
= XEXP (base
, 0);
15185 /* Rework the address into a legal sequence of insns. */
15186 /* Valid range for lo is -4095 -> 4095 */
15189 : -((-offset
) & 0xfff));
15191 /* Corner case, if lo is the max offset then we would be out of range
15192 once we have added the additional 1 below, so bump the msb into the
15193 pre-loading insn(s). */
15197 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15198 ^ (HOST_WIDE_INT
) 0x80000000)
15199 - (HOST_WIDE_INT
) 0x80000000);
15201 gcc_assert (hi
+ lo
== offset
);
15205 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15207 /* Get the base address; addsi3 knows how to handle constants
15208 that require more than one insn. */
15209 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15215 /* Operands[2] may overlap operands[0] (though it won't overlap
15216 operands[1]), that's why we asked for a DImode reg -- so we can
15217 use the bit that does not overlap. */
15218 if (REGNO (operands
[2]) == REGNO (operands
[0]))
15219 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15221 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15223 emit_insn (gen_zero_extendqisi2 (scratch
,
15224 gen_rtx_MEM (QImode
,
15225 plus_constant (Pmode
, base
,
15227 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15228 gen_rtx_MEM (QImode
,
15229 plus_constant (Pmode
, base
,
15231 if (!BYTES_BIG_ENDIAN
)
15232 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15233 gen_rtx_IOR (SImode
,
15236 gen_rtx_SUBREG (SImode
, operands
[0], 0),
15240 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15241 gen_rtx_IOR (SImode
,
15242 gen_rtx_ASHIFT (SImode
, scratch
,
15244 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
15247 /* Handle storing a half-word to memory during reload by synthesizing as two
15248 byte stores. Take care not to clobber the input values until after we
15249 have moved them somewhere safe. This code assumes that if the DImode
15250 scratch in operands[2] overlaps either the input value or output address
15251 in some way, then that value must die in this insn (we absolutely need
15252 two scratch registers for some corner cases). */
15254 arm_reload_out_hi (rtx
*operands
)
15256 rtx ref
= operands
[0];
15257 rtx outval
= operands
[1];
15259 HOST_WIDE_INT offset
= 0;
15261 if (GET_CODE (ref
) == SUBREG
)
15263 offset
= SUBREG_BYTE (ref
);
15264 ref
= SUBREG_REG (ref
);
15269 /* We have a pseudo which has been spilt onto the stack; there
15270 are two cases here: the first where there is a simple
15271 stack-slot replacement and a second where the stack-slot is
15272 out of range, or is used as a subreg. */
15273 if (reg_equiv_mem (REGNO (ref
)))
15275 ref
= reg_equiv_mem (REGNO (ref
));
15276 base
= find_replacement (&XEXP (ref
, 0));
15279 /* The slot is out of range, or was dressed up in a SUBREG. */
15280 base
= reg_equiv_address (REGNO (ref
));
15283 base
= find_replacement (&XEXP (ref
, 0));
15285 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15287 /* Handle the case where the address is too complex to be offset by 1. */
15288 if (GET_CODE (base
) == MINUS
15289 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15291 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15293 /* Be careful not to destroy OUTVAL. */
15294 if (reg_overlap_mentioned_p (base_plus
, outval
))
15296 /* Updating base_plus might destroy outval, see if we can
15297 swap the scratch and base_plus. */
15298 if (!reg_overlap_mentioned_p (scratch
, outval
))
15301 scratch
= base_plus
;
15306 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15308 /* Be conservative and copy OUTVAL into the scratch now,
15309 this should only be necessary if outval is a subreg
15310 of something larger than a word. */
15311 /* XXX Might this clobber base? I can't see how it can,
15312 since scratch is known to overlap with OUTVAL, and
15313 must be wider than a word. */
15314 emit_insn (gen_movhi (scratch_hi
, outval
));
15315 outval
= scratch_hi
;
15319 emit_set_insn (base_plus
, base
);
15322 else if (GET_CODE (base
) == PLUS
)
15324 /* The addend must be CONST_INT, or we would have dealt with it above. */
15325 HOST_WIDE_INT hi
, lo
;
15327 offset
+= INTVAL (XEXP (base
, 1));
15328 base
= XEXP (base
, 0);
15330 /* Rework the address into a legal sequence of insns. */
15331 /* Valid range for lo is -4095 -> 4095 */
15334 : -((-offset
) & 0xfff));
15336 /* Corner case, if lo is the max offset then we would be out of range
15337 once we have added the additional 1 below, so bump the msb into the
15338 pre-loading insn(s). */
15342 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15343 ^ (HOST_WIDE_INT
) 0x80000000)
15344 - (HOST_WIDE_INT
) 0x80000000);
15346 gcc_assert (hi
+ lo
== offset
);
15350 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15352 /* Be careful not to destroy OUTVAL. */
15353 if (reg_overlap_mentioned_p (base_plus
, outval
))
15355 /* Updating base_plus might destroy outval, see if we
15356 can swap the scratch and base_plus. */
15357 if (!reg_overlap_mentioned_p (scratch
, outval
))
15360 scratch
= base_plus
;
15365 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15367 /* Be conservative and copy outval into scratch now,
15368 this should only be necessary if outval is a
15369 subreg of something larger than a word. */
15370 /* XXX Might this clobber base? I can't see how it
15371 can, since scratch is known to overlap with
15373 emit_insn (gen_movhi (scratch_hi
, outval
));
15374 outval
= scratch_hi
;
15378 /* Get the base address; addsi3 knows how to handle constants
15379 that require more than one insn. */
15380 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15386 if (BYTES_BIG_ENDIAN
)
15388 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15389 plus_constant (Pmode
, base
,
15391 gen_lowpart (QImode
, outval
)));
15392 emit_insn (gen_lshrsi3 (scratch
,
15393 gen_rtx_SUBREG (SImode
, outval
, 0),
15395 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15397 gen_lowpart (QImode
, scratch
)));
15401 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15403 gen_lowpart (QImode
, outval
)));
15404 emit_insn (gen_lshrsi3 (scratch
,
15405 gen_rtx_SUBREG (SImode
, outval
, 0),
15407 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15408 plus_constant (Pmode
, base
,
15410 gen_lowpart (QImode
, scratch
)));
15414 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15415 (padded to the size of a word) should be passed in a register. */
15418 arm_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
15420 if (TARGET_AAPCS_BASED
)
15421 return must_pass_in_stack_var_size (mode
, type
);
15423 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15427 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15428 Return true if an argument passed on the stack should be padded upwards,
15429 i.e. if the least-significant byte has useful data.
15430 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15431 aggregate types are placed in the lowest memory address. */
15434 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
15436 if (!TARGET_AAPCS_BASED
)
15437 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
15439 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15446 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15447 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15448 register has useful data, and return the opposite if the most
15449 significant byte does. */
15452 arm_pad_reg_upward (enum machine_mode mode
,
15453 tree type
, int first ATTRIBUTE_UNUSED
)
15455 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15457 /* For AAPCS, small aggregates, small fixed-point types,
15458 and small complex types are always padded upwards. */
15461 if ((AGGREGATE_TYPE_P (type
)
15462 || TREE_CODE (type
) == COMPLEX_TYPE
15463 || FIXED_POINT_TYPE_P (type
))
15464 && int_size_in_bytes (type
) <= 4)
15469 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15470 && GET_MODE_SIZE (mode
) <= 4)
15475 /* Otherwise, use default padding. */
15476 return !BYTES_BIG_ENDIAN
;
15479 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15480 assuming that the address in the base register is word aligned. */
15482 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15484 HOST_WIDE_INT max_offset
;
15486 /* Offset must be a multiple of 4 in Thumb mode. */
15487 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15492 else if (TARGET_ARM
)
15497 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15500 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15501 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15502 Assumes that the address in the base register RN is word aligned. Pattern
15503 guarantees that both memory accesses use the same base register,
15504 the offsets are constants within the range, and the gap between the offsets is 4.
15505 If preload complete then check that registers are legal. WBACK indicates whether
15506 address is updated. LOAD indicates whether memory access is load or store. */
15508 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15509 bool wback
, bool load
)
15511 unsigned int t
, t2
, n
;
15513 if (!reload_completed
)
15516 if (!offset_ok_for_ldrd_strd (offset
))
15523 if ((TARGET_THUMB2
)
15524 && ((wback
&& (n
== t
|| n
== t2
))
15525 || (t
== SP_REGNUM
)
15526 || (t
== PC_REGNUM
)
15527 || (t2
== SP_REGNUM
)
15528 || (t2
== PC_REGNUM
)
15529 || (!load
&& (n
== PC_REGNUM
))
15530 || (load
&& (t
== t2
))
15531 /* Triggers Cortex-M3 LDRD errata. */
15532 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15536 && ((wback
&& (n
== t
|| n
== t2
))
15537 || (t2
== PC_REGNUM
)
15538 || (t
% 2 != 0) /* First destination register is not even. */
15540 /* PC can be used as base register (for offset addressing only),
15541 but it is depricated. */
15542 || (n
== PC_REGNUM
)))
15548 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15549 operand MEM's address contains an immediate offset from the base
15550 register and has no side effects, in which case it sets BASE and
15551 OFFSET accordingly. */
15553 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15557 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15559 /* TODO: Handle more general memory operand patterns, such as
15560 PRE_DEC and PRE_INC. */
15562 if (side_effects_p (mem
))
15565 /* Can't deal with subregs. */
15566 if (GET_CODE (mem
) == SUBREG
)
15569 gcc_assert (MEM_P (mem
));
15571 *offset
= const0_rtx
;
15573 addr
= XEXP (mem
, 0);
15575 /* If addr isn't valid for DImode, then we can't handle it. */
15576 if (!arm_legitimate_address_p (DImode
, addr
,
15577 reload_in_progress
|| reload_completed
))
15585 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15587 *base
= XEXP (addr
, 0);
15588 *offset
= XEXP (addr
, 1);
15589 return (REG_P (*base
) && CONST_INT_P (*offset
));
15595 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15597 /* Called from a peephole2 to replace two word-size accesses with a
15598 single LDRD/STRD instruction. Returns true iff we can generate a
15599 new instruction sequence. That is, both accesses use the same base
15600 register and the gap between constant offsets is 4. This function
15601 may reorder its operands to match ldrd/strd RTL templates.
15602 OPERANDS are the operands found by the peephole matcher;
15603 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15604 corresponding memory operands. LOAD indicaates whether the access
15605 is load or store. CONST_STORE indicates a store of constant
15606 integer values held in OPERANDS[4,5] and assumes that the pattern
15607 is of length 4 insn, for the purpose of checking dead registers.
15608 COMMUTE indicates that register operands may be reordered. */
15610 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15611 bool const_store
, bool commute
)
15614 HOST_WIDE_INT offsets
[2], offset
;
15615 rtx base
= NULL_RTX
;
15616 rtx cur_base
, cur_offset
, tmp
;
15618 HARD_REG_SET regset
;
15620 gcc_assert (!const_store
|| !load
);
15621 /* Check that the memory references are immediate offsets from the
15622 same base register. Extract the base register, the destination
15623 registers, and the corresponding memory offsets. */
15624 for (i
= 0; i
< nops
; i
++)
15626 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15631 else if (REGNO (base
) != REGNO (cur_base
))
15634 offsets
[i
] = INTVAL (cur_offset
);
15635 if (GET_CODE (operands
[i
]) == SUBREG
)
15637 tmp
= SUBREG_REG (operands
[i
]);
15638 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15643 /* Make sure there is no dependency between the individual loads. */
15644 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15645 return false; /* RAW */
15647 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15648 return false; /* WAW */
15650 /* If the same input register is used in both stores
15651 when storing different constants, try to find a free register.
15652 For example, the code
15657 can be transformed into
15660 in Thumb mode assuming that r1 is free. */
15662 && REGNO (operands
[0]) == REGNO (operands
[1])
15663 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15667 CLEAR_HARD_REG_SET (regset
);
15668 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15669 if (tmp
== NULL_RTX
)
15672 /* Use the new register in the first load to ensure that
15673 if the original input register is not dead after peephole,
15674 then it will have the correct constant value. */
15677 else if (TARGET_ARM
)
15680 int regno
= REGNO (operands
[0]);
15681 if (!peep2_reg_dead_p (4, operands
[0]))
15683 /* When the input register is even and is not dead after the
15684 pattern, it has to hold the second constant but we cannot
15685 form a legal STRD in ARM mode with this register as the second
15687 if (regno
% 2 == 0)
15690 /* Is regno-1 free? */
15691 SET_HARD_REG_SET (regset
);
15692 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15693 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15694 if (tmp
== NULL_RTX
)
15701 /* Find a DImode register. */
15702 CLEAR_HARD_REG_SET (regset
);
15703 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15704 if (tmp
!= NULL_RTX
)
15706 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15707 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15711 /* Can we use the input register to form a DI register? */
15712 SET_HARD_REG_SET (regset
);
15713 CLEAR_HARD_REG_BIT(regset
,
15714 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15715 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15716 if (tmp
== NULL_RTX
)
15718 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15722 gcc_assert (operands
[0] != NULL_RTX
);
15723 gcc_assert (operands
[1] != NULL_RTX
);
15724 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15725 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15729 /* Make sure the instructions are ordered with lower memory access first. */
15730 if (offsets
[0] > offsets
[1])
15732 gap
= offsets
[0] - offsets
[1];
15733 offset
= offsets
[1];
15735 /* Swap the instructions such that lower memory is accessed first. */
15736 SWAP_RTX (operands
[0], operands
[1]);
15737 SWAP_RTX (operands
[2], operands
[3]);
15739 SWAP_RTX (operands
[4], operands
[5]);
15743 gap
= offsets
[1] - offsets
[0];
15744 offset
= offsets
[0];
15747 /* Make sure accesses are to consecutive memory locations. */
15751 /* Make sure we generate legal instructions. */
15752 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15756 /* In Thumb state, where registers are almost unconstrained, there
15757 is little hope to fix it. */
15761 if (load
&& commute
)
15763 /* Try reordering registers. */
15764 SWAP_RTX (operands
[0], operands
[1]);
15765 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15772 /* If input registers are dead after this pattern, they can be
15773 reordered or replaced by other registers that are free in the
15774 current pattern. */
15775 if (!peep2_reg_dead_p (4, operands
[0])
15776 || !peep2_reg_dead_p (4, operands
[1]))
15779 /* Try to reorder the input registers. */
15780 /* For example, the code
15785 can be transformed into
15790 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15793 SWAP_RTX (operands
[0], operands
[1]);
15797 /* Try to find a free DI register. */
15798 CLEAR_HARD_REG_SET (regset
);
15799 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15800 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15803 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15804 if (tmp
== NULL_RTX
)
15807 /* DREG must be an even-numbered register in DImode.
15808 Split it into SI registers. */
15809 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15810 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15811 gcc_assert (operands
[0] != NULL_RTX
);
15812 gcc_assert (operands
[1] != NULL_RTX
);
15813 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15814 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15816 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15829 /* Print a symbolic form of X to the debug file, F. */
15831 arm_print_value (FILE *f
, rtx x
)
15833 switch (GET_CODE (x
))
15836 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15840 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15848 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15850 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15851 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15859 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15863 fprintf (f
, "`%s'", XSTR (x
, 0));
15867 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15871 arm_print_value (f
, XEXP (x
, 0));
15875 arm_print_value (f
, XEXP (x
, 0));
15877 arm_print_value (f
, XEXP (x
, 1));
15885 fprintf (f
, "????");
15890 /* Routines for manipulation of the constant pool. */
15892 /* Arm instructions cannot load a large constant directly into a
15893 register; they have to come from a pc relative load. The constant
15894 must therefore be placed in the addressable range of the pc
15895 relative load. Depending on the precise pc relative load
15896 instruction the range is somewhere between 256 bytes and 4k. This
15897 means that we often have to dump a constant inside a function, and
15898 generate code to branch around it.
15900 It is important to minimize this, since the branches will slow
15901 things down and make the code larger.
15903 Normally we can hide the table after an existing unconditional
15904 branch so that there is no interruption of the flow, but in the
15905 worst case the code looks like this:
15923 We fix this by performing a scan after scheduling, which notices
15924 which instructions need to have their operands fetched from the
15925 constant table and builds the table.
15927 The algorithm starts by building a table of all the constants that
15928 need fixing up and all the natural barriers in the function (places
15929 where a constant table can be dropped without breaking the flow).
15930 For each fixup we note how far the pc-relative replacement will be
15931 able to reach and the offset of the instruction into the function.
15933 Having built the table we then group the fixes together to form
15934 tables that are as large as possible (subject to addressing
15935 constraints) and emit each table of constants after the last
15936 barrier that is within range of all the instructions in the group.
15937 If a group does not contain a barrier, then we forcibly create one
15938 by inserting a jump instruction into the flow. Once the table has
15939 been inserted, the insns are then modified to reference the
15940 relevant entry in the pool.
15942 Possible enhancements to the algorithm (not implemented) are:
15944 1) For some processors and object formats, there may be benefit in
15945 aligning the pools to the start of cache lines; this alignment
15946 would need to be taken into account when calculating addressability
15949 /* These typedefs are located at the start of this file, so that
15950 they can be used in the prototypes there. This comment is to
15951 remind readers of that fact so that the following structures
15952 can be understood more easily.
15954 typedef struct minipool_node Mnode;
15955 typedef struct minipool_fixup Mfix; */
/* One entry in a minipool (a constant pool emitted in the middle of a
   function so that pc-relative loads can reach it).  NOTE(review): this
   chunk is a lossy extraction -- field declarations referenced elsewhere
   in this file via mp->next, mp->prev, mp->refcount, mp->value and
   mp->fix_size fall in the gaps between the surviving lines below; only
   comments have been added here, all original tokens are preserved.  */
15957 struct minipool_node
15959 /* Doubly linked chain of entries. */
15962 /* The maximum offset into the code that this entry can be placed. While
15963 pushing fixes for forward references, all entries are sorted in order
15964 of increasing max_address. */
15965 HOST_WIDE_INT max_address
;
15966 /* Similarly for an entry inserted for a backwards ref. */
15967 HOST_WIDE_INT min_address
;
15968 /* The number of fixes referencing this entry. This can become zero
15969 if we "unpush" an entry. In this case we ignore the entry when we
15970 come to emit the code. */
15972 /* The offset from the start of the minipool. */
15973 HOST_WIDE_INT offset
;
15974 /* The value in table. */
15976 /* The mode of value. */
15977 enum machine_mode mode
;
15978 /* The size of the value. With iWMMXt enabled
15979 sizes > 4 also imply an alignment of 8-bytes. */
/* One insn that needs one of its operands replaced by a reference into
   a minipool.  NOTE(review): lossy extraction -- fields accessed
   elsewhere in this file (fix->insn, fix->fix_size, fix->value,
   fix->minipool, fix->next) are not visible here; only comments added.  */
15983 struct minipool_fixup
/* Byte offset of the insn from the start of the function (accumulated
   from insn lengths during the reorg scan; see push_minipool_fix).  */
15987 HOST_WIDE_INT address
;
/* Mode of the value to be loaded from the pool.  */
15989 enum machine_mode mode
;
/* Maximum forward reach of the insn's pc-relative addressing, taken
   from get_attr_pool_range in push_minipool_fix.  */
15993 HOST_WIDE_INT forwards
;
/* Maximum backward reach, from get_attr_neg_pool_range.  */
15994 HOST_WIDE_INT backwards
;
15997 /* Fixes less than a word need padding out to a word boundary. */
15998 #define MINIPOOL_FIX_SIZE(mode) \
15999 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
/* Head and tail of the doubly-linked list of entries in the minipool
   currently being built, and the label that will mark its start.
   minipool_pad is extra slack subtracted from forward ranges (see its
   use in add_minipool_forward_ref).  */
16001 static Mnode
* minipool_vector_head
;
16002 static Mnode
* minipool_vector_tail
;
16003 static rtx minipool_vector_label
;
16004 static int minipool_pad
;
16006 /* The linked list of all minipool fixes required for this function. */
16007 Mfix
* minipool_fix_head
;
16008 Mfix
* minipool_fix_tail
;
16009 /* The fix entry for the current minipool, once it has been placed. */
16010 Mfix
* minipool_barrier
;
16012 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16013 #define JUMP_TABLES_IN_TEXT_SECTION 0
16016 static HOST_WIDE_INT
16017 get_jump_table_size (rtx insn
)
16019 /* ADDR_VECs only take room if read-only data does into the text
16021 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
16023 rtx body
= PATTERN (insn
);
16024 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
16025 HOST_WIDE_INT size
;
16026 HOST_WIDE_INT modesize
;
16028 modesize
= GET_MODE_SIZE (GET_MODE (body
));
16029 size
= modesize
* XVECLEN (body
, elt
);
16033 /* Round up size of TBB table to a halfword boundary. */
16034 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
16037 /* No padding necessary for TBH. */
16040 /* Add two bytes for alignment on Thumb. */
16045 gcc_unreachable ();
16053 /* Return the maximum amount of padding that will be inserted before
/* LABEL.  Worst-case padding is the label's alignment (in bytes) minus
   the smallest insn size (2 on Thumb, else 4), never negative.  */
16056 static HOST_WIDE_INT
16057 get_label_padding (rtx label
)
16059 HOST_WIDE_INT align
, min_insn_size
;
/* label_to_alignment yields a log2 value; convert to bytes.  */
16061 align
= 1 << label_to_alignment (label
);
16062 min_insn_size
= TARGET_THUMB
? 2 : 4;
16063 return align
> min_insn_size
? align
- min_insn_size
: 0;
16066 /* Move a minipool fix MP from its current location to before MAX_MP.
16067 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16068 constraints may need updating. */
/* NOTE(review): the return-type line and braces were lost in extraction;
   the caller (add_minipool_forward_ref) returns this function's result
   as the node added, so it presumably returns an Mnode * -- confirm
   against the full source.  */
16070 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
16071 HOST_WIDE_INT max_address
)
16073 /* The code below assumes these are different. */
gcc_assert (mp
!= max_mp
);
/* No insertion point: just tighten MP's constraint if needed.  */
16076 if (max_mp
== NULL
)
16078 if (max_address
< mp
->max_address
)
16079 mp
->max_address
= max_address
;
/* Otherwise MP must sit before MAX_MP, so its reach constraint can be
   no looser than MAX_MP's minus MP's own size.  */
16083 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16084 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16086 mp
->max_address
= max_address
;
16088 /* Unlink MP from its current position. Since max_mp is non-null,
16089 mp->prev must be non-null. */
16090 mp
->prev
->next
= mp
->next
;
16091 if (mp
->next
!= NULL
)
16092 mp
->next
->prev
= mp
->prev
;
/* MP was the tail; its predecessor becomes the new tail.  */
16094 minipool_vector_tail
= mp
->prev
;
16096 /* Re-insert it before MAX_MP. */
16098 mp
->prev
= max_mp
->prev
;
16101 if (mp
->prev
!= NULL
)
16102 mp
->prev
->next
= mp
;
/* MP has no predecessor, so it becomes the new list head.  */
16104 minipool_vector_head
= mp
;
16107 /* Save the new entry. */
16110 /* Scan over the preceding entries and adjust their addresses as
/* ... required: each earlier entry must still fit in front of MP, so
   propagate the tightened max_address backwards through the list.  */
16112 while (mp
->prev
!= NULL
16113 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16115 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16122 /* Add a constant to the minipool for a forward reference. Returns the
16123 node added or NULL if the constant will not fit in this pool. */
16125 add_minipool_forward_ref (Mfix
*fix
)
16127 /* If set, max_mp is the first pool_entry that has a lower
16128 constraint than the one we are trying to add. */
16129 Mnode
* max_mp
= NULL
;
16130 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
16133 /* If the minipool starts before the end of FIX->INSN then this FIX
16134 can not be placed into the current pool. Furthermore, adding the
16135 new constant pool entry may cause the pool to start FIX_SIZE bytes
16137 if (minipool_vector_head
&&
16138 (fix
->address
+ get_attr_length (fix
->insn
)
16139 >= minipool_vector_head
->max_address
- fix
->fix_size
))
16142 /* Scan the pool to see if a constant with the same value has
16143 already been added. While we are doing this, also note the
16144 location where we must insert the constant if it doesn't already
16146 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16148 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16149 && fix
->mode
== mp
->mode
16150 && (!LABEL_P (fix
->value
)
16151 || (CODE_LABEL_NUMBER (fix
->value
)
16152 == CODE_LABEL_NUMBER (mp
->value
)))
16153 && rtx_equal_p (fix
->value
, mp
->value
))
16155 /* More than one fix references this entry. */
16157 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
16160 /* Note the insertion point if necessary. */
16162 && mp
->max_address
> max_address
)
16165 /* If we are inserting an 8-bytes aligned quantity and
16166 we have not already found an insertion point, then
16167 make sure that all such 8-byte aligned quantities are
16168 placed at the start of the pool. */
16169 if (ARM_DOUBLEWORD_ALIGN
16171 && fix
->fix_size
>= 8
16172 && mp
->fix_size
< 8)
16175 max_address
= mp
->max_address
;
16179 /* The value is not currently in the minipool, so we need to create
16180 a new entry for it. If MAX_MP is NULL, the entry will be put on
16181 the end of the list since the placement is less constrained than
16182 any existing entry. Otherwise, we insert the new fix before
16183 MAX_MP and, if necessary, adjust the constraints on the other
16186 mp
->fix_size
= fix
->fix_size
;
16187 mp
->mode
= fix
->mode
;
16188 mp
->value
= fix
->value
;
16190 /* Not yet required for a backwards ref. */
16191 mp
->min_address
= -65536;
16193 if (max_mp
== NULL
)
16195 mp
->max_address
= max_address
;
16197 mp
->prev
= minipool_vector_tail
;
16199 if (mp
->prev
== NULL
)
16201 minipool_vector_head
= mp
;
16202 minipool_vector_label
= gen_label_rtx ();
16205 mp
->prev
->next
= mp
;
16207 minipool_vector_tail
= mp
;
16211 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16212 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16214 mp
->max_address
= max_address
;
16217 mp
->prev
= max_mp
->prev
;
16219 if (mp
->prev
!= NULL
)
16220 mp
->prev
->next
= mp
;
16222 minipool_vector_head
= mp
;
16225 /* Save the new entry. */
16228 /* Scan over the preceding entries and adjust their addresses as
16230 while (mp
->prev
!= NULL
16231 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16233 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16241 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
16242 HOST_WIDE_INT min_address
)
16244 HOST_WIDE_INT offset
;
16246 /* The code below assumes these are different. */
16247 gcc_assert (mp
!= min_mp
);
16249 if (min_mp
== NULL
)
16251 if (min_address
> mp
->min_address
)
16252 mp
->min_address
= min_address
;
16256 /* We will adjust this below if it is too loose. */
16257 mp
->min_address
= min_address
;
16259 /* Unlink MP from its current position. Since min_mp is non-null,
16260 mp->next must be non-null. */
16261 mp
->next
->prev
= mp
->prev
;
16262 if (mp
->prev
!= NULL
)
16263 mp
->prev
->next
= mp
->next
;
16265 minipool_vector_head
= mp
->next
;
16267 /* Reinsert it after MIN_MP. */
16269 mp
->next
= min_mp
->next
;
16271 if (mp
->next
!= NULL
)
16272 mp
->next
->prev
= mp
;
16274 minipool_vector_tail
= mp
;
16280 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16282 mp
->offset
= offset
;
16283 if (mp
->refcount
> 0)
16284 offset
+= mp
->fix_size
;
16286 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16287 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16293 /* Add a constant to the minipool for a backward reference. Returns the
16294 node added or NULL if the constant will not fit in this pool.
16296 Note that the code for insertion for a backwards reference can be
16297 somewhat confusing because the calculated offsets for each fix do
16298 not take into account the size of the pool (which is still under
16301 add_minipool_backward_ref (Mfix
*fix
)
16303 /* If set, min_mp is the last pool_entry that has a lower constraint
16304 than the one we are trying to add. */
16305 Mnode
*min_mp
= NULL
;
16306 /* This can be negative, since it is only a constraint. */
16307 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16310 /* If we can't reach the current pool from this insn, or if we can't
16311 insert this entry at the end of the pool without pushing other
16312 fixes out of range, then we don't try. This ensures that we
16313 can't fail later on. */
16314 if (min_address
>= minipool_barrier
->address
16315 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16316 >= minipool_barrier
->address
))
16319 /* Scan the pool to see if a constant with the same value has
16320 already been added. While we are doing this, also note the
16321 location where we must insert the constant if it doesn't already
16323 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16325 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16326 && fix
->mode
== mp
->mode
16327 && (!LABEL_P (fix
->value
)
16328 || (CODE_LABEL_NUMBER (fix
->value
)
16329 == CODE_LABEL_NUMBER (mp
->value
)))
16330 && rtx_equal_p (fix
->value
, mp
->value
)
16331 /* Check that there is enough slack to move this entry to the
16332 end of the table (this is conservative). */
16333 && (mp
->max_address
16334 > (minipool_barrier
->address
16335 + minipool_vector_tail
->offset
16336 + minipool_vector_tail
->fix_size
)))
16339 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16342 if (min_mp
!= NULL
)
16343 mp
->min_address
+= fix
->fix_size
;
16346 /* Note the insertion point if necessary. */
16347 if (mp
->min_address
< min_address
)
16349 /* For now, we do not allow the insertion of 8-byte alignment
16350 requiring nodes anywhere but at the start of the pool. */
16351 if (ARM_DOUBLEWORD_ALIGN
16352 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16357 else if (mp
->max_address
16358 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16360 /* Inserting before this entry would push the fix beyond
16361 its maximum address (which can happen if we have
16362 re-located a forwards fix); force the new fix to come
16364 if (ARM_DOUBLEWORD_ALIGN
16365 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16370 min_address
= mp
->min_address
+ fix
->fix_size
;
16373 /* Do not insert a non-8-byte aligned quantity before 8-byte
16374 aligned quantities. */
16375 else if (ARM_DOUBLEWORD_ALIGN
16376 && fix
->fix_size
< 8
16377 && mp
->fix_size
>= 8)
16380 min_address
= mp
->min_address
+ fix
->fix_size
;
16385 /* We need to create a new entry. */
16387 mp
->fix_size
= fix
->fix_size
;
16388 mp
->mode
= fix
->mode
;
16389 mp
->value
= fix
->value
;
16391 mp
->max_address
= minipool_barrier
->address
+ 65536;
16393 mp
->min_address
= min_address
;
16395 if (min_mp
== NULL
)
16398 mp
->next
= minipool_vector_head
;
16400 if (mp
->next
== NULL
)
16402 minipool_vector_tail
= mp
;
16403 minipool_vector_label
= gen_label_rtx ();
16406 mp
->next
->prev
= mp
;
16408 minipool_vector_head
= mp
;
16412 mp
->next
= min_mp
->next
;
16416 if (mp
->next
!= NULL
)
16417 mp
->next
->prev
= mp
;
16419 minipool_vector_tail
= mp
;
16422 /* Save the new entry. */
16430 /* Scan over the following entries and adjust their offsets. */
16431 while (mp
->next
!= NULL
)
16433 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16434 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16437 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16439 mp
->next
->offset
= mp
->offset
;
/* Record BARRIER as the barrier after which the current minipool will
   be emitted, and assign each pool entry its byte offset from the start
   of the pool.  Entries whose refcount dropped to zero take no space.  */
16448 assign_minipool_offsets (Mfix
*barrier
)
16450 HOST_WIDE_INT offset
= 0;
16453 minipool_barrier
= barrier
;
16455 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16457 mp
->offset
= offset
;
/* Only live entries (still referenced by some fix) occupy space.  */
16459 if (mp
->refcount
> 0)
16460 offset
+= mp
->fix_size
;
16464 /* Output the literal table */
16466 dump_minipool (rtx scan
)
16472 if (ARM_DOUBLEWORD_ALIGN
)
16473 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16474 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16481 fprintf (dump_file
,
16482 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16483 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16485 scan
= emit_label_after (gen_label_rtx (), scan
);
16486 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16487 scan
= emit_label_after (minipool_vector_label
, scan
);
16489 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16491 if (mp
->refcount
> 0)
16495 fprintf (dump_file
,
16496 ";; Offset %u, min %ld, max %ld ",
16497 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16498 (unsigned long) mp
->max_address
);
16499 arm_print_value (dump_file
, mp
->value
);
16500 fputc ('\n', dump_file
);
16503 switch (mp
->fix_size
)
16505 #ifdef HAVE_consttable_1
16507 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
16511 #ifdef HAVE_consttable_2
16513 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
16517 #ifdef HAVE_consttable_4
16519 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
16523 #ifdef HAVE_consttable_8
16525 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
16529 #ifdef HAVE_consttable_16
16531 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
16536 gcc_unreachable ();
16544 minipool_vector_head
= minipool_vector_tail
= NULL
;
16545 scan
= emit_insn_after (gen_consttable_end (), scan
);
16546 scan
= emit_barrier_after (scan
);
16549 /* Return the cost of forcibly inserting a barrier after INSN. */
/* Lower cost means a better place to dump the pool.  NOTE(review): the
   case labels of the switch were lost in extraction (original lines
   16562-16577); the surviving returns show labels are preferred
   (base_cost - 10) and some insn kind is penalized (base_cost + 10).  */
16551 arm_barrier_cost (rtx insn
)
16553 /* Basing the location of the pool on the loop depth is preferable,
16554 but at the moment, the basic block information seems to be
16555 corrupt by this stage of the compilation. */
16556 int base_cost
= 50;
16557 rtx next
= next_nonnote_insn (insn
);
16559 if (next
!= NULL
&& LABEL_P (next
))
16562 switch (GET_CODE (insn
))
16565 /* It will always be better to place the table before the label, rather
16574 return base_cost
- 10;
16577 return base_cost
+ 10;
16581 /* Find the best place in the insn stream in the range
16582 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16583 Create the barrier by inserting a jump and add a new fix entry for
16586 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16588 HOST_WIDE_INT count
= 0;
16590 rtx from
= fix
->insn
;
16591 /* The instruction after which we will insert the jump. */
16592 rtx selected
= NULL
;
16594 /* The address at which the jump instruction will be placed. */
16595 HOST_WIDE_INT selected_address
;
16597 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16598 rtx label
= gen_label_rtx ();
16600 selected_cost
= arm_barrier_cost (from
);
16601 selected_address
= fix
->address
;
16603 while (from
&& count
< max_count
)
16608 /* This code shouldn't have been called if there was a natural barrier
16610 gcc_assert (!BARRIER_P (from
));
16612 /* Count the length of this insn. This must stay in sync with the
16613 code that pushes minipool fixes. */
16614 if (LABEL_P (from
))
16615 count
+= get_label_padding (from
);
16617 count
+= get_attr_length (from
);
16619 /* If there is a jump table, add its length. */
16620 if (tablejump_p (from
, NULL
, &tmp
))
16622 count
+= get_jump_table_size (tmp
);
16624 /* Jump tables aren't in a basic block, so base the cost on
16625 the dispatch insn. If we select this location, we will
16626 still put the pool after the table. */
16627 new_cost
= arm_barrier_cost (from
);
16629 if (count
< max_count
16630 && (!selected
|| new_cost
<= selected_cost
))
16633 selected_cost
= new_cost
;
16634 selected_address
= fix
->address
+ count
;
16637 /* Continue after the dispatch table. */
16638 from
= NEXT_INSN (tmp
);
16642 new_cost
= arm_barrier_cost (from
);
16644 if (count
< max_count
16645 && (!selected
|| new_cost
<= selected_cost
))
16648 selected_cost
= new_cost
;
16649 selected_address
= fix
->address
+ count
;
16652 from
= NEXT_INSN (from
);
16655 /* Make sure that we found a place to insert the jump. */
16656 gcc_assert (selected
);
16658 /* Make sure we do not split a call and its corresponding
16659 CALL_ARG_LOCATION note. */
16660 if (CALL_P (selected
))
16662 rtx next
= NEXT_INSN (selected
);
16663 if (next
&& NOTE_P (next
)
16664 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16668 /* Create a new JUMP_INSN that branches around a barrier. */
16669 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16670 JUMP_LABEL (from
) = label
;
16671 barrier
= emit_barrier_after (from
);
16672 emit_label_after (label
, barrier
);
16674 /* Create a minipool barrier entry for the new barrier. */
16675 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16676 new_fix
->insn
= barrier
;
16677 new_fix
->address
= selected_address
;
16678 new_fix
->next
= fix
->next
;
16679 fix
->next
= new_fix
;
16684 /* Record that there is a natural barrier in the insn stream at
/* ... ADDRESS: allocate a fix node on the minipool obstack and append
   it to the global fix chain (head/tail list).  */
16687 push_minipool_barrier (rtx insn
, HOST_WIDE_INT address
)
16689 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16692 fix
->address
= address
;
/* Append to the chain, starting it if this is the first entry.  */
16695 if (minipool_fix_head
!= NULL
)
16696 minipool_fix_tail
->next
= fix
;
16698 minipool_fix_head
= fix
;
16700 minipool_fix_tail
= fix
;
16703 /* Record INSN, which will need fixing up to load a value from the
16704 minipool. ADDRESS is the offset of the insn since the start of the
16705 function; LOC is a pointer to the part of the insn which requires
16706 fixing; VALUE is the constant that must be loaded, which is of type
/* ... MODE.  Builds an Mfix on the obstack and appends it to the global
   fix chain.  */
16709 push_minipool_fix (rtx insn
, HOST_WIDE_INT address
, rtx
*loc
,
16710 enum machine_mode mode
, rtx value
)
16712 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16715 fix
->address
= address
;
/* Sub-word constants are padded to a full word (MINIPOOL_FIX_SIZE).  */
16718 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16719 fix
->value
= value
;
/* Reachability of the insn's pc-relative addressing, from the machine
   description's pool_range / neg_pool_range attributes.  */
16720 fix
->forwards
= get_attr_pool_range (insn
);
16721 fix
->backwards
= get_attr_neg_pool_range (insn
);
16722 fix
->minipool
= NULL
;
16724 /* If an insn doesn't have a range defined for it, then it isn't
16725 expecting to be reworked by this code. Better to stop now than
16726 to generate duff assembly code. */
gcc_assert (fix
->forwards
|| fix
->backwards
);
16729 /* If an entry requires 8-byte alignment then assume all constant pools
16730 require 4 bytes of padding. Trying to do this later on a per-pool
16731 basis is awkward because existing pool entries have to be modified. */
16732 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
/* Debug-dump the fixup when -fdump-rtl is active.  */
16737 fprintf (dump_file
,
16738 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16739 GET_MODE_NAME (mode
),
16740 INSN_UID (insn
), (unsigned long) address
,
16741 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
arm_print_value (dump_file
, fix
->value
);
16743 fprintf (dump_file
, "\n");
16746 /* Add it to the chain of fixes. */
16749 if (minipool_fix_head
!= NULL
)
16750 minipool_fix_tail
->next
= fix
;
16752 minipool_fix_head
= fix
;
16754 minipool_fix_tail
= fix
;
16757 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16758 Returns the number of insns needed, or 99 if we always want to synthesize
/* ... the value (the 99 path, when literal pools are disabled, is in an
   extraction gap here).  Otherwise allow 3 insns when optimizing for
   size or on scheduled-load targets, else 4.  */
16761 arm_max_const_double_inline_cost ()
16763 /* Let the value get synthesized to avoid the use of literal pools. */
16764 if (arm_disable_literal_pool
)
16767 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16770 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16771 Returns the number of insns needed, or 99 if we don't know how to
/* ... synthesize it.  Splits VAL into two SImode halves and sums the
   insn counts arm_gen_constant reports for each half.  */
16774 arm_const_double_inline_cost (rtx val
)
16776 rtx lowpart
, highpart
;
enum machine_mode mode
;
16779 mode
= GET_MODE (val
);
/* VOIDmode CONST_INTs are handled in the (extraction-lost) branch.  */
16781 if (mode
== VOIDmode
)
16784 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16786 lowpart
= gen_lowpart (SImode
, val
);
16787 highpart
= gen_highpart_mode (SImode
, mode
, val
);
gcc_assert (CONST_INT_P (lowpart
));
16790 gcc_assert (CONST_INT_P (highpart
));
/* Cost each 32-bit half independently (no target register, no
   intermediate scratch, counting only).  */
16792 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16793 NULL_RTX
, NULL_RTX
, 0, 0)
16794 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16795 NULL_RTX
, NULL_RTX
, 0, 0));
16798 /* Return true if it is worthwhile to split a 64-bit constant into two
16799 32-bit operations. This is the case if optimizing for size, or
16800 if we have load delay slots, or if one 32-bit part can be done with
16801 a single data operation. */
16803 arm_const_double_by_parts (rtx val
)
16805 enum machine_mode mode
= GET_MODE (val
);
/* Always split when optimizing for size or on load-scheduled cores.  */
16808 if (optimize_size
|| arm_ld_sched
)
16811 if (mode
== VOIDmode
)
/* High half: splittable if it (or its complement) is a valid ARM
   immediate (MOV/MVN encodable).  */
16814 part
= gen_highpart_mode (SImode
, mode
, val
);
gcc_assert (CONST_INT_P (part
));
16818 if (const_ok_for_arm (INTVAL (part
))
16819 || const_ok_for_arm (~INTVAL (part
)))
/* Likewise for the low half.  */
16822 part
= gen_lowpart (SImode
, val
);
gcc_assert (CONST_INT_P (part
));
16826 if (const_ok_for_arm (INTVAL (part
))
16827 || const_ok_for_arm (~INTVAL (part
)))
16833 /* Return true if it is possible to inline both the high and low parts
16834 of a 64-bit constant into 32-bit data processing instructions. */
/* Unlike arm_const_double_by_parts, this requires each half to be a
   directly encodable ARM immediate (no MVN complement check).  */
16836 arm_const_double_by_immediates (rtx val
)
16838 enum machine_mode mode
= GET_MODE (val
);
16841 if (mode
== VOIDmode
)
/* High half must be a valid immediate.  */
16844 part
= gen_highpart_mode (SImode
, mode
, val
);
gcc_assert (CONST_INT_P (part
));
16848 if (!const_ok_for_arm (INTVAL (part
)))
/* Low half likewise.  */
16851 part
= gen_lowpart (SImode
, val
);
gcc_assert (CONST_INT_P (part
));
16855 if (!const_ok_for_arm (INTVAL (part
)))
16861 /* Scan INSN and note any of its operands that need fixing.
16862 If DO_PUSHES is false we do not actually push any of the fixups
16865 note_invalid_constants (rtx insn
, HOST_WIDE_INT address
, int do_pushes
)
16869 extract_insn (insn
);
16871 if (!constrain_operands (1))
16872 fatal_insn_not_found (insn
);
16874 if (recog_data
.n_alternatives
== 0)
16877 /* Fill in recog_op_alt with information about the constraints of
16879 preprocess_constraints (insn
);
16881 const operand_alternative
*op_alt
= which_op_alt ();
16882 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16884 /* Things we need to fix can only occur in inputs. */
16885 if (recog_data
.operand_type
[opno
] != OP_IN
)
16888 /* If this alternative is a memory reference, then any mention
16889 of constants in this alternative is really to fool reload
16890 into allowing us to accept one there. We need to fix them up
16891 now so that we output the right code. */
16892 if (op_alt
[opno
].memory_ok
)
16894 rtx op
= recog_data
.operand
[opno
];
16896 if (CONSTANT_P (op
))
16899 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16900 recog_data
.operand_mode
[opno
], op
);
16902 else if (MEM_P (op
)
16903 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16904 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16908 rtx cop
= avoid_constant_pool_reference (op
);
16910 /* Casting the address of something to a mode narrower
16911 than a word can cause avoid_constant_pool_reference()
16912 to return the pool reference itself. That's no good to
16913 us here. Lets just hope that we can use the
16914 constant pool value directly. */
16916 cop
= get_pool_constant (XEXP (op
, 0));
16918 push_minipool_fix (insn
, address
,
16919 recog_data
.operand_loc
[opno
],
16920 recog_data
.operand_mode
[opno
], cop
);
16930 /* Rewrite move insn into subtract of 0 if the condition codes will
16931 be useful in next conditional jump insn. */
16934 thumb1_reorg (void)
16938 FOR_EACH_BB_FN (bb
, cfun
)
16941 rtx pat
, op0
, set
= NULL
;
16942 rtx prev
, insn
= BB_END (bb
);
16943 bool insn_clobbered
= false;
16945 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
16946 insn
= PREV_INSN (insn
);
16948 /* Find the last cbranchsi4_insn in basic block BB. */
16949 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
16952 /* Get the register with which we are comparing. */
16953 pat
= PATTERN (insn
);
16954 op0
= XEXP (XEXP (SET_SRC (pat
), 0), 0);
16956 /* Find the first flag setting insn before INSN in basic block BB. */
16957 gcc_assert (insn
!= BB_HEAD (bb
));
16958 for (prev
= PREV_INSN (insn
);
16960 && prev
!= BB_HEAD (bb
)
16962 || DEBUG_INSN_P (prev
)
16963 || ((set
= single_set (prev
)) != NULL
16964 && get_attr_conds (prev
) == CONDS_NOCOND
)));
16965 prev
= PREV_INSN (prev
))
16967 if (reg_set_p (op0
, prev
))
16968 insn_clobbered
= true;
16971 /* Skip if op0 is clobbered by insn other than prev. */
16972 if (insn_clobbered
)
16978 dest
= SET_DEST (set
);
16979 src
= SET_SRC (set
);
16980 if (!low_register_operand (dest
, SImode
)
16981 || !low_register_operand (src
, SImode
))
16984 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16985 in INSN. Both src and dest of the move insn are checked. */
16986 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
16988 dest
= copy_rtx (dest
);
16989 src
= copy_rtx (src
);
16990 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
16991 PATTERN (prev
) = gen_rtx_SET (VOIDmode
, dest
, src
);
16992 INSN_CODE (prev
) = -1;
16993 /* Set test register in INSN to dest. */
16994 XEXP (XEXP (SET_SRC (pat
), 0), 0) = copy_rtx (dest
);
16995 INSN_CODE (insn
) = -1;
17000 /* Convert instructions to their cc-clobbering variant if possible, since
17001 that allows us to use smaller encodings. */
17004 thumb2_reorg (void)
17009 INIT_REG_SET (&live
);
17011 /* We are freeing block_for_insn in the toplev to keep compatibility
17012 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17013 compute_bb_for_insn ();
17016 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17018 FOR_EACH_BB_FN (bb
, cfun
)
17020 if (current_tune
->disparage_flag_setting_t16_encodings
17021 && optimize_bb_for_speed_p (bb
))
17025 Convert_Action action
= SKIP
;
17026 Convert_Action action_for_partial_flag_setting
17027 = (current_tune
->disparage_partial_flag_setting_t16_encodings
17028 && optimize_bb_for_speed_p (bb
))
17031 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17032 df_simulate_initialize_backwards (bb
, &live
);
17033 FOR_BB_INSNS_REVERSE (bb
, insn
)
17035 if (NONJUMP_INSN_P (insn
)
17036 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17037 && GET_CODE (PATTERN (insn
)) == SET
)
17040 rtx pat
= PATTERN (insn
);
17041 rtx dst
= XEXP (pat
, 0);
17042 rtx src
= XEXP (pat
, 1);
17043 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17045 if (!OBJECT_P (src
))
17046 op0
= XEXP (src
, 0);
17048 if (BINARY_P (src
))
17049 op1
= XEXP (src
, 1);
17051 if (low_register_operand (dst
, SImode
))
17053 switch (GET_CODE (src
))
17056 /* Adding two registers and storing the result
17057 in the first source is already a 16-bit
17059 if (rtx_equal_p (dst
, op0
)
17060 && register_operand (op1
, SImode
))
17063 if (low_register_operand (op0
, SImode
))
17065 /* ADDS <Rd>,<Rn>,<Rm> */
17066 if (low_register_operand (op1
, SImode
))
17068 /* ADDS <Rdn>,#<imm8> */
17069 /* SUBS <Rdn>,#<imm8> */
17070 else if (rtx_equal_p (dst
, op0
)
17071 && CONST_INT_P (op1
)
17072 && IN_RANGE (INTVAL (op1
), -255, 255))
17074 /* ADDS <Rd>,<Rn>,#<imm3> */
17075 /* SUBS <Rd>,<Rn>,#<imm3> */
17076 else if (CONST_INT_P (op1
)
17077 && IN_RANGE (INTVAL (op1
), -7, 7))
17080 /* ADCS <Rd>, <Rn> */
17081 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17082 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17083 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17085 && COMPARISON_P (op1
)
17086 && cc_register (XEXP (op1
, 0), VOIDmode
)
17087 && maybe_get_arm_condition_code (op1
) == ARM_CS
17088 && XEXP (op1
, 1) == const0_rtx
)
17093 /* RSBS <Rd>,<Rn>,#0
17094 Not handled here: see NEG below. */
17095 /* SUBS <Rd>,<Rn>,#<imm3>
17097 Not handled here: see PLUS above. */
17098 /* SUBS <Rd>,<Rn>,<Rm> */
17099 if (low_register_operand (op0
, SImode
)
17100 && low_register_operand (op1
, SImode
))
17105 /* MULS <Rdm>,<Rn>,<Rdm>
17106 As an exception to the rule, this is only used
17107 when optimizing for size since MULS is slow on all
17108 known implementations. We do not even want to use
17109 MULS in cold code, if optimizing for speed, so we
17110 test the global flag here. */
17111 if (!optimize_size
)
17113 /* else fall through. */
17117 /* ANDS <Rdn>,<Rm> */
17118 if (rtx_equal_p (dst
, op0
)
17119 && low_register_operand (op1
, SImode
))
17120 action
= action_for_partial_flag_setting
;
17121 else if (rtx_equal_p (dst
, op1
)
17122 && low_register_operand (op0
, SImode
))
17123 action
= action_for_partial_flag_setting
== SKIP
17124 ? SKIP
: SWAP_CONV
;
17130 /* ASRS <Rdn>,<Rm> */
17131 /* LSRS <Rdn>,<Rm> */
17132 /* LSLS <Rdn>,<Rm> */
17133 if (rtx_equal_p (dst
, op0
)
17134 && low_register_operand (op1
, SImode
))
17135 action
= action_for_partial_flag_setting
;
17136 /* ASRS <Rd>,<Rm>,#<imm5> */
17137 /* LSRS <Rd>,<Rm>,#<imm5> */
17138 /* LSLS <Rd>,<Rm>,#<imm5> */
17139 else if (low_register_operand (op0
, SImode
)
17140 && CONST_INT_P (op1
)
17141 && IN_RANGE (INTVAL (op1
), 0, 31))
17142 action
= action_for_partial_flag_setting
;
17146 /* RORS <Rdn>,<Rm> */
17147 if (rtx_equal_p (dst
, op0
)
17148 && low_register_operand (op1
, SImode
))
17149 action
= action_for_partial_flag_setting
;
17153 /* MVNS <Rd>,<Rm> */
17154 if (low_register_operand (op0
, SImode
))
17155 action
= action_for_partial_flag_setting
;
17159 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17160 if (low_register_operand (op0
, SImode
))
17165 /* MOVS <Rd>,#<imm8> */
17166 if (CONST_INT_P (src
)
17167 && IN_RANGE (INTVAL (src
), 0, 255))
17168 action
= action_for_partial_flag_setting
;
17172 /* MOVS and MOV<c> with registers have different
17173 encodings, so are not relevant here. */
17181 if (action
!= SKIP
)
17183 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17184 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17187 if (action
== SWAP_CONV
)
17189 src
= copy_rtx (src
);
17190 XEXP (src
, 0) = op1
;
17191 XEXP (src
, 1) = op0
;
17192 pat
= gen_rtx_SET (VOIDmode
, dst
, src
);
17193 vec
= gen_rtvec (2, pat
, clobber
);
17195 else /* action == CONV */
17196 vec
= gen_rtvec (2, pat
, clobber
);
17198 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17199 INSN_CODE (insn
) = -1;
17203 if (NONDEBUG_INSN_P (insn
))
17204 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17208 CLEAR_REG_SET (&live
);
17211 /* Gcc puts the pool in the wrong place for ARM, since we can only
17212 load addresses a limited distance around the pc. We do some
17213 special munging to move the constant pool values to the correct
17214 point in the code. */
17219 HOST_WIDE_INT address
= 0;
17224 else if (TARGET_THUMB2
)
17227 /* Ensure all insns that must be split have been split at this point.
17228 Otherwise, the pool placement code below may compute incorrect
17229 insn lengths. Note that when optimizing, all insns have already
17230 been split at this point. */
17232 split_all_insns_noflow ();
17234 minipool_fix_head
= minipool_fix_tail
= NULL
;
17236 /* The first insn must always be a note, or the code below won't
17237 scan it properly. */
17238 insn
= get_insns ();
17239 gcc_assert (NOTE_P (insn
));
17242 /* Scan all the insns and record the operands that will need fixing. */
17243 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17245 if (BARRIER_P (insn
))
17246 push_minipool_barrier (insn
, address
);
17247 else if (INSN_P (insn
))
17251 note_invalid_constants (insn
, address
, true);
17252 address
+= get_attr_length (insn
);
17254 /* If the insn is a vector jump, add the size of the table
17255 and skip the table. */
17256 if (tablejump_p (insn
, NULL
, &table
))
17258 address
+= get_jump_table_size (table
);
17262 else if (LABEL_P (insn
))
17263 /* Add the worst-case padding due to alignment. We don't add
17264 the _current_ padding because the minipool insertions
17265 themselves might change it. */
17266 address
+= get_label_padding (insn
);
17269 fix
= minipool_fix_head
;
17271 /* Now scan the fixups and perform the required changes. */
17276 Mfix
* last_added_fix
;
17277 Mfix
* last_barrier
= NULL
;
17280 /* Skip any further barriers before the next fix. */
17281 while (fix
&& BARRIER_P (fix
->insn
))
17284 /* No more fixes. */
17288 last_added_fix
= NULL
;
17290 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17292 if (BARRIER_P (ftmp
->insn
))
17294 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17297 last_barrier
= ftmp
;
17299 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17302 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17305 /* If we found a barrier, drop back to that; any fixes that we
17306 could have reached but come after the barrier will now go in
17307 the next mini-pool. */
17308 if (last_barrier
!= NULL
)
17310 /* Reduce the refcount for those fixes that won't go into this
17312 for (fdel
= last_barrier
->next
;
17313 fdel
&& fdel
!= ftmp
;
17316 fdel
->minipool
->refcount
--;
17317 fdel
->minipool
= NULL
;
17320 ftmp
= last_barrier
;
17324 /* ftmp is first fix that we can't fit into this pool and
17325 there no natural barriers that we could use. Insert a
17326 new barrier in the code somewhere between the previous
17327 fix and this one, and arrange to jump around it. */
17328 HOST_WIDE_INT max_address
;
17330 /* The last item on the list of fixes must be a barrier, so
17331 we can never run off the end of the list of fixes without
17332 last_barrier being set. */
17335 max_address
= minipool_vector_head
->max_address
;
17336 /* Check that there isn't another fix that is in range that
17337 we couldn't fit into this pool because the pool was
17338 already too large: we need to put the pool before such an
17339 instruction. The pool itself may come just after the
17340 fix because create_fix_barrier also allows space for a
17341 jump instruction. */
17342 if (ftmp
->address
< max_address
)
17343 max_address
= ftmp
->address
+ 1;
17345 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17348 assign_minipool_offsets (last_barrier
);
17352 if (!BARRIER_P (ftmp
->insn
)
17353 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17360 /* Scan over the fixes we have identified for this pool, fixing them
17361 up and adding the constants to the pool itself. */
17362 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17363 this_fix
= this_fix
->next
)
17364 if (!BARRIER_P (this_fix
->insn
))
17367 = plus_constant (Pmode
,
17368 gen_rtx_LABEL_REF (VOIDmode
,
17369 minipool_vector_label
),
17370 this_fix
->minipool
->offset
);
17371 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17374 dump_minipool (last_barrier
->insn
);
17378 /* From now on we must synthesize any constants that we can't handle
17379 directly. This can happen if the RTL gets split during final
17380 instruction generation. */
17381 cfun
->machine
->after_arm_reorg
= 1;
17383 /* Free the minipool memory. */
17384 obstack_free (&minipool_obstack
, minipool_startobj
);
17387 /* Routines to output assembly language. */
17389 /* If the rtx is the correct value then return the string of the number.
17390 In this way we can ensure that valid double constants are generated even
17391 when cross compiling. */
17393 fp_immediate_constant (rtx x
)
17397 if (!fp_consts_inited
)
17400 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
17402 gcc_assert (REAL_VALUES_EQUAL (r
, value_fp0
));
17406 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17407 static const char *
17408 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17410 if (!fp_consts_inited
)
17413 gcc_assert (REAL_VALUES_EQUAL (*r
, value_fp0
));
17417 /* OPERANDS[0] is the entire list of insns that constitute pop,
17418 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17419 is in the list, UPDATE is true iff the list contains explicit
17420 update of base register. */
17422 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17428 const char *conditional
;
17429 int num_saves
= XVECLEN (operands
[0], 0);
17430 unsigned int regno
;
17431 unsigned int regno_base
= REGNO (operands
[1]);
17434 offset
+= update
? 1 : 0;
17435 offset
+= return_pc
? 1 : 0;
17437 /* Is the base register in the list? */
17438 for (i
= offset
; i
< num_saves
; i
++)
17440 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17441 /* If SP is in the list, then the base register must be SP. */
17442 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17443 /* If base register is in the list, there must be no explicit update. */
17444 if (regno
== regno_base
)
17445 gcc_assert (!update
);
17448 conditional
= reverse
? "%?%D0" : "%?%d0";
17449 if ((regno_base
== SP_REGNUM
) && TARGET_UNIFIED_ASM
)
17451 /* Output pop (not stmfd) because it has a shorter encoding. */
17452 gcc_assert (update
);
17453 sprintf (pattern
, "pop%s\t{", conditional
);
17457 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17458 It's just a convention, their semantics are identical. */
17459 if (regno_base
== SP_REGNUM
)
17460 sprintf (pattern
, "ldm%sfd\t", conditional
);
17461 else if (TARGET_UNIFIED_ASM
)
17462 sprintf (pattern
, "ldmia%s\t", conditional
);
17464 sprintf (pattern
, "ldm%sia\t", conditional
);
17466 strcat (pattern
, reg_names
[regno_base
]);
17468 strcat (pattern
, "!, {");
17470 strcat (pattern
, ", {");
17473 /* Output the first destination register. */
17475 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17477 /* Output the rest of the destination registers. */
17478 for (i
= offset
+ 1; i
< num_saves
; i
++)
17480 strcat (pattern
, ", ");
17482 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17485 strcat (pattern
, "}");
17487 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc
)
17488 strcat (pattern
, "^");
17490 output_asm_insn (pattern
, &cond
);
17494 /* Output the assembly for a store multiple. */
17497 vfp_output_fstmd (rtx
* operands
)
17504 strcpy (pattern
, "fstmfdd%?\t%m0!, {%P1");
17505 p
= strlen (pattern
);
17507 gcc_assert (REG_P (operands
[1]));
17509 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17510 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17512 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17514 strcpy (&pattern
[p
], "}");
17516 output_asm_insn (pattern
, operands
);
17521 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17522 number of bytes pushed. */
17525 vfp_emit_fstmd (int base_reg
, int count
)
17532 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17533 register pairs are stored by a store multiple insn. We avoid this
17534 by pushing an extra pair. */
17535 if (count
== 2 && !arm_arch6
)
17537 if (base_reg
== LAST_VFP_REGNUM
- 3)
17542 /* FSTMD may not store more than 16 doubleword registers at once. Split
17543 larger stores into multiple parts (up to a maximum of two, in
17548 /* NOTE: base_reg is an internal register number, so each D register
17550 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17551 saved
+= vfp_emit_fstmd (base_reg
, 16);
17555 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17556 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17558 reg
= gen_rtx_REG (DFmode
, base_reg
);
17561 XVECEXP (par
, 0, 0)
17562 = gen_rtx_SET (VOIDmode
,
17565 gen_rtx_PRE_MODIFY (Pmode
,
17568 (Pmode
, stack_pointer_rtx
,
17571 gen_rtx_UNSPEC (BLKmode
,
17572 gen_rtvec (1, reg
),
17573 UNSPEC_PUSH_MULT
));
17575 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
17576 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17577 RTX_FRAME_RELATED_P (tmp
) = 1;
17578 XVECEXP (dwarf
, 0, 0) = tmp
;
17580 tmp
= gen_rtx_SET (VOIDmode
,
17581 gen_frame_mem (DFmode
, stack_pointer_rtx
),
17583 RTX_FRAME_RELATED_P (tmp
) = 1;
17584 XVECEXP (dwarf
, 0, 1) = tmp
;
17586 for (i
= 1; i
< count
; i
++)
17588 reg
= gen_rtx_REG (DFmode
, base_reg
);
17590 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17592 tmp
= gen_rtx_SET (VOIDmode
,
17593 gen_frame_mem (DFmode
,
17594 plus_constant (Pmode
,
17598 RTX_FRAME_RELATED_P (tmp
) = 1;
17599 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17602 par
= emit_insn (par
);
17603 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17604 RTX_FRAME_RELATED_P (par
) = 1;
17609 /* Emit a call instruction with pattern PAT. ADDR is the address of
17610 the call target. */
17613 arm_emit_call_insn (rtx pat
, rtx addr
)
17617 insn
= emit_call_insn (pat
);
17619 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17620 If the call might use such an entry, add a use of the PIC register
17621 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17622 if (TARGET_VXWORKS_RTP
17624 && GET_CODE (addr
) == SYMBOL_REF
17625 && (SYMBOL_REF_DECL (addr
)
17626 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17627 : !SYMBOL_REF_LOCAL_P (addr
)))
17629 require_pic_register ();
17630 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17634 /* Output a 'call' insn. */
17636 output_call (rtx
*operands
)
17638 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17640 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17641 if (REGNO (operands
[0]) == LR_REGNUM
)
17643 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17644 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17647 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17649 if (TARGET_INTERWORK
|| arm_arch4t
)
17650 output_asm_insn ("bx%?\t%0", operands
);
17652 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17657 /* Output a 'call' insn that is a reference in memory. This is
17658 disabled for ARMv5 and we prefer a blx instead because otherwise
17659 there's a significant performance overhead. */
17661 output_call_mem (rtx
*operands
)
17663 gcc_assert (!arm_arch5
);
17664 if (TARGET_INTERWORK
)
17666 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17667 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17668 output_asm_insn ("bx%?\t%|ip", operands
);
17670 else if (regno_use_in (LR_REGNUM
, operands
[0]))
17672 /* LR is used in the memory address. We load the address in the
17673 first instruction. It's safe to use IP as the target of the
17674 load since the call will kill it anyway. */
17675 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17676 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17678 output_asm_insn ("bx%?\t%|ip", operands
);
17680 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
17684 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17685 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
17692 /* Output a move from arm registers to arm registers of a long double
17693 OPERANDS[0] is the destination.
17694 OPERANDS[1] is the source. */
17696 output_mov_long_double_arm_from_arm (rtx
*operands
)
17698 /* We have to be careful here because the two might overlap. */
17699 int dest_start
= REGNO (operands
[0]);
17700 int src_start
= REGNO (operands
[1]);
17704 if (dest_start
< src_start
)
17706 for (i
= 0; i
< 3; i
++)
17708 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17709 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17710 output_asm_insn ("mov%?\t%0, %1", ops
);
17715 for (i
= 2; i
>= 0; i
--)
17717 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17718 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17719 output_asm_insn ("mov%?\t%0, %1", ops
);
17727 arm_emit_movpair (rtx dest
, rtx src
)
17729 /* If the src is an immediate, simplify it. */
17730 if (CONST_INT_P (src
))
17732 HOST_WIDE_INT val
= INTVAL (src
);
17733 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17734 if ((val
>> 16) & 0x0000ffff)
17735 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17737 GEN_INT ((val
>> 16) & 0x0000ffff));
17740 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17741 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17744 /* Output a move between double words. It must be REG<-MEM
17747 output_move_double (rtx
*operands
, bool emit
, int *count
)
17749 enum rtx_code code0
= GET_CODE (operands
[0]);
17750 enum rtx_code code1
= GET_CODE (operands
[1]);
17755 /* The only case when this might happen is when
17756 you are looking at the length of a DImode instruction
17757 that has an invalid constant in it. */
17758 if (code0
== REG
&& code1
!= MEM
)
17760 gcc_assert (!emit
);
17767 unsigned int reg0
= REGNO (operands
[0]);
17769 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17771 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17773 switch (GET_CODE (XEXP (operands
[1], 0)))
17780 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17781 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
17783 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
17788 gcc_assert (TARGET_LDRD
);
17790 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
17797 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
17799 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
17807 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
17809 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
17814 gcc_assert (TARGET_LDRD
);
17816 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
17821 /* Autoicrement addressing modes should never have overlapping
17822 base and destination registers, and overlapping index registers
17823 are already prohibited, so this doesn't need to worry about
17825 otherops
[0] = operands
[0];
17826 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17827 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17829 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17831 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17833 /* Registers overlap so split out the increment. */
17836 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
17837 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
17844 /* Use a single insn if we can.
17845 FIXME: IWMMXT allows offsets larger than ldrd can
17846 handle, fix these up with a pair of ldr. */
17848 || !CONST_INT_P (otherops
[2])
17849 || (INTVAL (otherops
[2]) > -256
17850 && INTVAL (otherops
[2]) < 256))
17853 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
17859 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
17860 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17870 /* Use a single insn if we can.
17871 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17872 fix these up with a pair of ldr. */
17874 || !CONST_INT_P (otherops
[2])
17875 || (INTVAL (otherops
[2]) > -256
17876 && INTVAL (otherops
[2]) < 256))
17879 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
17885 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17886 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
17896 /* We might be able to use ldrd %0, %1 here. However the range is
17897 different to ldr/adr, and it is broken on some ARMv7-M
17898 implementations. */
17899 /* Use the second register of the pair to avoid problematic
17901 otherops
[1] = operands
[1];
17903 output_asm_insn ("adr%?\t%0, %1", otherops
);
17904 operands
[1] = otherops
[0];
17908 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
17910 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
17917 /* ??? This needs checking for thumb2. */
17919 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
17920 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
17922 otherops
[0] = operands
[0];
17923 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
17924 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
17926 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
17928 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
17930 switch ((int) INTVAL (otherops
[2]))
17934 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
17940 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
17946 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
17950 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
17951 operands
[1] = otherops
[0];
17953 && (REG_P (otherops
[2])
17955 || (CONST_INT_P (otherops
[2])
17956 && INTVAL (otherops
[2]) > -256
17957 && INTVAL (otherops
[2]) < 256)))
17959 if (reg_overlap_mentioned_p (operands
[0],
17963 /* Swap base and index registers over to
17964 avoid a conflict. */
17966 otherops
[1] = otherops
[2];
17969 /* If both registers conflict, it will usually
17970 have been fixed by a splitter. */
17971 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
17972 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
17976 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17977 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
17984 otherops
[0] = operands
[0];
17986 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
17991 if (CONST_INT_P (otherops
[2]))
17995 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
17996 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
17998 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18004 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18010 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18017 return "ldr%(d%)\t%0, [%1]";
18019 return "ldm%(ia%)\t%1, %M0";
18023 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18024 /* Take care of overlapping base/data reg. */
18025 if (reg_mentioned_p (operands
[0], operands
[1]))
18029 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18030 output_asm_insn ("ldr%?\t%0, %1", operands
);
18040 output_asm_insn ("ldr%?\t%0, %1", operands
);
18041 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18051 /* Constraints should ensure this. */
18052 gcc_assert (code0
== MEM
&& code1
== REG
);
18053 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18054 || (TARGET_ARM
&& TARGET_LDRD
));
18056 switch (GET_CODE (XEXP (operands
[0], 0)))
18062 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
18064 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18069 gcc_assert (TARGET_LDRD
);
18071 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
18078 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
18080 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
18088 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
18090 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
18095 gcc_assert (TARGET_LDRD
);
18097 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
18102 otherops
[0] = operands
[1];
18103 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18104 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18106 /* IWMMXT allows offsets larger than ldrd can handle,
18107 fix these up with a pair of ldr. */
18109 && CONST_INT_P (otherops
[2])
18110 && (INTVAL(otherops
[2]) <= -256
18111 || INTVAL(otherops
[2]) >= 256))
18113 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18117 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18118 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18127 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18128 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18134 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18137 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
18142 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
18147 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18148 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18150 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18154 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
18161 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
18168 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
18173 && (REG_P (otherops
[2])
18175 || (CONST_INT_P (otherops
[2])
18176 && INTVAL (otherops
[2]) > -256
18177 && INTVAL (otherops
[2]) < 256)))
18179 otherops
[0] = operands
[1];
18180 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18182 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
18188 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18189 otherops
[1] = operands
[1];
18192 output_asm_insn ("str%?\t%1, %0", operands
);
18193 output_asm_insn ("str%?\t%H1, %0", otherops
);
18203 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18204 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18207 output_move_quad (rtx
*operands
)
18209 if (REG_P (operands
[0]))
18211 /* Load, or reg->reg move. */
18213 if (MEM_P (operands
[1]))
18215 switch (GET_CODE (XEXP (operands
[1], 0)))
18218 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
18223 output_asm_insn ("adr%?\t%0, %1", operands
);
18224 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
18228 gcc_unreachable ();
18236 gcc_assert (REG_P (operands
[1]));
18238 dest
= REGNO (operands
[0]);
18239 src
= REGNO (operands
[1]);
18241 /* This seems pretty dumb, but hopefully GCC won't try to do it
18244 for (i
= 0; i
< 4; i
++)
18246 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18247 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18248 output_asm_insn ("mov%?\t%0, %1", ops
);
18251 for (i
= 3; i
>= 0; i
--)
18253 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18254 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18255 output_asm_insn ("mov%?\t%0, %1", ops
);
18261 gcc_assert (MEM_P (operands
[0]));
18262 gcc_assert (REG_P (operands
[1]));
18263 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18265 switch (GET_CODE (XEXP (operands
[0], 0)))
18268 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18272 gcc_unreachable ();
18279 /* Output a VFP load or store instruction. */
18282 output_move_vfp (rtx
*operands
)
18284 rtx reg
, mem
, addr
, ops
[2];
18285 int load
= REG_P (operands
[0]);
18286 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18287 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18290 enum machine_mode mode
;
18292 reg
= operands
[!load
];
18293 mem
= operands
[load
];
18295 mode
= GET_MODE (reg
);
18297 gcc_assert (REG_P (reg
));
18298 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18299 gcc_assert (mode
== SFmode
18303 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18304 gcc_assert (MEM_P (mem
));
18306 addr
= XEXP (mem
, 0);
18308 switch (GET_CODE (addr
))
18311 templ
= "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18312 ops
[0] = XEXP (addr
, 0);
18317 templ
= "f%smia%c%%?\t%%0!, {%%%s1}%s";
18318 ops
[0] = XEXP (addr
, 0);
18323 templ
= "f%s%c%%?\t%%%s0, %%1%s";
18329 sprintf (buff
, templ
,
18330 load
? "ld" : "st",
18333 integer_p
? "\t%@ int" : "");
18334 output_asm_insn (buff
, ops
);
18339 /* Output a Neon double-word or quad-word load or store, or a load
18340 or store for larger structure modes.
18342 WARNING: The ordering of elements is weird in big-endian mode,
18343 because the EABI requires that vectors stored in memory appear
18344 as though they were stored by a VSTM, as required by the EABI.
18345 GCC RTL defines element ordering based on in-memory order.
18346 This can be different from the architectural ordering of elements
18347 within a NEON register. The intrinsics defined in arm_neon.h use the
18348 NEON register element ordering, not the GCC RTL element ordering.
18350 For example, the in-memory ordering of a big-endian a quadword
18351 vector with 16-bit elements when stored from register pair {d0,d1}
18352 will be (lowest address first, d0[N] is NEON register element N):
18354 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18356 When necessary, quadword registers (dN, dN+1) are moved to ARM
18357 registers from rN in the order:
18359 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18361 So that STM/LDM can be used on vectors in ARM registers, and the
18362 same memory layout will result as if VSTM/VLDM were used.
18364 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18365 possible, which allows use of appropriate alignment tags.
18366 Note that the choice of "64" is independent of the actual vector
18367 element size; this size simply ensures that the behavior is
18368 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18370 Due to limitations of those instructions, use of VST1.64/VLD1.64
18371 is not possible if:
18372 - the address contains PRE_DEC, or
18373 - the mode refers to more than 4 double-word registers
18375 In those cases, it would be possible to replace VSTM/VLDM by a
18376 sequence of instructions; this is not currently implemented since
18377 this is not certain to actually improve performance. */
18380 output_move_neon (rtx
*operands
)
18382 rtx reg
, mem
, addr
, ops
[2];
18383 int regno
, nregs
, load
= REG_P (operands
[0]);
18386 enum machine_mode mode
;
18388 reg
= operands
[!load
];
18389 mem
= operands
[load
];
18391 mode
= GET_MODE (reg
);
18393 gcc_assert (REG_P (reg
));
18394 regno
= REGNO (reg
);
18395 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18396 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18397 || NEON_REGNO_OK_FOR_QUAD (regno
));
18398 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18399 || VALID_NEON_QREG_MODE (mode
)
18400 || VALID_NEON_STRUCT_MODE (mode
));
18401 gcc_assert (MEM_P (mem
));
18403 addr
= XEXP (mem
, 0);
18405 /* Strip off const from addresses like (const (plus (...))). */
18406 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18407 addr
= XEXP (addr
, 0);
18409 switch (GET_CODE (addr
))
18412 /* We have to use vldm / vstm for too-large modes. */
18415 templ
= "v%smia%%?\t%%0!, %%h1";
18416 ops
[0] = XEXP (addr
, 0);
18420 templ
= "v%s1.64\t%%h1, %%A0";
18427 /* We have to use vldm / vstm in this case, since there is no
18428 pre-decrement form of the vld1 / vst1 instructions. */
18429 templ
= "v%smdb%%?\t%%0!, %%h1";
18430 ops
[0] = XEXP (addr
, 0);
18435 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18436 gcc_unreachable ();
18443 for (i
= 0; i
< nregs
; i
++)
18445 /* We're only using DImode here because it's a convenient size. */
18446 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18447 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18448 if (reg_overlap_mentioned_p (ops
[0], mem
))
18450 gcc_assert (overlap
== -1);
18455 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18456 output_asm_insn (buff
, ops
);
18461 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18462 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18463 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18464 output_asm_insn (buff
, ops
);
18471 /* We have to use vldm / vstm for too-large modes. */
18473 templ
= "v%smia%%?\t%%m0, %%h1";
18475 templ
= "v%s1.64\t%%h1, %%A0";
18481 sprintf (buff
, templ
, load
? "ld" : "st");
18482 output_asm_insn (buff
, ops
);
18487 /* Compute and return the length of neon_mov<mode>, where <mode> is
18488 one of VSTRUCT modes: EI, OI, CI or XI. */
18490 arm_attr_length_move_neon (rtx insn
)
18492 rtx reg
, mem
, addr
;
18494 enum machine_mode mode
;
18496 extract_insn_cached (insn
);
18498 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18500 mode
= GET_MODE (recog_data
.operand
[0]);
18511 gcc_unreachable ();
18515 load
= REG_P (recog_data
.operand
[0]);
18516 reg
= recog_data
.operand
[!load
];
18517 mem
= recog_data
.operand
[load
];
18519 gcc_assert (MEM_P (mem
));
18521 mode
= GET_MODE (reg
);
18522 addr
= XEXP (mem
, 0);
18524 /* Strip off const from addresses like (const (plus (...))). */
18525 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18526 addr
= XEXP (addr
, 0);
18528 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18530 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18537 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18541 arm_address_offset_is_imm (rtx insn
)
18545 extract_insn_cached (insn
);
18547 if (REG_P (recog_data
.operand
[0]))
18550 mem
= recog_data
.operand
[0];
18552 gcc_assert (MEM_P (mem
));
18554 addr
= XEXP (mem
, 0);
18557 || (GET_CODE (addr
) == PLUS
18558 && REG_P (XEXP (addr
, 0))
18559 && CONST_INT_P (XEXP (addr
, 1))))
18565 /* Output an ADD r, s, #n where n may be too big for one instruction.
18566 If adding zero to one register, output nothing. */
18568 output_add_immediate (rtx
*operands
)
18570 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18572 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18575 output_multi_immediate (operands
,
18576 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18579 output_multi_immediate (operands
,
18580 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18587 /* Output a multiple immediate operation.
18588 OPERANDS is the vector of operands referred to in the output patterns.
18589 INSTR1 is the output pattern to use for the first constant.
18590 INSTR2 is the output pattern to use for subsequent constants.
18591 IMMED_OP is the index of the constant slot in OPERANDS.
18592 N is the constant value. */
18593 static const char *
18594 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18595 int immed_op
, HOST_WIDE_INT n
)
18597 #if HOST_BITS_PER_WIDE_INT > 32
18603 /* Quick and easy output. */
18604 operands
[immed_op
] = const0_rtx
;
18605 output_asm_insn (instr1
, operands
);
18610 const char * instr
= instr1
;
18612 /* Note that n is never zero here (which would give no output). */
18613 for (i
= 0; i
< 32; i
+= 2)
18617 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18618 output_asm_insn (instr
, operands
);
18628 /* Return the name of a shifter operation. */
18629 static const char *
18630 arm_shift_nmem(enum rtx_code code
)
18635 return ARM_LSL_NAME
;
18651 /* Return the appropriate ARM instruction for the operation code.
18652 The returned result should not be overwritten. OP is the rtx of the
18653 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18656 arithmetic_instr (rtx op
, int shift_first_arg
)
18658 switch (GET_CODE (op
))
18664 return shift_first_arg
? "rsb" : "sub";
18679 return arm_shift_nmem(GET_CODE(op
));
18682 gcc_unreachable ();
18686 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18687 for the operation code. The returned result should not be overwritten.
18688 OP is the rtx code of the shift.
18689 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18691 static const char *
18692 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18695 enum rtx_code code
= GET_CODE (op
);
18700 if (!CONST_INT_P (XEXP (op
, 1)))
18702 output_operand_lossage ("invalid shift operand");
18707 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18715 mnem
= arm_shift_nmem(code
);
18716 if (CONST_INT_P (XEXP (op
, 1)))
18718 *amountp
= INTVAL (XEXP (op
, 1));
18720 else if (REG_P (XEXP (op
, 1)))
18727 output_operand_lossage ("invalid shift operand");
18733 /* We never have to worry about the amount being other than a
18734 power of 2, since this case can never be reloaded from a reg. */
18735 if (!CONST_INT_P (XEXP (op
, 1)))
18737 output_operand_lossage ("invalid shift operand");
18741 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18743 /* Amount must be a power of two. */
18744 if (*amountp
& (*amountp
- 1))
18746 output_operand_lossage ("invalid shift operand");
18750 *amountp
= int_log2 (*amountp
);
18751 return ARM_LSL_NAME
;
18754 output_operand_lossage ("invalid shift operand");
18758 /* This is not 100% correct, but follows from the desire to merge
18759 multiplication by a power of 2 with the recognizer for a
18760 shift. >=32 is not a valid shift for "lsl", so we must try and
18761 output a shift that produces the correct arithmetical result.
18762 Using lsr #32 is identical except for the fact that the carry bit
18763 is not set correctly if we set the flags; but we never use the
18764 carry bit from such an operation, so we can ignore that. */
18765 if (code
== ROTATERT
)
18766 /* Rotate is just modulo 32. */
18768 else if (*amountp
!= (*amountp
& 31))
18770 if (code
== ASHIFT
)
18775 /* Shifts of 0 are no-ops. */
18782 /* Obtain the shift from the POWER of two. */
18784 static HOST_WIDE_INT
18785 int_log2 (HOST_WIDE_INT power
)
18787 HOST_WIDE_INT shift
= 0;
18789 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
18791 gcc_assert (shift
<= 31);
18798 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18799 because /bin/as is horribly restrictive. The judgement about
18800 whether or not each character is 'printable' (and can be output as
18801 is) or not (and must be printed with an octal escape) must be made
18802 with reference to the *host* character set -- the situation is
18803 similar to that discussed in the comments above pp_c_char in
18804 c-pretty-print.c. */
18806 #define MAX_ASCII_LEN 51
18809 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
18812 int len_so_far
= 0;
18814 fputs ("\t.ascii\t\"", stream
);
18816 for (i
= 0; i
< len
; i
++)
18820 if (len_so_far
>= MAX_ASCII_LEN
)
18822 fputs ("\"\n\t.ascii\t\"", stream
);
18828 if (c
== '\\' || c
== '\"')
18830 putc ('\\', stream
);
18838 fprintf (stream
, "\\%03o", c
);
18843 fputs ("\"\n", stream
);
18846 /* Compute the register save mask for registers 0 through 12
18847 inclusive. This code is used by arm_compute_save_reg_mask. */
18849 static unsigned long
18850 arm_compute_save_reg0_reg12_mask (void)
18852 unsigned long func_type
= arm_current_func_type ();
18853 unsigned long save_reg_mask
= 0;
18856 if (IS_INTERRUPT (func_type
))
18858 unsigned int max_reg
;
18859 /* Interrupt functions must not corrupt any registers,
18860 even call clobbered ones. If this is a leaf function
18861 we can just examine the registers used by the RTL, but
18862 otherwise we have to assume that whatever function is
18863 called might clobber anything, and so we have to save
18864 all the call-clobbered registers as well. */
18865 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
18866 /* FIQ handlers have registers r8 - r12 banked, so
18867 we only need to check r0 - r7, Normal ISRs only
18868 bank r14 and r15, so we must check up to r12.
18869 r13 is the stack pointer which is always preserved,
18870 so we do not need to consider it here. */
18875 for (reg
= 0; reg
<= max_reg
; reg
++)
18876 if (df_regs_ever_live_p (reg
)
18877 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
18878 save_reg_mask
|= (1 << reg
);
18880 /* Also save the pic base register if necessary. */
18882 && !TARGET_SINGLE_PIC_BASE
18883 && arm_pic_register
!= INVALID_REGNUM
18884 && crtl
->uses_pic_offset_table
)
18885 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18887 else if (IS_VOLATILE(func_type
))
18889 /* For noreturn functions we historically omitted register saves
18890 altogether. However this really messes up debugging. As a
18891 compromise save just the frame pointers. Combined with the link
18892 register saved elsewhere this should be sufficient to get
18894 if (frame_pointer_needed
)
18895 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18896 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
18897 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18898 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
18899 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
18903 /* In the normal case we only need to save those registers
18904 which are call saved and which are used by this function. */
18905 for (reg
= 0; reg
<= 11; reg
++)
18906 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
18907 save_reg_mask
|= (1 << reg
);
18909 /* Handle the frame pointer as a special case. */
18910 if (frame_pointer_needed
)
18911 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18913 /* If we aren't loading the PIC register,
18914 don't stack it even though it may be live. */
18916 && !TARGET_SINGLE_PIC_BASE
18917 && arm_pic_register
!= INVALID_REGNUM
18918 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
18919 || crtl
->uses_pic_offset_table
))
18920 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18922 /* The prologue will copy SP into R0, so save it. */
18923 if (IS_STACKALIGN (func_type
))
18924 save_reg_mask
|= 1;
18927 /* Save registers so the exception handler can modify them. */
18928 if (crtl
->calls_eh_return
)
18934 reg
= EH_RETURN_DATA_REGNO (i
);
18935 if (reg
== INVALID_REGNUM
)
18937 save_reg_mask
|= 1 << reg
;
18941 return save_reg_mask
;
18944 /* Return true if r3 is live at the start of the function. */
18947 arm_r3_live_at_start_p (void)
18949 /* Just look at cfg info, which is still close enough to correct at this
18950 point. This gives false positives for broken functions that might use
18951 uninitialized data that happens to be allocated in r3, but who cares? */
18952 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
18955 /* Compute the number of bytes used to store the static chain register on the
18956 stack, above the stack frame. We need to know this accurately to get the
18957 alignment of the rest of the stack frame correct. */
18960 arm_compute_static_chain_stack_bytes (void)
18962 /* See the defining assertion in arm_expand_prologue. */
18963 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
18964 && IS_NESTED (arm_current_func_type ())
18965 && arm_r3_live_at_start_p ()
18966 && crtl
->args
.pretend_args_size
== 0)
18972 /* Compute a bit mask of which registers need to be
18973 saved on the stack for the current function.
18974 This is used by arm_get_frame_offsets, which may add extra registers. */
18976 static unsigned long
18977 arm_compute_save_reg_mask (void)
18979 unsigned int save_reg_mask
= 0;
18980 unsigned long func_type
= arm_current_func_type ();
18983 if (IS_NAKED (func_type
))
18984 /* This should never really happen. */
18987 /* If we are creating a stack frame, then we must save the frame pointer,
18988 IP (which will hold the old stack pointer), LR and the PC. */
18989 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
18991 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
18994 | (1 << PC_REGNUM
);
18996 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
18998 /* Decide if we need to save the link register.
18999 Interrupt routines have their own banked link register,
19000 so they never need to save it.
19001 Otherwise if we do not use the link register we do not need to save
19002 it. If we are pushing other registers onto the stack however, we
19003 can save an instruction in the epilogue by pushing the link register
19004 now and then popping it back into the PC. This incurs extra memory
19005 accesses though, so we only do it when optimizing for size, and only
19006 if we know that we will not need a fancy return sequence. */
19007 if (df_regs_ever_live_p (LR_REGNUM
)
19010 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19011 && !crtl
->calls_eh_return
))
19012 save_reg_mask
|= 1 << LR_REGNUM
;
19014 if (cfun
->machine
->lr_save_eliminated
)
19015 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19017 if (TARGET_REALLY_IWMMXT
19018 && ((bit_count (save_reg_mask
)
19019 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19020 arm_compute_static_chain_stack_bytes())
19023 /* The total number of registers that are going to be pushed
19024 onto the stack is odd. We need to ensure that the stack
19025 is 64-bit aligned before we start to save iWMMXt registers,
19026 and also before we start to create locals. (A local variable
19027 might be a double or long long which we will load/store using
19028 an iWMMXt instruction). Therefore we need to push another
19029 ARM register, so that the stack will be 64-bit aligned. We
19030 try to avoid using the arg registers (r0 -r3) as they might be
19031 used to pass values in a tail call. */
19032 for (reg
= 4; reg
<= 12; reg
++)
19033 if ((save_reg_mask
& (1 << reg
)) == 0)
19037 save_reg_mask
|= (1 << reg
);
19040 cfun
->machine
->sibcall_blocked
= 1;
19041 save_reg_mask
|= (1 << 3);
19045 /* We may need to push an additional register for use initializing the
19046 PIC base register. */
19047 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19048 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19050 reg
= thumb_find_work_register (1 << 4);
19051 if (!call_used_regs
[reg
])
19052 save_reg_mask
|= (1 << reg
);
19055 return save_reg_mask
;
19059 /* Compute a bit mask of which registers need to be
19060 saved on the stack for the current function. */
19061 static unsigned long
19062 thumb1_compute_save_reg_mask (void)
19064 unsigned long mask
;
19068 for (reg
= 0; reg
< 12; reg
++)
19069 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
19073 && !TARGET_SINGLE_PIC_BASE
19074 && arm_pic_register
!= INVALID_REGNUM
19075 && crtl
->uses_pic_offset_table
)
19076 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19078 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19079 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19080 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19082 /* LR will also be pushed if any lo regs are pushed. */
19083 if (mask
& 0xff || thumb_force_lr_save ())
19084 mask
|= (1 << LR_REGNUM
);
19086 /* Make sure we have a low work register if we need one.
19087 We will need one if we are going to push a high register,
19088 but we are not currently intending to push a low register. */
19089 if ((mask
& 0xff) == 0
19090 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19092 /* Use thumb_find_work_register to choose which register
19093 we will use. If the register is live then we will
19094 have to push it. Use LAST_LO_REGNUM as our fallback
19095 choice for the register to select. */
19096 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19097 /* Make sure the register returned by thumb_find_work_register is
19098 not part of the return value. */
19099 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19100 reg
= LAST_LO_REGNUM
;
19102 if (! call_used_regs
[reg
])
19106 /* The 504 below is 8 bytes less than 512 because there are two possible
19107 alignment words. We can't tell here if they will be present or not so we
19108 have to play it safe and assume that they are. */
19109 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19110 ROUND_UP_WORD (get_frame_size ()) +
19111 crtl
->outgoing_args_size
) >= 504)
19113 /* This is the same as the code in thumb1_expand_prologue() which
19114 determines which register to use for stack decrement. */
19115 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19116 if (mask
& (1 << reg
))
19119 if (reg
> LAST_LO_REGNUM
)
19121 /* Make sure we have a register available for stack decrement. */
19122 mask
|= 1 << LAST_LO_REGNUM
;
19130 /* Return the number of bytes required to save VFP registers. */
19132 arm_get_vfp_saved_size (void)
19134 unsigned int regno
;
19139 /* Space for saved VFP registers. */
19140 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
19143 for (regno
= FIRST_VFP_REGNUM
;
19144 regno
< LAST_VFP_REGNUM
;
19147 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19148 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19152 /* Workaround ARM10 VFPr1 bug. */
19153 if (count
== 2 && !arm_arch6
)
19155 saved
+= count
* 8;
19164 if (count
== 2 && !arm_arch6
)
19166 saved
+= count
* 8;
19173 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19174 everything bar the final return instruction. If simple_return is true,
19175 then do not output epilogue, because it has already been emitted in RTL. */
19177 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19178 bool simple_return
)
19180 char conditional
[10];
19183 unsigned long live_regs_mask
;
19184 unsigned long func_type
;
19185 arm_stack_offsets
*offsets
;
19187 func_type
= arm_current_func_type ();
19189 if (IS_NAKED (func_type
))
19192 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19194 /* If this function was declared non-returning, and we have
19195 found a tail call, then we have to trust that the called
19196 function won't return. */
19201 /* Otherwise, trap an attempted return by aborting. */
19203 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19205 assemble_external_libcall (ops
[1]);
19206 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19212 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19214 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19216 cfun
->machine
->return_used_this_function
= 1;
19218 offsets
= arm_get_frame_offsets ();
19219 live_regs_mask
= offsets
->saved_regs_mask
;
19221 if (!simple_return
&& live_regs_mask
)
19223 const char * return_reg
;
19225 /* If we do not have any special requirements for function exit
19226 (e.g. interworking) then we can load the return address
19227 directly into the PC. Otherwise we must load it into LR. */
19229 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19230 return_reg
= reg_names
[PC_REGNUM
];
19232 return_reg
= reg_names
[LR_REGNUM
];
19234 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19236 /* There are three possible reasons for the IP register
19237 being saved. 1) a stack frame was created, in which case
19238 IP contains the old stack pointer, or 2) an ISR routine
19239 corrupted it, or 3) it was saved to align the stack on
19240 iWMMXt. In case 1, restore IP into SP, otherwise just
19242 if (frame_pointer_needed
)
19244 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19245 live_regs_mask
|= (1 << SP_REGNUM
);
19248 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19251 /* On some ARM architectures it is faster to use LDR rather than
19252 LDM to load a single register. On other architectures, the
19253 cost is the same. In 26 bit mode, or for exception handlers,
19254 we have to use LDM to load the PC so that the CPSR is also
19256 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19257 if (live_regs_mask
== (1U << reg
))
19260 if (reg
<= LAST_ARM_REGNUM
19261 && (reg
!= LR_REGNUM
19263 || ! IS_INTERRUPT (func_type
)))
19265 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19266 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19273 /* Generate the load multiple instruction to restore the
19274 registers. Note we can get here, even if
19275 frame_pointer_needed is true, but only if sp already
19276 points to the base of the saved core registers. */
19277 if (live_regs_mask
& (1 << SP_REGNUM
))
19279 unsigned HOST_WIDE_INT stack_adjust
;
19281 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19282 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19284 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19285 if (TARGET_UNIFIED_ASM
)
19286 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19288 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
19291 /* If we can't use ldmib (SA110 bug),
19292 then try to pop r3 instead. */
19294 live_regs_mask
|= 1 << 3;
19296 if (TARGET_UNIFIED_ASM
)
19297 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19299 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
19303 if (TARGET_UNIFIED_ASM
)
19304 sprintf (instr
, "pop%s\t{", conditional
);
19306 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
19308 p
= instr
+ strlen (instr
);
19310 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19311 if (live_regs_mask
& (1 << reg
))
19313 int l
= strlen (reg_names
[reg
]);
19319 memcpy (p
, ", ", 2);
19323 memcpy (p
, "%|", 2);
19324 memcpy (p
+ 2, reg_names
[reg
], l
);
19328 if (live_regs_mask
& (1 << LR_REGNUM
))
19330 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19331 /* If returning from an interrupt, restore the CPSR. */
19332 if (IS_INTERRUPT (func_type
))
19339 output_asm_insn (instr
, & operand
);
19341 /* See if we need to generate an extra instruction to
19342 perform the actual function return. */
19344 && func_type
!= ARM_FT_INTERWORKED
19345 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19347 /* The return has already been handled
19348 by loading the LR into the PC. */
19355 switch ((int) ARM_FUNC_TYPE (func_type
))
19359 /* ??? This is wrong for unified assembly syntax. */
19360 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19363 case ARM_FT_INTERWORKED
:
19364 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19367 case ARM_FT_EXCEPTION
:
19368 /* ??? This is wrong for unified assembly syntax. */
19369 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19373 /* Use bx if it's available. */
19374 if (arm_arch5
|| arm_arch4t
)
19375 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19377 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19381 output_asm_insn (instr
, & operand
);
19387 /* Write the function name into the code section, directly preceding
19388 the function prologue.
19390 Code will be output similar to this:
19392 .ascii "arm_poke_function_name", 0
19395 .word 0xff000000 + (t1 - t0)
19396 arm_poke_function_name
19398 stmfd sp!, {fp, ip, lr, pc}
19401 When performing a stack backtrace, code can inspect the value
19402 of 'pc' stored at 'fp' + 0. If the trace function then looks
19403 at location pc - 12 and the top 8 bits are set, then we know
19404 that there is a function name embedded immediately preceding this
19405 location and has length ((pc[-3]) & 0xff000000).
19407 We assume that pc is declared as a pointer to an unsigned long.
19409 It is of no benefit to output the function name if we are assembling
19410 a leaf function. These function types will not contain a stack
19411 backtrace structure, therefore it is not possible to determine the
19414 arm_poke_function_name (FILE *stream
, const char *name
)
19416 unsigned long alignlength
;
19417 unsigned long length
;
19420 length
= strlen (name
) + 1;
19421 alignlength
= ROUND_UP_WORD (length
);
19423 ASM_OUTPUT_ASCII (stream
, name
, length
);
19424 ASM_OUTPUT_ALIGN (stream
, 2);
19425 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19426 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19429 /* Place some comments into the assembler stream
19430 describing the current function. */
19432 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19434 unsigned long func_type
;
19436 /* ??? Do we want to print some of the below anyway? */
19440 /* Sanity check. */
19441 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19443 func_type
= arm_current_func_type ();
19445 switch ((int) ARM_FUNC_TYPE (func_type
))
19448 case ARM_FT_NORMAL
:
19450 case ARM_FT_INTERWORKED
:
19451 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19454 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19457 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19459 case ARM_FT_EXCEPTION
:
19460 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19464 if (IS_NAKED (func_type
))
19465 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19467 if (IS_VOLATILE (func_type
))
19468 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19470 if (IS_NESTED (func_type
))
19471 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19472 if (IS_STACKALIGN (func_type
))
19473 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19475 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19477 crtl
->args
.pretend_args_size
, frame_size
);
19479 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19480 frame_pointer_needed
,
19481 cfun
->machine
->uses_anonymous_args
);
19483 if (cfun
->machine
->lr_save_eliminated
)
19484 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19486 if (crtl
->calls_eh_return
)
19487 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19492 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19493 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19495 arm_stack_offsets
*offsets
;
19501 /* Emit any call-via-reg trampolines that are needed for v4t support
19502 of call_reg and call_value_reg type insns. */
19503 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19505 rtx label
= cfun
->machine
->call_via
[regno
];
19509 switch_to_section (function_section (current_function_decl
));
19510 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19511 CODE_LABEL_NUMBER (label
));
19512 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19516 /* ??? Probably not safe to set this here, since it assumes that a
19517 function will be emitted as assembly immediately after we generate
19518 RTL for it. This does not happen for inline functions. */
19519 cfun
->machine
->return_used_this_function
= 0;
19521 else /* TARGET_32BIT */
19523 /* We need to take into account any stack-frame rounding. */
19524 offsets
= arm_get_frame_offsets ();
19526 gcc_assert (!use_return_insn (FALSE
, NULL
)
19527 || (cfun
->machine
->return_used_this_function
!= 0)
19528 || offsets
->saved_regs
== offsets
->outgoing_args
19529 || frame_pointer_needed
);
19533 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19534 STR and STRD. If an even number of registers are being pushed, one
19535 or more STRD patterns are created for each register pair. If an
19536 odd number of registers are pushed, emit an initial STR followed by
19537 as many STRD instructions as are needed. This works best when the
19538 stack is initially 64-bit aligned (the normal case), since it
19539 ensures that each STRD is also 64-bit aligned. */
19541 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19546 rtx par
= NULL_RTX
;
19547 rtx dwarf
= NULL_RTX
;
19551 num_regs
= bit_count (saved_regs_mask
);
19553 /* Must be at least one register to save, and can't save SP or PC. */
19554 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19555 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19556 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19558 /* Create sequence for DWARF info. All the frame-related data for
19559 debugging is held in this wrapper. */
19560 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19562 /* Describe the stack adjustment. */
19563 tmp
= gen_rtx_SET (VOIDmode
,
19565 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19566 RTX_FRAME_RELATED_P (tmp
) = 1;
19567 XVECEXP (dwarf
, 0, 0) = tmp
;
19569 /* Find the first register. */
19570 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19575 /* If there's an odd number of registers to push. Start off by
19576 pushing a single register. This ensures that subsequent strd
19577 operations are dword aligned (assuming that SP was originally
19578 64-bit aligned). */
19579 if ((num_regs
& 1) != 0)
19581 rtx reg
, mem
, insn
;
19583 reg
= gen_rtx_REG (SImode
, regno
);
19585 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19586 stack_pointer_rtx
));
19588 mem
= gen_frame_mem (Pmode
,
19590 (Pmode
, stack_pointer_rtx
,
19591 plus_constant (Pmode
, stack_pointer_rtx
,
19594 tmp
= gen_rtx_SET (VOIDmode
, mem
, reg
);
19595 RTX_FRAME_RELATED_P (tmp
) = 1;
19596 insn
= emit_insn (tmp
);
19597 RTX_FRAME_RELATED_P (insn
) = 1;
19598 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19599 tmp
= gen_rtx_SET (VOIDmode
, gen_frame_mem (Pmode
, stack_pointer_rtx
),
19601 RTX_FRAME_RELATED_P (tmp
) = 1;
19604 XVECEXP (dwarf
, 0, i
) = tmp
;
19608 while (i
< num_regs
)
19609 if (saved_regs_mask
& (1 << regno
))
19611 rtx reg1
, reg2
, mem1
, mem2
;
19612 rtx tmp0
, tmp1
, tmp2
;
19615 /* Find the register to pair with this one. */
19616 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19620 reg1
= gen_rtx_REG (SImode
, regno
);
19621 reg2
= gen_rtx_REG (SImode
, regno2
);
19628 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19631 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19633 -4 * (num_regs
- 1)));
19634 tmp0
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
19635 plus_constant (Pmode
, stack_pointer_rtx
,
19637 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19638 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19639 RTX_FRAME_RELATED_P (tmp0
) = 1;
19640 RTX_FRAME_RELATED_P (tmp1
) = 1;
19641 RTX_FRAME_RELATED_P (tmp2
) = 1;
19642 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19643 XVECEXP (par
, 0, 0) = tmp0
;
19644 XVECEXP (par
, 0, 1) = tmp1
;
19645 XVECEXP (par
, 0, 2) = tmp2
;
19646 insn
= emit_insn (par
);
19647 RTX_FRAME_RELATED_P (insn
) = 1;
19648 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19652 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19655 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19658 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19659 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19660 RTX_FRAME_RELATED_P (tmp1
) = 1;
19661 RTX_FRAME_RELATED_P (tmp2
) = 1;
19662 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19663 XVECEXP (par
, 0, 0) = tmp1
;
19664 XVECEXP (par
, 0, 1) = tmp2
;
19668 /* Create unwind information. This is an approximation. */
19669 tmp1
= gen_rtx_SET (VOIDmode
,
19670 gen_frame_mem (Pmode
,
19671 plus_constant (Pmode
,
19675 tmp2
= gen_rtx_SET (VOIDmode
,
19676 gen_frame_mem (Pmode
,
19677 plus_constant (Pmode
,
19682 RTX_FRAME_RELATED_P (tmp1
) = 1;
19683 RTX_FRAME_RELATED_P (tmp2
) = 1;
19684 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19685 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19687 regno
= regno2
+ 1;
19695 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19696 whenever possible, otherwise it emits single-word stores. The first store
19697 also allocates stack space for all saved registers, using writeback with
19698 post-addressing mode. All other stores use offset addressing. If no STRD
19699 can be emitted, this function emits a sequence of single-word stores,
19700 and not an STM as before, because single-word stores provide more freedom
19701 scheduling and can be turned into an STM by peephole optimizations. */
19703 arm_emit_strd_push (unsigned long saved_regs_mask
)
19706 int i
, j
, dwarf_index
= 0;
19708 rtx dwarf
= NULL_RTX
;
19709 rtx insn
= NULL_RTX
;
19712 /* TODO: A more efficient code can be emitted by changing the
19713 layout, e.g., first push all pairs that can use STRD to keep the
19714 stack aligned, and then push all other registers. */
19715 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19716 if (saved_regs_mask
& (1 << i
))
19719 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19720 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19721 gcc_assert (num_regs
> 0);
19723 /* Create sequence for DWARF info. */
19724 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19726 /* For dwarf info, we generate explicit stack update. */
19727 tmp
= gen_rtx_SET (VOIDmode
,
19729 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19730 RTX_FRAME_RELATED_P (tmp
) = 1;
19731 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19733 /* Save registers. */
19734 offset
= - 4 * num_regs
;
19736 while (j
<= LAST_ARM_REGNUM
)
19737 if (saved_regs_mask
& (1 << j
))
19740 && (saved_regs_mask
& (1 << (j
+ 1))))
19742 /* Current register and previous register form register pair for
19743 which STRD can be generated. */
19746 /* Allocate stack space for all saved registers. */
19747 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19748 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19749 mem
= gen_frame_mem (DImode
, tmp
);
19752 else if (offset
> 0)
19753 mem
= gen_frame_mem (DImode
,
19754 plus_constant (Pmode
,
19758 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19760 tmp
= gen_rtx_SET (DImode
, mem
, gen_rtx_REG (DImode
, j
));
19761 RTX_FRAME_RELATED_P (tmp
) = 1;
19762 tmp
= emit_insn (tmp
);
19764 /* Record the first store insn. */
19765 if (dwarf_index
== 1)
19768 /* Generate dwarf info. */
19769 mem
= gen_frame_mem (SImode
,
19770 plus_constant (Pmode
,
19773 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19774 RTX_FRAME_RELATED_P (tmp
) = 1;
19775 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19777 mem
= gen_frame_mem (SImode
,
19778 plus_constant (Pmode
,
19781 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
+ 1));
19782 RTX_FRAME_RELATED_P (tmp
) = 1;
19783 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19790 /* Emit a single word store. */
19793 /* Allocate stack space for all saved registers. */
19794 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19795 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19796 mem
= gen_frame_mem (SImode
, tmp
);
19799 else if (offset
> 0)
19800 mem
= gen_frame_mem (SImode
,
19801 plus_constant (Pmode
,
19805 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19807 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19808 RTX_FRAME_RELATED_P (tmp
) = 1;
19809 tmp
= emit_insn (tmp
);
19811 /* Record the first store insn. */
19812 if (dwarf_index
== 1)
19815 /* Generate dwarf info. */
19816 mem
= gen_frame_mem (SImode
,
19817 plus_constant(Pmode
,
19820 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19821 RTX_FRAME_RELATED_P (tmp
) = 1;
19822 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19831 /* Attach dwarf info to the first insn we generate. */
19832 gcc_assert (insn
!= NULL_RTX
);
19833 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19834 RTX_FRAME_RELATED_P (insn
) = 1;
19837 /* Generate and emit an insn that we will recognize as a push_multi.
19838 Unfortunately, since this insn does not reflect very well the actual
19839 semantics of the operation, we need to annotate the insn for the benefit
19840 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19841 MASK for registers that should be annotated for DWARF2 frame unwind
19844 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
19847 int num_dwarf_regs
= 0;
19851 int dwarf_par_index
;
19854 /* We don't record the PC in the dwarf frame information. */
19855 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
19857 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19859 if (mask
& (1 << i
))
19861 if (dwarf_regs_mask
& (1 << i
))
19865 gcc_assert (num_regs
&& num_regs
<= 16);
19866 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
19868 /* For the body of the insn we are going to generate an UNSPEC in
19869 parallel with several USEs. This allows the insn to be recognized
19870 by the push_multi pattern in the arm.md file.
19872 The body of the insn looks something like this:
19875 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19876 (const_int:SI <num>)))
19877 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19883 For the frame note however, we try to be more explicit and actually
19884 show each register being stored into the stack frame, plus a (single)
19885 decrement of the stack pointer. We do it this way in order to be
19886 friendly to the stack unwinding code, which only wants to see a single
19887 stack decrement per instruction. The RTL we generate for the note looks
19888 something like this:
19891 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19892 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19893 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19894 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19898 FIXME:: In an ideal world the PRE_MODIFY would not exist and
19899 instead we'd have a parallel expression detailing all
19900 the stores to the various memory addresses so that debug
19901 information is more up-to-date. Remember however while writing
19902 this to take care of the constraints with the push instruction.
19904 Note also that this has to be taken care of for the VFP registers.
19906 For more see PR43399. */
19908 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
19909 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
19910 dwarf_par_index
= 1;
19912 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19914 if (mask
& (1 << i
))
19916 reg
= gen_rtx_REG (SImode
, i
);
19918 XVECEXP (par
, 0, 0)
19919 = gen_rtx_SET (VOIDmode
,
19922 gen_rtx_PRE_MODIFY (Pmode
,
19925 (Pmode
, stack_pointer_rtx
,
19928 gen_rtx_UNSPEC (BLKmode
,
19929 gen_rtvec (1, reg
),
19930 UNSPEC_PUSH_MULT
));
19932 if (dwarf_regs_mask
& (1 << i
))
19934 tmp
= gen_rtx_SET (VOIDmode
,
19935 gen_frame_mem (SImode
, stack_pointer_rtx
),
19937 RTX_FRAME_RELATED_P (tmp
) = 1;
19938 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
19945 for (j
= 1, i
++; j
< num_regs
; i
++)
19947 if (mask
& (1 << i
))
19949 reg
= gen_rtx_REG (SImode
, i
);
19951 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
19953 if (dwarf_regs_mask
& (1 << i
))
19956 = gen_rtx_SET (VOIDmode
,
19959 plus_constant (Pmode
, stack_pointer_rtx
,
19962 RTX_FRAME_RELATED_P (tmp
) = 1;
19963 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
19970 par
= emit_insn (par
);
19972 tmp
= gen_rtx_SET (VOIDmode
,
19974 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19975 RTX_FRAME_RELATED_P (tmp
) = 1;
19976 XVECEXP (dwarf
, 0, 0) = tmp
;
19978 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
19983 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19984 SIZE is the offset to be adjusted.
19985 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19987 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
19991 RTX_FRAME_RELATED_P (insn
) = 1;
19992 dwarf
= gen_rtx_SET (VOIDmode
, dest
, plus_constant (Pmode
, src
, size
));
19993 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
19996 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19997 SAVED_REGS_MASK shows which registers need to be restored.
19999 Unfortunately, since this insn does not reflect very well the actual
20000 semantics of the operation, we need to annotate the insn for the benefit
20001 of DWARF2 frame unwind information. */
20003 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20008 rtx dwarf
= NULL_RTX
;
20014 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
20015 offset_adj
= return_in_pc
? 1 : 0;
20016 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20017 if (saved_regs_mask
& (1 << i
))
20020 gcc_assert (num_regs
&& num_regs
<= 16);
20022 /* If SP is in reglist, then we don't emit SP update insn. */
20023 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20025 /* The parallel needs to hold num_regs SETs
20026 and one SET for the stack update. */
20027 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20032 XVECEXP (par
, 0, 0) = tmp
;
20037 /* Increment the stack pointer, based on there being
20038 num_regs 4-byte registers to restore. */
20039 tmp
= gen_rtx_SET (VOIDmode
,
20041 plus_constant (Pmode
,
20044 RTX_FRAME_RELATED_P (tmp
) = 1;
20045 XVECEXP (par
, 0, offset_adj
) = tmp
;
20048 /* Now restore every reg, which may include PC. */
20049 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20050 if (saved_regs_mask
& (1 << i
))
20052 reg
= gen_rtx_REG (SImode
, i
);
20053 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20055 /* Emit single load with writeback. */
20056 tmp
= gen_frame_mem (SImode
,
20057 gen_rtx_POST_INC (Pmode
,
20058 stack_pointer_rtx
));
20059 tmp
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, tmp
));
20060 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20064 tmp
= gen_rtx_SET (VOIDmode
,
20068 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20069 RTX_FRAME_RELATED_P (tmp
) = 1;
20070 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20072 /* We need to maintain a sequence for DWARF info too. As dwarf info
20073 should not have PC, skip PC. */
20074 if (i
!= PC_REGNUM
)
20075 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20081 par
= emit_jump_insn (par
);
20083 par
= emit_insn (par
);
20085 REG_NOTES (par
) = dwarf
;
20087 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20088 stack_pointer_rtx
, stack_pointer_rtx
);
20091 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20092 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20094 Unfortunately, since this insn does not reflect very well the actual
20095 semantics of the operation, we need to annotate the insn for the benefit
20096 of DWARF2 frame unwind information. */
20098 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20102 rtx dwarf
= NULL_RTX
;
20105 gcc_assert (num_regs
&& num_regs
<= 32);
20107 /* Workaround ARM10 VFPr1 bug. */
20108 if (num_regs
== 2 && !arm_arch6
)
20110 if (first_reg
== 15)
20116 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20117 there could be up to 32 D-registers to restore.
20118 If there are more than 16 D-registers, make two recursive calls,
20119 each of which emits one pop_multi instruction. */
20122 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20123 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20127 /* The parallel needs to hold num_regs SETs
20128 and one SET for the stack update. */
20129 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20131 /* Increment the stack pointer, based on there being
20132 num_regs 8-byte registers to restore. */
20133 tmp
= gen_rtx_SET (VOIDmode
,
20135 plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20136 RTX_FRAME_RELATED_P (tmp
) = 1;
20137 XVECEXP (par
, 0, 0) = tmp
;
20139 /* Now show every reg that will be restored, using a SET for each. */
20140 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20142 reg
= gen_rtx_REG (DFmode
, i
);
20144 tmp
= gen_rtx_SET (VOIDmode
,
20148 plus_constant (Pmode
, base_reg
, 8 * j
)));
20149 RTX_FRAME_RELATED_P (tmp
) = 1;
20150 XVECEXP (par
, 0, j
+ 1) = tmp
;
20152 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20157 par
= emit_insn (par
);
20158 REG_NOTES (par
) = dwarf
;
20160 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20161 if (TARGET_VFP
&& REGNO (base_reg
) == IP_REGNUM
)
20163 RTX_FRAME_RELATED_P (par
) = 1;
20164 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20167 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20168 base_reg
, base_reg
);
20171 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20172 number of registers are being popped, multiple LDRD patterns are created for
20173 all register pairs. If odd number of registers are popped, last register is
20174 loaded by using LDR pattern. */
20176 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20180 rtx par
= NULL_RTX
;
20181 rtx dwarf
= NULL_RTX
;
20182 rtx tmp
, reg
, tmp1
;
20185 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
20186 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20187 if (saved_regs_mask
& (1 << i
))
20190 gcc_assert (num_regs
&& num_regs
<= 16);
20192 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20193 to be popped. So, if num_regs is even, now it will become odd,
20194 and we can generate pop with PC. If num_regs is odd, it will be
20195 even now, and ldr with return can be generated for PC. */
20199 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20201 /* Var j iterates over all the registers to gather all the registers in
20202 saved_regs_mask. Var i gives index of saved registers in stack frame.
20203 A PARALLEL RTX of register-pair is created here, so that pattern for
20204 LDRD can be matched. As PC is always last register to be popped, and
20205 we have already decremented num_regs if PC, we don't have to worry
20206 about PC in this loop. */
20207 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20208 if (saved_regs_mask
& (1 << j
))
20210 /* Create RTX for memory load. */
20211 reg
= gen_rtx_REG (SImode
, j
);
20212 tmp
= gen_rtx_SET (SImode
,
20214 gen_frame_mem (SImode
,
20215 plus_constant (Pmode
,
20216 stack_pointer_rtx
, 4 * i
)));
20217 RTX_FRAME_RELATED_P (tmp
) = 1;
20221 /* When saved-register index (i) is even, the RTX to be emitted is
20222 yet to be created. Hence create it first. The LDRD pattern we
20223 are generating is :
20224 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20225 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20226 where target registers need not be consecutive. */
20227 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20231 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20232 added as 0th element and if i is odd, reg_i is added as 1st element
20233 of LDRD pattern shown above. */
20234 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20235 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20239 /* When saved-register index (i) is odd, RTXs for both the registers
20240 to be loaded are generated in above given LDRD pattern, and the
20241 pattern can be emitted now. */
20242 par
= emit_insn (par
);
20243 REG_NOTES (par
) = dwarf
;
20244 RTX_FRAME_RELATED_P (par
) = 1;
20250 /* If the number of registers pushed is odd AND return_in_pc is false OR
20251 number of registers are even AND return_in_pc is true, last register is
20252 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20253 then LDR with post increment. */
20255 /* Increment the stack pointer, based on there being
20256 num_regs 4-byte registers to restore. */
20257 tmp
= gen_rtx_SET (VOIDmode
,
20259 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20260 RTX_FRAME_RELATED_P (tmp
) = 1;
20261 tmp
= emit_insn (tmp
);
20264 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20265 stack_pointer_rtx
, stack_pointer_rtx
);
20270 if (((num_regs
% 2) == 1 && !return_in_pc
)
20271 || ((num_regs
% 2) == 0 && return_in_pc
))
20273 /* Scan for the single register to be popped. Skip until the saved
20274 register is found. */
20275 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20277 /* Gen LDR with post increment here. */
20278 tmp1
= gen_rtx_MEM (SImode
,
20279 gen_rtx_POST_INC (SImode
,
20280 stack_pointer_rtx
));
20281 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20283 reg
= gen_rtx_REG (SImode
, j
);
20284 tmp
= gen_rtx_SET (SImode
, reg
, tmp1
);
20285 RTX_FRAME_RELATED_P (tmp
) = 1;
20286 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20290 /* If return_in_pc, j must be PC_REGNUM. */
20291 gcc_assert (j
== PC_REGNUM
);
20292 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20293 XVECEXP (par
, 0, 0) = ret_rtx
;
20294 XVECEXP (par
, 0, 1) = tmp
;
20295 par
= emit_jump_insn (par
);
20299 par
= emit_insn (tmp
);
20300 REG_NOTES (par
) = dwarf
;
20301 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20302 stack_pointer_rtx
, stack_pointer_rtx
);
20306 else if ((num_regs
% 2) == 1 && return_in_pc
)
20308 /* There are 2 registers to be popped. So, generate the pattern
20309 pop_multiple_with_stack_update_and_return to pop in PC. */
20310 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20316 /* LDRD in ARM mode needs consecutive registers as operands. This function
20317 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20318 offset addressing and then generates one separate stack udpate. This provides
20319 more scheduling freedom, compared to writeback on every load. However,
20320 if the function returns using load into PC directly
20321 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20322 before the last load. TODO: Add a peephole optimization to recognize
20323 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20324 peephole optimization to merge the load at stack-offset zero
20325 with the stack update instruction using load with writeback
20326 in post-index addressing mode. */
20328 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20332 rtx par
= NULL_RTX
;
20333 rtx dwarf
= NULL_RTX
;
20336 /* Restore saved registers. */
20337 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20339 while (j
<= LAST_ARM_REGNUM
)
20340 if (saved_regs_mask
& (1 << j
))
20343 && (saved_regs_mask
& (1 << (j
+ 1)))
20344 && (j
+ 1) != PC_REGNUM
)
20346 /* Current register and next register form register pair for which
20347 LDRD can be generated. PC is always the last register popped, and
20348 we handle it separately. */
20350 mem
= gen_frame_mem (DImode
,
20351 plus_constant (Pmode
,
20355 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20357 tmp
= gen_rtx_SET (DImode
, gen_rtx_REG (DImode
, j
), mem
);
20358 tmp
= emit_insn (tmp
);
20359 RTX_FRAME_RELATED_P (tmp
) = 1;
20361 /* Generate dwarf info. */
20363 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20364 gen_rtx_REG (SImode
, j
),
20366 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20367 gen_rtx_REG (SImode
, j
+ 1),
20370 REG_NOTES (tmp
) = dwarf
;
20375 else if (j
!= PC_REGNUM
)
20377 /* Emit a single word load. */
20379 mem
= gen_frame_mem (SImode
,
20380 plus_constant (Pmode
,
20384 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20386 tmp
= gen_rtx_SET (SImode
, gen_rtx_REG (SImode
, j
), mem
);
20387 tmp
= emit_insn (tmp
);
20388 RTX_FRAME_RELATED_P (tmp
) = 1;
20390 /* Generate dwarf info. */
20391 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20392 gen_rtx_REG (SImode
, j
),
20398 else /* j == PC_REGNUM */
20404 /* Update the stack. */
20407 tmp
= gen_rtx_SET (Pmode
,
20409 plus_constant (Pmode
,
20412 tmp
= emit_insn (tmp
);
20413 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20414 stack_pointer_rtx
, stack_pointer_rtx
);
20418 if (saved_regs_mask
& (1 << PC_REGNUM
))
20420 /* Only PC is to be popped. */
20421 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20422 XVECEXP (par
, 0, 0) = ret_rtx
;
20423 tmp
= gen_rtx_SET (SImode
,
20424 gen_rtx_REG (SImode
, PC_REGNUM
),
20425 gen_frame_mem (SImode
,
20426 gen_rtx_POST_INC (SImode
,
20427 stack_pointer_rtx
)));
20428 RTX_FRAME_RELATED_P (tmp
) = 1;
20429 XVECEXP (par
, 0, 1) = tmp
;
20430 par
= emit_jump_insn (par
);
20432 /* Generate dwarf info. */
20433 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20434 gen_rtx_REG (SImode
, PC_REGNUM
),
20436 REG_NOTES (par
) = dwarf
;
20437 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20438 stack_pointer_rtx
, stack_pointer_rtx
);
20442 /* Calculate the size of the return value that is passed in registers. */
20444 arm_size_return_regs (void)
20446 enum machine_mode mode
;
20448 if (crtl
->return_rtx
!= 0)
20449 mode
= GET_MODE (crtl
->return_rtx
);
20451 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20453 return GET_MODE_SIZE (mode
);
20456 /* Return true if the current function needs to save/restore LR. */
20458 thumb_force_lr_save (void)
20460 return !cfun
->machine
->lr_save_eliminated
20461 && (!leaf_function_p ()
20462 || thumb_far_jump_used_p ()
20463 || df_regs_ever_live_p (LR_REGNUM
));
20466 /* We do not know if r3 will be available because
20467 we do have an indirect tailcall happening in this
20468 particular case. */
20470 is_indirect_tailcall_p (rtx call
)
20472 rtx pat
= PATTERN (call
);
20474 /* Indirect tail call. */
20475 pat
= XVECEXP (pat
, 0, 0);
20476 if (GET_CODE (pat
) == SET
)
20477 pat
= SET_SRC (pat
);
20479 pat
= XEXP (XEXP (pat
, 0), 0);
20480 return REG_P (pat
);
20483 /* Return true if r3 is used by any of the tail call insns in the
20484 current function. */
20486 any_sibcall_could_use_r3 (void)
20491 if (!crtl
->tail_call_emit
)
20493 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20494 if (e
->flags
& EDGE_SIBCALL
)
20496 rtx call
= BB_END (e
->src
);
20497 if (!CALL_P (call
))
20498 call
= prev_nonnote_nondebug_insn (call
);
20499 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20500 if (find_regno_fusage (call
, USE
, 3)
20501 || is_indirect_tailcall_p (call
))
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   call saved registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue() and arm_compute_save_reg_mask().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
20557 /* Calculate stack offsets. These are used to calculate register elimination
20558 offsets and in prologue/epilogue code. Also calculates which registers
20559 should be saved. */
20561 static arm_stack_offsets
*
20562 arm_get_frame_offsets (void)
20564 struct arm_stack_offsets
*offsets
;
20565 unsigned long func_type
;
20569 HOST_WIDE_INT frame_size
;
20572 offsets
= &cfun
->machine
->stack_offsets
;
20574 /* We need to know if we are a leaf function. Unfortunately, it
20575 is possible to be called after start_sequence has been called,
20576 which causes get_insns to return the insns for the sequence,
20577 not the function, which will cause leaf_function_p to return
20578 the incorrect result.
20580 to know about leaf functions once reload has completed, and the
20581 frame size cannot be changed after that time, so we can safely
20582 use the cached value. */
20584 if (reload_completed
)
20587 /* Initially this is the size of the local variables. It will translated
20588 into an offset once we have determined the size of preceding data. */
20589 frame_size
= ROUND_UP_WORD (get_frame_size ());
20591 leaf
= leaf_function_p ();
20593 /* Space for variadic functions. */
20594 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20596 /* In Thumb mode this is incorrect, but never used. */
20598 = (offsets
->saved_args
20599 + arm_compute_static_chain_stack_bytes ()
20600 + (frame_pointer_needed
? 4 : 0));
20604 unsigned int regno
;
20606 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20607 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20608 saved
= core_saved
;
20610 /* We know that SP will be doubleword aligned on entry, and we must
20611 preserve that condition at any subroutine call. We also require the
20612 soft frame pointer to be doubleword aligned. */
20614 if (TARGET_REALLY_IWMMXT
)
20616 /* Check for the call-saved iWMMXt registers. */
20617 for (regno
= FIRST_IWMMXT_REGNUM
;
20618 regno
<= LAST_IWMMXT_REGNUM
;
20620 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20624 func_type
= arm_current_func_type ();
20625 /* Space for saved VFP registers. */
20626 if (! IS_VOLATILE (func_type
)
20627 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
20628 saved
+= arm_get_vfp_saved_size ();
20630 else /* TARGET_THUMB1 */
20632 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20633 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20634 saved
= core_saved
;
20635 if (TARGET_BACKTRACE
)
20639 /* Saved registers include the stack frame. */
20640 offsets
->saved_regs
20641 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20642 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20644 /* A leaf function does not need any stack alignment if it has nothing
20646 if (leaf
&& frame_size
== 0
20647 /* However if it calls alloca(), we have a dynamically allocated
20648 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20649 && ! cfun
->calls_alloca
)
20651 offsets
->outgoing_args
= offsets
->soft_frame
;
20652 offsets
->locals_base
= offsets
->soft_frame
;
20656 /* Ensure SFP has the correct alignment. */
20657 if (ARM_DOUBLEWORD_ALIGN
20658 && (offsets
->soft_frame
& 7))
20660 offsets
->soft_frame
+= 4;
20661 /* Try to align stack by pushing an extra reg. Don't bother doing this
20662 when there is a stack frame as the alignment will be rolled into
20663 the normal stack adjustment. */
20664 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20668 /* If it is safe to use r3, then do so. This sometimes
20669 generates better code on Thumb-2 by avoiding the need to
20670 use 32-bit push/pop instructions. */
20671 if (! any_sibcall_could_use_r3 ()
20672 && arm_size_return_regs () <= 12
20673 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20675 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20680 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20682 /* Avoid fixed registers; they may be changed at
20683 arbitrary times so it's unsafe to restore them
20684 during the epilogue. */
20686 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20695 offsets
->saved_regs
+= 4;
20696 offsets
->saved_regs_mask
|= (1 << reg
);
20701 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20702 offsets
->outgoing_args
= (offsets
->locals_base
20703 + crtl
->outgoing_args_size
);
20705 if (ARM_DOUBLEWORD_ALIGN
)
20707 /* Ensure SP remains doubleword aligned. */
20708 if (offsets
->outgoing_args
& 7)
20709 offsets
->outgoing_args
+= 4;
20710 gcc_assert (!(offsets
->outgoing_args
& 7));
20717 /* Calculate the relative offsets for the different stack pointers. Positive
20718 offsets are in the direction of stack growth. */
20721 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20723 arm_stack_offsets
*offsets
;
20725 offsets
= arm_get_frame_offsets ();
20727 /* OK, now we have enough information to compute the distances.
20728 There must be an entry in these switch tables for each pair
20729 of registers in ELIMINABLE_REGS, even if some of the entries
20730 seem to be redundant or useless. */
20733 case ARG_POINTER_REGNUM
:
20736 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20739 case FRAME_POINTER_REGNUM
:
20740 /* This is the reverse of the soft frame pointer
20741 to hard frame pointer elimination below. */
20742 return offsets
->soft_frame
- offsets
->saved_args
;
20744 case ARM_HARD_FRAME_POINTER_REGNUM
:
20745 /* This is only non-zero in the case where the static chain register
20746 is stored above the frame. */
20747 return offsets
->frame
- offsets
->saved_args
- 4;
20749 case STACK_POINTER_REGNUM
:
20750 /* If nothing has been pushed on the stack at all
20751 then this will return -4. This *is* correct! */
20752 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
20755 gcc_unreachable ();
20757 gcc_unreachable ();
20759 case FRAME_POINTER_REGNUM
:
20762 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20765 case ARM_HARD_FRAME_POINTER_REGNUM
:
20766 /* The hard frame pointer points to the top entry in the
20767 stack frame. The soft frame pointer to the bottom entry
20768 in the stack frame. If there is no stack frame at all,
20769 then they are identical. */
20771 return offsets
->frame
- offsets
->soft_frame
;
20773 case STACK_POINTER_REGNUM
:
20774 return offsets
->outgoing_args
- offsets
->soft_frame
;
20777 gcc_unreachable ();
20779 gcc_unreachable ();
20782 /* You cannot eliminate from the stack pointer.
20783 In theory you could eliminate from the hard frame
20784 pointer to the stack pointer, but this will never
20785 happen, since if a stack frame is not needed the
20786 hard frame pointer will never be used. */
20787 gcc_unreachable ();
20791 /* Given FROM and TO register numbers, say whether this elimination is
20792 allowed. Frame pointer elimination is automatically handled.
20794 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20795 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20796 pointer, we must eliminate FRAME_POINTER_REGNUM into
20797 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20798 ARG_POINTER_REGNUM. */
20801 arm_can_eliminate (const int from
, const int to
)
20803 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
20804 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
20805 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
20806 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
20810 /* Emit RTL to save coprocessor registers on function entry. Returns the
20811 number of bytes pushed. */
20814 arm_save_coproc_regs(void)
20816 int saved_size
= 0;
20818 unsigned start_reg
;
20821 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
20822 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
20824 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20825 insn
= gen_rtx_MEM (V2SImode
, insn
);
20826 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
20827 RTX_FRAME_RELATED_P (insn
) = 1;
20831 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
20833 start_reg
= FIRST_VFP_REGNUM
;
20835 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
20837 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
20838 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
20840 if (start_reg
!= reg
)
20841 saved_size
+= vfp_emit_fstmd (start_reg
,
20842 (reg
- start_reg
) / 2);
20843 start_reg
= reg
+ 2;
20846 if (start_reg
!= reg
)
20847 saved_size
+= vfp_emit_fstmd (start_reg
,
20848 (reg
- start_reg
) / 2);
20854 /* Set the Thumb frame pointer from the stack pointer. */
20857 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
20859 HOST_WIDE_INT amount
;
20862 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
20864 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20865 stack_pointer_rtx
, GEN_INT (amount
)));
20868 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
20869 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20870 expects the first two operands to be the same. */
20873 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20875 hard_frame_pointer_rtx
));
20879 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20880 hard_frame_pointer_rtx
,
20881 stack_pointer_rtx
));
20883 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
20884 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
20885 RTX_FRAME_RELATED_P (dwarf
) = 1;
20886 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20889 RTX_FRAME_RELATED_P (insn
) = 1;
20892 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20895 arm_expand_prologue (void)
20900 unsigned long live_regs_mask
;
20901 unsigned long func_type
;
20903 int saved_pretend_args
= 0;
20904 int saved_regs
= 0;
20905 unsigned HOST_WIDE_INT args_to_push
;
20906 arm_stack_offsets
*offsets
;
20908 func_type
= arm_current_func_type ();
20910 /* Naked functions don't have prologues. */
20911 if (IS_NAKED (func_type
))
20914 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20915 args_to_push
= crtl
->args
.pretend_args_size
;
20917 /* Compute which register we will have to save onto the stack. */
20918 offsets
= arm_get_frame_offsets ();
20919 live_regs_mask
= offsets
->saved_regs_mask
;
20921 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
20923 if (IS_STACKALIGN (func_type
))
20927 /* Handle a word-aligned stack pointer. We generate the following:
20932 <save and restore r0 in normal prologue/epilogue>
20936 The unwinder doesn't need to know about the stack realignment.
20937 Just tell it we saved SP in r0. */
20938 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
20940 r0
= gen_rtx_REG (SImode
, 0);
20941 r1
= gen_rtx_REG (SImode
, 1);
20943 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
20944 RTX_FRAME_RELATED_P (insn
) = 1;
20945 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
20947 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
20949 /* ??? The CFA changes here, which may cause GDB to conclude that it
20950 has entered a different function. That said, the unwind info is
20951 correct, individually, before and after this instruction because
20952 we've described the save of SP, which will override the default
20953 handling of SP as restoring from the CFA. */
20954 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
20957 /* For APCS frames, if IP register is clobbered
20958 when creating frame, save that register in a special
20960 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
20962 if (IS_INTERRUPT (func_type
))
20964 /* Interrupt functions must not corrupt any registers.
20965 Creating a frame pointer however, corrupts the IP
20966 register, so we must push it first. */
20967 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
20969 /* Do not set RTX_FRAME_RELATED_P on this insn.
20970 The dwarf stack unwinding code only wants to see one
20971 stack decrement per function, and this is not it. If
20972 this instruction is labeled as being part of the frame
20973 creation sequence then dwarf2out_frame_debug_expr will
20974 die when it encounters the assignment of IP to FP
20975 later on, since the use of SP here establishes SP as
20976 the CFA register and not IP.
20978 Anyway this instruction is not really part of the stack
20979 frame creation although it is part of the prologue. */
20981 else if (IS_NESTED (func_type
))
20983 /* The static chain register is the same as the IP register
20984 used as a scratch register during stack frame creation.
20985 To get around this need to find somewhere to store IP
20986 whilst the frame is being created. We try the following
20989 1. The last argument register r3 if it is available.
20990 2. A slot on the stack above the frame if there are no
20991 arguments to push onto the stack.
20992 3. Register r3 again, after pushing the argument registers
20993 onto the stack, if this is a varargs function.
20994 4. The last slot on the stack created for the arguments to
20995 push, if this isn't a varargs function.
20997 Note - we only need to tell the dwarf2 backend about the SP
20998 adjustment in the second variant; the static chain register
20999 doesn't need to be unwound, as it doesn't contain a value
21000 inherited from the caller. */
21002 if (!arm_r3_live_at_start_p ())
21003 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21004 else if (args_to_push
== 0)
21008 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21011 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21012 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21015 /* Just tell the dwarf backend that we adjusted SP. */
21016 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21017 plus_constant (Pmode
, stack_pointer_rtx
,
21019 RTX_FRAME_RELATED_P (insn
) = 1;
21020 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21024 /* Store the args on the stack. */
21025 if (cfun
->machine
->uses_anonymous_args
)
21028 = emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21029 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21030 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21031 saved_pretend_args
= 1;
21037 if (args_to_push
== 4)
21038 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21041 = gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21042 plus_constant (Pmode
,
21046 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21048 /* Just tell the dwarf backend that we adjusted SP. */
21050 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21051 plus_constant (Pmode
, stack_pointer_rtx
,
21053 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21056 RTX_FRAME_RELATED_P (insn
) = 1;
21057 fp_offset
= args_to_push
;
21062 insn
= emit_set_insn (ip_rtx
,
21063 plus_constant (Pmode
, stack_pointer_rtx
,
21065 RTX_FRAME_RELATED_P (insn
) = 1;
21070 /* Push the argument registers, or reserve space for them. */
21071 if (cfun
->machine
->uses_anonymous_args
)
21072 insn
= emit_multi_reg_push
21073 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21074 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21077 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21078 GEN_INT (- args_to_push
)));
21079 RTX_FRAME_RELATED_P (insn
) = 1;
21082 /* If this is an interrupt service routine, and the link register
21083 is going to be pushed, and we're not generating extra
21084 push of IP (needed when frame is needed and frame layout if apcs),
21085 subtracting four from LR now will mean that the function return
21086 can be done with a single instruction. */
21087 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21088 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21089 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21092 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21094 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21097 if (live_regs_mask
)
21099 unsigned long dwarf_regs_mask
= live_regs_mask
;
21101 saved_regs
+= bit_count (live_regs_mask
) * 4;
21102 if (optimize_size
&& !frame_pointer_needed
21103 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21105 /* If no coprocessor registers are being pushed and we don't have
21106 to worry about a frame pointer then push extra registers to
21107 create the stack frame. This is done is a way that does not
21108 alter the frame layout, so is independent of the epilogue. */
21112 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21114 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21115 if (frame
&& n
* 4 >= frame
)
21118 live_regs_mask
|= (1 << n
) - 1;
21119 saved_regs
+= frame
;
21124 && current_tune
->prefer_ldrd_strd
21125 && !optimize_function_for_size_p (cfun
))
21127 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21129 thumb2_emit_strd_push (live_regs_mask
);
21130 else if (TARGET_ARM
21131 && !TARGET_APCS_FRAME
21132 && !IS_INTERRUPT (func_type
))
21133 arm_emit_strd_push (live_regs_mask
);
21136 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21137 RTX_FRAME_RELATED_P (insn
) = 1;
21142 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21143 RTX_FRAME_RELATED_P (insn
) = 1;
21147 if (! IS_VOLATILE (func_type
))
21148 saved_regs
+= arm_save_coproc_regs ();
21150 if (frame_pointer_needed
&& TARGET_ARM
)
21152 /* Create the new frame pointer. */
21153 if (TARGET_APCS_FRAME
)
21155 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21156 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21157 RTX_FRAME_RELATED_P (insn
) = 1;
21159 if (IS_NESTED (func_type
))
21161 /* Recover the static chain register. */
21162 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21163 insn
= gen_rtx_REG (SImode
, 3);
21166 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21167 insn
= gen_frame_mem (SImode
, insn
);
21169 emit_set_insn (ip_rtx
, insn
);
21170 /* Add a USE to stop propagate_one_insn() from barfing. */
21171 emit_insn (gen_force_register_use (ip_rtx
));
21176 insn
= GEN_INT (saved_regs
- 4);
21177 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21178 stack_pointer_rtx
, insn
));
21179 RTX_FRAME_RELATED_P (insn
) = 1;
21183 if (flag_stack_usage_info
)
21184 current_function_static_stack_size
21185 = offsets
->outgoing_args
- offsets
->saved_args
;
21187 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21189 /* This add can produce multiple insns for a large constant, so we
21190 need to get tricky. */
21191 rtx last
= get_last_insn ();
21193 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21194 - offsets
->outgoing_args
);
21196 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21200 last
= last
? NEXT_INSN (last
) : get_insns ();
21201 RTX_FRAME_RELATED_P (last
) = 1;
21203 while (last
!= insn
);
21205 /* If the frame pointer is needed, emit a special barrier that
21206 will prevent the scheduler from moving stores to the frame
21207 before the stack adjustment. */
21208 if (frame_pointer_needed
)
21209 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
21210 hard_frame_pointer_rtx
));
21214 if (frame_pointer_needed
&& TARGET_THUMB2
)
21215 thumb_set_frame_pointer (offsets
);
21217 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21219 unsigned long mask
;
21221 mask
= live_regs_mask
;
21222 mask
&= THUMB2_WORK_REGS
;
21223 if (!IS_NESTED (func_type
))
21224 mask
|= (1 << IP_REGNUM
);
21225 arm_load_pic_register (mask
);
21228 /* If we are profiling, make sure no instructions are scheduled before
21229 the call to mcount. Similarly if the user has requested no
21230 scheduling in the prolog. Similarly if we want non-call exceptions
21231 using the EABI unwinder, to prevent faulting instructions from being
21232 swapped with a stack adjustment. */
21233 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21234 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21235 && cfun
->can_throw_non_call_exceptions
))
21236 emit_insn (gen_blockage ());
21238 /* If the link register is being kept alive, with the return address in it,
21239 then make sure that it does not get reused by the ce2 pass. */
21240 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21241 cfun
->machine
->lr_save_eliminated
= 1;
21244 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21246 arm_print_condition (FILE *stream
)
21248 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21250 /* Branch conversion is not implemented for Thumb-2. */
21253 output_operand_lossage ("predicated Thumb instruction");
21256 if (current_insn_predicate
!= NULL
)
21258 output_operand_lossage
21259 ("predicated instruction in conditional sequence");
21263 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21265 else if (current_insn_predicate
)
21267 enum arm_cond_code code
;
21271 output_operand_lossage ("predicated Thumb instruction");
21275 code
= get_arm_condition_code (current_insn_predicate
);
21276 fputs (arm_condition_codes
[code
], stream
);
/* arm_print_operand: the TARGET_PRINT_OPERAND hook -- prints operand X to
   STREAM under format code CODE (see the letter inventory just below).

   NOTE(review): this region is a mangled, line-wrapped listing of GCC's
   arm.c.  The leading numbers on many lines are the original listing's
   line numbers; gaps in that numbering show that lines -- including the
   big switch's `case 'X':` labels, braces and `break`s -- were dropped by
   the extraction.  Only comments are added here; no code tokens were
   altered.  Restore this function from upstream gcc/config/arm/arm.c
   rather than editing the fragments in place.  */
21281 /* Globally reserved letters: acln
21282 Puncutation letters currently used: @_|?().!#
21283 Lower case letters currently used: bcdefhimpqtvwxyz
21284 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21285 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21287 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21289 If CODE is 'd', then the X is a condition operand and the instruction
21290 should only be executed if the condition is true.
21291 if CODE is 'D', then the X is a condition operand and the instruction
21292 should only be executed if the condition is false: however, if the mode
21293 of the comparison is CCFPEmode, then always execute the instruction -- we
21294 do this because in these circumstances !GE does not necessarily imply LT;
21295 in these cases the instruction pattern will take care to make sure that
21296 an instruction containing %d will follow, thereby undoing the effects of
21297 doing this instruction unconditionally.
21298 If CODE is 'N' then X is a floating point operand that must be negated
21300 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21301 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21303 arm_print_operand (FILE *stream
, rtx x
, int code
)
/* NOTE(review): the fragments below are the bodies of this function's
   operand-code cases, in original order; the case labels themselves
   ('@', '_', '|', '?', '(', ')', '.', '!', '#', 'N', 'c', ... per the
   header comment above) are missing from this extraction -- map each
   body back to its label against upstream before reuse.  */
21308 fputs (ASM_COMMENT_START
, stream
);
21312 fputs (user_label_prefix
, stream
);
21316 fputs (REGISTER_PREFIX
, stream
);
21320 arm_print_condition (stream
);
21324 /* Nothing in unified syntax, otherwise the current condition code. */
21325 if (!TARGET_UNIFIED_ASM
)
21326 arm_print_condition (stream
);
21330 /* The current condition code in unified syntax, otherwise nothing. */
21331 if (TARGET_UNIFIED_ASM
)
21332 arm_print_condition (stream
);
21336 /* The current condition code for a condition code setting instruction.
21337 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21338 if (TARGET_UNIFIED_ASM
)
21340 fputc('s', stream
);
21341 arm_print_condition (stream
);
21345 arm_print_condition (stream
);
21346 fputc('s', stream
);
21351 /* If the instruction is conditionally executed then print
21352 the current condition code, otherwise print 's'. */
21353 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
21354 if (current_insn_predicate
)
21355 arm_print_condition (stream
);
21357 fputc('s', stream
);
21360 /* %# is a "break" sequence. It doesn't output anything, but is used to
21361 separate e.g. operand numbers from following text, if that text consists
21362 of further digits which we don't want to be part of the operand
21370 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
21371 r
= real_value_negate (&r
);
21372 fprintf (stream
, "%s", fp_const_from_val (&r
));
21376 /* An integer or symbol address without a preceding # sign. */
21378 switch (GET_CODE (x
))
21381 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21385 output_addr_const (stream
, x
);
21389 if (GET_CODE (XEXP (x
, 0)) == PLUS
21390 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21392 output_addr_const (stream
, x
);
21395 /* Fall through. */
21398 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21402 /* An integer that we want to print in HEX. */
21404 switch (GET_CODE (x
))
21407 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21411 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21416 if (CONST_INT_P (x
))
21419 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21420 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21424 putc ('~', stream
);
21425 output_addr_const (stream
, x
);
21430 /* Print the log2 of a CONST_INT. */
21434 if (!CONST_INT_P (x
)
21435 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
21436 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21438 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21443 /* The low 16 bits of an immediate constant. */
21444 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21448 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21452 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21460 shift
= shift_op (x
, &val
);
21464 fprintf (stream
, ", %s ", shift
);
21466 arm_print_operand (stream
, XEXP (x
, 1), 0);
21468 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21473 /* An explanation of the 'Q', 'R' and 'H' register operands:
21475 In a pair of registers containing a DI or DF value the 'Q'
21476 operand returns the register number of the register containing
21477 the least significant part of the value. The 'R' operand returns
21478 the register number of the register containing the most
21479 significant part of the value.
21481 The 'H' operand returns the higher of the two register numbers.
21482 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21483 same as the 'Q' operand, since the most significant part of the
21484 value is held in the lower number register. The reverse is true
21485 on systems where WORDS_BIG_ENDIAN is false.
21487 The purpose of these operands is to distinguish between cases
21488 where the endian-ness of the values is important (for example
21489 when they are added together), and cases where the endian-ness
21490 is irrelevant, but the order of register operations is important.
21491 For example when loading a value from memory into a register
21492 pair, the endian-ness does not matter. Provided that the value
21493 from the lower memory address is put into the lower numbered
21494 register, and the value from the higher address is put into the
21495 higher numbered register, the load will work regardless of whether
21496 the value being loaded is big-wordian or little-wordian. The
21497 order of the two register loads can matter however, if the address
21498 of the memory location is actually held in one of the registers
21499 being overwritten by the load.
21501 The 'Q' and 'R' constraints are also available for 64-bit
21504 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21506 rtx part
= gen_lowpart (SImode
, x
);
21507 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21511 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21513 output_operand_lossage ("invalid operand for code '%c'", code
);
21517 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
21521 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21523 enum machine_mode mode
= GET_MODE (x
);
21526 if (mode
== VOIDmode
)
21528 part
= gen_highpart_mode (SImode
, mode
, x
);
21529 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21533 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21535 output_operand_lossage ("invalid operand for code '%c'", code
);
21539 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
21543 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21545 output_operand_lossage ("invalid operand for code '%c'", code
);
21549 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
21553 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21555 output_operand_lossage ("invalid operand for code '%c'", code
);
21559 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
21563 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21565 output_operand_lossage ("invalid operand for code '%c'", code
);
21569 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
21573 asm_fprintf (stream
, "%r",
21574 REG_P (XEXP (x
, 0))
21575 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
21579 asm_fprintf (stream
, "{%r-%r}",
21581 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
21584 /* Like 'M', but writing doubleword vector registers, for use by Neon
21588 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
21589 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
21591 asm_fprintf (stream
, "{d%d}", regno
);
21593 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
21598 /* CONST_TRUE_RTX means always -- that's the default. */
21599 if (x
== const_true_rtx
)
21602 if (!COMPARISON_P (x
))
21604 output_operand_lossage ("invalid operand for code '%c'", code
);
21608 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
21613 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21614 want to do that. */
21615 if (x
== const_true_rtx
)
21617 output_operand_lossage ("instruction never executed");
21620 if (!COMPARISON_P (x
))
21622 output_operand_lossage ("invalid operand for code '%c'", code
);
21626 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
21627 (get_arm_condition_code (x
))],
21637 /* Former Maverick support, removed after GCC-4.7. */
21638 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
21643 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
21644 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
21645 /* Bad value for wCG register number. */
21647 output_operand_lossage ("invalid operand for code '%c'", code
);
21652 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
21655 /* Print an iWMMXt control register name. */
21657 if (!CONST_INT_P (x
)
21659 || INTVAL (x
) >= 16)
21660 /* Bad value for wC register number. */
21662 output_operand_lossage ("invalid operand for code '%c'", code
);
21668 static const char * wc_reg_names
[16] =
21670 "wCID", "wCon", "wCSSF", "wCASF",
21671 "wC4", "wC5", "wC6", "wC7",
21672 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21673 "wC12", "wC13", "wC14", "wC15"
21676 fputs (wc_reg_names
[INTVAL (x
)], stream
);
21680 /* Print the high single-precision register of a VFP double-precision
21684 enum machine_mode mode
= GET_MODE (x
);
21687 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
21689 output_operand_lossage ("invalid operand for code '%c'", code
);
21694 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
21696 output_operand_lossage ("invalid operand for code '%c'", code
);
21700 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
21704 /* Print a VFP/Neon double precision or quad precision register name. */
21708 enum machine_mode mode
= GET_MODE (x
);
21709 int is_quad
= (code
== 'q');
21712 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
21714 output_operand_lossage ("invalid operand for code '%c'", code
);
21719 || !IS_VFP_REGNUM (REGNO (x
)))
21721 output_operand_lossage ("invalid operand for code '%c'", code
);
21726 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
21727 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
21729 output_operand_lossage ("invalid operand for code '%c'", code
);
21733 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
21734 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
21738 /* These two codes print the low/high doubleword register of a Neon quad
21739 register, respectively. For pair-structure types, can also print
21740 low/high quadword registers. */
21744 enum machine_mode mode
= GET_MODE (x
);
21747 if ((GET_MODE_SIZE (mode
) != 16
21748 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
21750 output_operand_lossage ("invalid operand for code '%c'", code
);
21755 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
21757 output_operand_lossage ("invalid operand for code '%c'", code
);
21761 if (GET_MODE_SIZE (mode
) == 16)
21762 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
21763 + (code
== 'f' ? 1 : 0));
21765 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
21766 + (code
== 'f' ? 1 : 0));
21770 /* Print a VFPv3 floating-point constant, represented as an integer
21774 int index
= vfp3_const_double_index (x
);
21775 gcc_assert (index
!= -1);
21776 fprintf (stream
, "%d", index
);
21780 /* Print bits representing opcode features for Neon.
21782 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21783 and polynomials as unsigned.
21785 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21787 Bit 2 is 1 for rounding functions, 0 otherwise. */
21789 /* Identify the type as 's', 'u', 'p' or 'f'. */
21792 HOST_WIDE_INT bits
= INTVAL (x
);
21793 fputc ("uspf"[bits
& 3], stream
);
21797 /* Likewise, but signed and unsigned integers are both 'i'. */
21800 HOST_WIDE_INT bits
= INTVAL (x
);
21801 fputc ("iipf"[bits
& 3], stream
);
21805 /* As for 'T', but emit 'u' instead of 'p'. */
21808 HOST_WIDE_INT bits
= INTVAL (x
);
21809 fputc ("usuf"[bits
& 3], stream
);
21813 /* Bit 2: rounding (vs none). */
21816 HOST_WIDE_INT bits
= INTVAL (x
);
21817 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
21821 /* Memory operand for vld1/vst1 instruction. */
21825 bool postinc
= FALSE
;
21826 unsigned align
, memsize
, align_bits
;
21828 gcc_assert (MEM_P (x
));
21829 addr
= XEXP (x
, 0);
21830 if (GET_CODE (addr
) == POST_INC
)
21833 addr
= XEXP (addr
, 0);
21835 asm_fprintf (stream
, "[%r", REGNO (addr
));
21837 /* We know the alignment of this access, so we can emit a hint in the
21838 instruction (for some alignments) as an aid to the memory subsystem
21840 align
= MEM_ALIGN (x
) >> 3;
21841 memsize
= MEM_SIZE (x
);
21843 /* Only certain alignment specifiers are supported by the hardware. */
21844 if (memsize
== 32 && (align
% 32) == 0)
21846 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
21848 else if (memsize
>= 8 && (align
% 8) == 0)
21853 if (align_bits
!= 0)
21854 asm_fprintf (stream
, ":%d", align_bits
);
21856 asm_fprintf (stream
, "]");
21859 fputs("!", stream
);
21867 gcc_assert (MEM_P (x
));
21868 addr
= XEXP (x
, 0);
21869 gcc_assert (REG_P (addr
));
21870 asm_fprintf (stream
, "[%r]", REGNO (addr
));
21874 /* Translate an S register number into a D register number and element index. */
21877 enum machine_mode mode
= GET_MODE (x
);
21880 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
21882 output_operand_lossage ("invalid operand for code '%c'", code
);
21887 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21889 output_operand_lossage ("invalid operand for code '%c'", code
);
21893 regno
= regno
- FIRST_VFP_REGNUM
;
21894 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
21899 gcc_assert (CONST_DOUBLE_P (x
));
21901 result
= vfp3_const_double_for_fract_bits (x
);
21903 result
= vfp3_const_double_for_bits (x
);
21904 fprintf (stream
, "#%d", result
);
21907 /* Register specifier for vld1.16/vst1.16. Translate the S register
21908 number into a D register number and element index. */
21911 enum machine_mode mode
= GET_MODE (x
);
21914 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
21916 output_operand_lossage ("invalid operand for code '%c'", code
);
21921 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21923 output_operand_lossage ("invalid operand for code '%c'", code
);
21927 regno
= regno
- FIRST_VFP_REGNUM
;
21928 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
/* NOTE(review): default path (CODE == 0) -- print the operand itself:
   register name, memory address, or constant.  */
21935 output_operand_lossage ("missing operand");
21939 switch (GET_CODE (x
))
21942 asm_fprintf (stream
, "%r", REGNO (x
));
21946 output_memory_reference_mode
= GET_MODE (x
);
21947 output_address (XEXP (x
, 0));
21954 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
21955 sizeof (fpstr
), 0, 1);
21956 fprintf (stream
, "#%s", fpstr
);
21959 fprintf (stream
, "#%s", fp_immediate_constant (x
));
21963 gcc_assert (GET_CODE (x
) != NEG
);
21964 fputc ('#', stream
);
21965 if (GET_CODE (x
) == HIGH
)
21967 fputs (":lower16:", stream
);
21971 output_addr_const (stream
, x
);
21977 /* Target hook for printing a memory address. */
21979 arm_print_operand_address (FILE *stream
, rtx x
)
21983 int is_minus
= GET_CODE (x
) == MINUS
;
21986 asm_fprintf (stream
, "[%r]", REGNO (x
));
21987 else if (GET_CODE (x
) == PLUS
|| is_minus
)
21989 rtx base
= XEXP (x
, 0);
21990 rtx index
= XEXP (x
, 1);
21991 HOST_WIDE_INT offset
= 0;
21993 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
21995 /* Ensure that BASE is a register. */
21996 /* (one of them must be). */
21997 /* Also ensure the SP is not used as in index register. */
22002 switch (GET_CODE (index
))
22005 offset
= INTVAL (index
);
22008 asm_fprintf (stream
, "[%r, #%wd]",
22009 REGNO (base
), offset
);
22013 asm_fprintf (stream
, "[%r, %s%r]",
22014 REGNO (base
), is_minus
? "-" : "",
22024 asm_fprintf (stream
, "[%r, %s%r",
22025 REGNO (base
), is_minus
? "-" : "",
22026 REGNO (XEXP (index
, 0)));
22027 arm_print_operand (stream
, index
, 'S');
22028 fputs ("]", stream
);
22033 gcc_unreachable ();
22036 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22037 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22039 extern enum machine_mode output_memory_reference_mode
;
22041 gcc_assert (REG_P (XEXP (x
, 0)));
22043 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22044 asm_fprintf (stream
, "[%r, #%s%d]!",
22045 REGNO (XEXP (x
, 0)),
22046 GET_CODE (x
) == PRE_DEC
? "-" : "",
22047 GET_MODE_SIZE (output_memory_reference_mode
));
22049 asm_fprintf (stream
, "[%r], #%s%d",
22050 REGNO (XEXP (x
, 0)),
22051 GET_CODE (x
) == POST_DEC
? "-" : "",
22052 GET_MODE_SIZE (output_memory_reference_mode
));
22054 else if (GET_CODE (x
) == PRE_MODIFY
)
22056 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22057 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22058 asm_fprintf (stream
, "#%wd]!",
22059 INTVAL (XEXP (XEXP (x
, 1), 1)));
22061 asm_fprintf (stream
, "%r]!",
22062 REGNO (XEXP (XEXP (x
, 1), 1)));
22064 else if (GET_CODE (x
) == POST_MODIFY
)
22066 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22067 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22068 asm_fprintf (stream
, "#%wd",
22069 INTVAL (XEXP (XEXP (x
, 1), 1)));
22071 asm_fprintf (stream
, "%r",
22072 REGNO (XEXP (XEXP (x
, 1), 1)));
22074 else output_addr_const (stream
, x
);
22079 asm_fprintf (stream
, "[%r]", REGNO (x
));
22080 else if (GET_CODE (x
) == POST_INC
)
22081 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22082 else if (GET_CODE (x
) == PLUS
)
22084 gcc_assert (REG_P (XEXP (x
, 0)));
22085 if (CONST_INT_P (XEXP (x
, 1)))
22086 asm_fprintf (stream
, "[%r, #%wd]",
22087 REGNO (XEXP (x
, 0)),
22088 INTVAL (XEXP (x
, 1)));
22090 asm_fprintf (stream
, "[%r, %r]",
22091 REGNO (XEXP (x
, 0)),
22092 REGNO (XEXP (x
, 1)));
22095 output_addr_const (stream
, x
);
22099 /* Target hook for indicating whether a punctuation character for
22100 TARGET_PRINT_OPERAND is valid. */
22102 arm_print_operand_punct_valid_p (unsigned char code
)
22104 return (code
== '@' || code
== '|' || code
== '.'
22105 || code
== '(' || code
== ')' || code
== '#'
22106 || (TARGET_32BIT
&& (code
== '?'))
22107 || (TARGET_THUMB2
&& (code
== '!'))
22108 || (TARGET_THUMB
&& (code
== '_')));
22111 /* Target hook for assembling integer objects. The ARM version needs to
22112 handle word-sized values specially. */
22114 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22116 enum machine_mode mode
;
22118 if (size
== UNITS_PER_WORD
&& aligned_p
)
22120 fputs ("\t.word\t", asm_out_file
);
22121 output_addr_const (asm_out_file
, x
);
22123 /* Mark symbols as position independent. We only do this in the
22124 .text segment, not in the .data segment. */
22125 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22126 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22128 /* See legitimize_pic_address for an explanation of the
22129 TARGET_VXWORKS_RTP check. */
22130 if (!arm_pic_data_is_text_relative
22131 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
22132 fputs ("(GOT)", asm_out_file
);
22134 fputs ("(GOTOFF)", asm_out_file
);
22136 fputc ('\n', asm_out_file
);
22140 mode
= GET_MODE (x
);
22142 if (arm_vector_mode_supported_p (mode
))
22146 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22148 units
= CONST_VECTOR_NUNITS (x
);
22149 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
22151 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22152 for (i
= 0; i
< units
; i
++)
22154 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22156 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22159 for (i
= 0; i
< units
; i
++)
22161 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22162 REAL_VALUE_TYPE rval
;
22164 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
22167 (rval
, GET_MODE_INNER (mode
),
22168 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22174 return default_assemble_integer (x
, size
, aligned_p
);
22178 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22182 if (!TARGET_AAPCS_BASED
)
22185 default_named_section_asm_out_constructor
22186 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22190 /* Put these in the .init_array section, using a special relocation. */
22191 if (priority
!= DEFAULT_INIT_PRIORITY
)
22194 sprintf (buf
, "%s.%.5u",
22195 is_ctor
? ".init_array" : ".fini_array",
22197 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
22204 switch_to_section (s
);
22205 assemble_align (POINTER_SIZE
);
22206 fputs ("\t.word\t", asm_out_file
);
22207 output_addr_const (asm_out_file
, symbol
);
22208 fputs ("(target1)\n", asm_out_file
);
22211 /* Add a function to the list of static constructors. */
22214 arm_elf_asm_constructor (rtx symbol
, int priority
)
22216 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22219 /* Add a function to the list of static destructors. */
22222 arm_elf_asm_destructor (rtx symbol
, int priority
)
22224 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22227 /* A finite state machine takes care of noticing whether or not instructions
22228 can be conditionally executed, and thus decrease execution time and code
22229 size by deleting branch instructions. The fsm is controlled by
22230 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22232 /* The state of the fsm controlling condition codes are:
22233 0: normal, do nothing special
22234 1: make ASM_OUTPUT_OPCODE not output this instruction
22235 2: make ASM_OUTPUT_OPCODE not output this instruction
22236 3: make instructions conditional
22237 4: make instructions conditional
22239 State transitions (state->state by whom under condition):
22240 0 -> 1 final_prescan_insn if the `target' is a label
22241 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22242 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22243 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22244 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22245 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22246 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22247 (the target insn is arm_target_insn).
22249 If the jump clobbers the conditions then we use states 2 and 4.
22251 A similar thing can be done with conditional return insns.
22253 XXX In case the `target' is an unconditional branch, this conditionalising
22254 of the instructions always reduces code size, but not always execution
22255 time. But then, I want to reduce the code size to somewhere near what
22256 /bin/cc produces. */
22258 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22259 instructions. When a COND_EXEC instruction is seen the subsequent
22260 instructions are scanned so that multiple conditional instructions can be
22261 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22262 specify the length and true/false mask for the IT block. These will be
22263 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22265 /* Returns the index of the ARM condition code string in
22266 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22267 COMPARISON should be an rtx like `(eq (...) (...))'. */
22270 maybe_get_arm_condition_code (rtx comparison
)
22272 enum machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22273 enum arm_cond_code code
;
22274 enum rtx_code comp_code
= GET_CODE (comparison
);
22276 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22277 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22278 XEXP (comparison
, 1));
22282 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22283 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22284 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22285 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22286 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22287 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22288 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22289 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22290 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22291 case CC_DLTUmode
: code
= ARM_CC
;
22294 if (comp_code
== EQ
)
22295 return ARM_INVERSE_CONDITION_CODE (code
);
22296 if (comp_code
== NE
)
22303 case NE
: return ARM_NE
;
22304 case EQ
: return ARM_EQ
;
22305 case GE
: return ARM_PL
;
22306 case LT
: return ARM_MI
;
22307 default: return ARM_NV
;
22313 case NE
: return ARM_NE
;
22314 case EQ
: return ARM_EQ
;
22315 default: return ARM_NV
;
22321 case NE
: return ARM_MI
;
22322 case EQ
: return ARM_PL
;
22323 default: return ARM_NV
;
22328 /* We can handle all cases except UNEQ and LTGT. */
22331 case GE
: return ARM_GE
;
22332 case GT
: return ARM_GT
;
22333 case LE
: return ARM_LS
;
22334 case LT
: return ARM_MI
;
22335 case NE
: return ARM_NE
;
22336 case EQ
: return ARM_EQ
;
22337 case ORDERED
: return ARM_VC
;
22338 case UNORDERED
: return ARM_VS
;
22339 case UNLT
: return ARM_LT
;
22340 case UNLE
: return ARM_LE
;
22341 case UNGT
: return ARM_HI
;
22342 case UNGE
: return ARM_PL
;
22343 /* UNEQ and LTGT do not have a representation. */
22344 case UNEQ
: /* Fall through. */
22345 case LTGT
: /* Fall through. */
22346 default: return ARM_NV
;
22352 case NE
: return ARM_NE
;
22353 case EQ
: return ARM_EQ
;
22354 case GE
: return ARM_LE
;
22355 case GT
: return ARM_LT
;
22356 case LE
: return ARM_GE
;
22357 case LT
: return ARM_GT
;
22358 case GEU
: return ARM_LS
;
22359 case GTU
: return ARM_CC
;
22360 case LEU
: return ARM_CS
;
22361 case LTU
: return ARM_HI
;
22362 default: return ARM_NV
;
22368 case LTU
: return ARM_CS
;
22369 case GEU
: return ARM_CC
;
22370 default: return ARM_NV
;
22376 case NE
: return ARM_NE
;
22377 case EQ
: return ARM_EQ
;
22378 case GEU
: return ARM_CS
;
22379 case GTU
: return ARM_HI
;
22380 case LEU
: return ARM_LS
;
22381 case LTU
: return ARM_CC
;
22382 default: return ARM_NV
;
22388 case GE
: return ARM_GE
;
22389 case LT
: return ARM_LT
;
22390 case GEU
: return ARM_CS
;
22391 case LTU
: return ARM_CC
;
22392 default: return ARM_NV
;
22398 case NE
: return ARM_NE
;
22399 case EQ
: return ARM_EQ
;
22400 case GE
: return ARM_GE
;
22401 case GT
: return ARM_GT
;
22402 case LE
: return ARM_LE
;
22403 case LT
: return ARM_LT
;
22404 case GEU
: return ARM_CS
;
22405 case GTU
: return ARM_HI
;
22406 case LEU
: return ARM_LS
;
22407 case LTU
: return ARM_CC
;
22408 default: return ARM_NV
;
22411 default: gcc_unreachable ();
22415 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22416 static enum arm_cond_code
22417 get_arm_condition_code (rtx comparison
)
22419 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22420 gcc_assert (code
!= ARM_NV
);
22424 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22427 thumb2_final_prescan_insn (rtx insn
)
22429 rtx first_insn
= insn
;
22430 rtx body
= PATTERN (insn
);
22432 enum arm_cond_code code
;
22437 /* max_insns_skipped in the tune was already taken into account in the
22438 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22439 just emit the IT blocks as we can. It does not make sense to split
22441 max
= MAX_INSN_PER_IT_BLOCK
;
22443 /* Remove the previous insn from the count of insns to be output. */
22444 if (arm_condexec_count
)
22445 arm_condexec_count
--;
22447 /* Nothing to do if we are already inside a conditional block. */
22448 if (arm_condexec_count
)
22451 if (GET_CODE (body
) != COND_EXEC
)
22454 /* Conditional jumps are implemented directly. */
22458 predicate
= COND_EXEC_TEST (body
);
22459 arm_current_cc
= get_arm_condition_code (predicate
);
22461 n
= get_attr_ce_count (insn
);
22462 arm_condexec_count
= 1;
22463 arm_condexec_mask
= (1 << n
) - 1;
22464 arm_condexec_masklen
= n
;
22465 /* See if subsequent instructions can be combined into the same block. */
22468 insn
= next_nonnote_insn (insn
);
22470 /* Jumping into the middle of an IT block is illegal, so a label or
22471 barrier terminates the block. */
22472 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
22475 body
= PATTERN (insn
);
22476 /* USE and CLOBBER aren't really insns, so just skip them. */
22477 if (GET_CODE (body
) == USE
22478 || GET_CODE (body
) == CLOBBER
)
22481 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22482 if (GET_CODE (body
) != COND_EXEC
)
22484 /* Maximum number of conditionally executed instructions in a block. */
22485 n
= get_attr_ce_count (insn
);
22486 if (arm_condexec_masklen
+ n
> max
)
22489 predicate
= COND_EXEC_TEST (body
);
22490 code
= get_arm_condition_code (predicate
);
22491 mask
= (1 << n
) - 1;
22492 if (arm_current_cc
== code
)
22493 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
22494 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
22497 arm_condexec_count
++;
22498 arm_condexec_masklen
+= n
;
22500 /* A jump must be the last instruction in a conditional block. */
22504 /* Restore recog_data (getting the attributes of other insns can
22505 destroy this array, but final.c assumes that it remains intact
22506 across this call). */
22507 extract_constrain_insn_cached (first_insn
);
/* NOTE(review): arm_final_prescan_insn -- the ARM conditional-execution
   ("ccfsm") state machine run from FINAL_PRESCAN_INSN; it decides whether a
   conditional branch can be replaced by conditionally executing the insns
   it skips.  The text below is a lossy extraction: the embedded upstream
   line numbers (22511, 22513, ...) have gaps, so braces, 'return'
   statements and some declarations are missing here.  Restore this
   function from the original gcc/config/arm/arm.c before changing any
   logic; only comments have been added in this copy.  */
22511 arm_final_prescan_insn (rtx insn
)
22513 /* BODY will hold the body of INSN. */
22514 rtx body
= PATTERN (insn
);
22516 /* This will be 1 if trying to repeat the trick, and things need to be
22517 reversed if it appears to fail. */
22520 /* If we start with a return insn, we only succeed if we find another one. */
22521 int seeking_return
= 0;
22522 enum rtx_code return_code
= UNKNOWN
;
22524 /* START_INSN will hold the insn from where we start looking. This is the
22525 first insn after the following code_label if REVERSE is true. */
22526 rtx start_insn
= insn
;
22528 /* If in state 4, check if the target branch is reached, in order to
22529 change back to state 0. */
22530 if (arm_ccfsm_state
== 4)
22532 if (insn
== arm_target_insn
)
22534 arm_target_insn
= NULL
;
22535 arm_ccfsm_state
= 0;
22540 /* If in state 3, it is possible to repeat the trick, if this insn is an
22541 unconditional branch to a label, and immediately following this branch
22542 is the previous target label which is only used once, and the label this
22543 branch jumps to is not too far off. */
22544 if (arm_ccfsm_state
== 3)
22546 if (simplejump_p (insn
))
22548 start_insn
= next_nonnote_insn (start_insn
);
22549 if (BARRIER_P (start_insn
))
22551 /* XXX Isn't this always a barrier? */
22552 start_insn
= next_nonnote_insn (start_insn
);
22554 if (LABEL_P (start_insn
)
22555 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22556 && LABEL_NUSES (start_insn
) == 1)
22561 else if (ANY_RETURN_P (body
))
22563 start_insn
= next_nonnote_insn (start_insn
);
22564 if (BARRIER_P (start_insn
))
22565 start_insn
= next_nonnote_insn (start_insn
);
22566 if (LABEL_P (start_insn
)
22567 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22568 && LABEL_NUSES (start_insn
) == 1)
22571 seeking_return
= 1;
22572 return_code
= GET_CODE (body
);
/* NOTE(review): upstream lines 22573-22580 are missing from this extraction
   (the 'else return;' fall-outs and closing braces of the state-3 handling,
   per the jump in embedded numbering 22572 -> 22581).  */
22581 gcc_assert (!arm_ccfsm_state
|| reverse
);
22582 if (!JUMP_P (insn
))
22585 /* This jump might be paralleled with a clobber of the condition codes
22586 the jump should always come first */
22587 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
22588 body
= XVECEXP (body
, 0, 0);
22591 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
22592 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
22595 int fail
= FALSE
, succeed
= FALSE
;
22596 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22597 int then_not_else
= TRUE
;
22598 rtx this_insn
= start_insn
, label
= 0;
22600 /* Register the insn jumped to. */
22603 if (!seeking_return
)
22604 label
= XEXP (SET_SRC (body
), 0);
22606 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
22607 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
22608 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
22610 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
22611 then_not_else
= FALSE
;
22613 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
22615 seeking_return
= 1;
22616 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
22618 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
22620 seeking_return
= 1;
22621 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
22622 then_not_else
= FALSE
;
22625 gcc_unreachable ();
22627 /* See how many insns this branch skips, and what kind of insns. If all
22628 insns are okay, and the label or unconditional branch to the same
22629 label is not too far away, succeed. */
22630 for (insns_skipped
= 0;
22631 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
22635 this_insn
= next_nonnote_insn (this_insn
);
22639 switch (GET_CODE (this_insn
))
22642 /* Succeed if it is the target label, otherwise fail since
22643 control falls in from somewhere else. */
22644 if (this_insn
== label
)
22646 arm_ccfsm_state
= 1;
22654 /* Succeed if the following insn is the target label.
22656 If return insns are used then the last insn in a function
22657 will be a barrier. */
22658 this_insn
= next_nonnote_insn (this_insn
);
22659 if (this_insn
&& this_insn
== label
)
22661 arm_ccfsm_state
= 1;
22669 /* The AAPCS says that conditional calls should not be
22670 used since they make interworking inefficient (the
22671 linker can't transform BL<cond> into BLX). That's
22672 only a problem if the machine has BLX. */
22679 /* Succeed if the following insn is the target label, or
22680 if the following two insns are a barrier and the
22682 this_insn
= next_nonnote_insn (this_insn
);
22683 if (this_insn
&& BARRIER_P (this_insn
))
22684 this_insn
= next_nonnote_insn (this_insn
);
22686 if (this_insn
&& this_insn
== label
22687 && insns_skipped
< max_insns_skipped
)
22689 arm_ccfsm_state
= 1;
22697 /* If this is an unconditional branch to the same label, succeed.
22698 If it is to another label, do nothing. If it is conditional,
22700 /* XXX Probably, the tests for SET and the PC are
22703 scanbody
= PATTERN (this_insn
);
22704 if (GET_CODE (scanbody
) == SET
22705 && GET_CODE (SET_DEST (scanbody
)) == PC
)
22707 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
22708 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
22710 arm_ccfsm_state
= 2;
22713 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
22716 /* Fail if a conditional return is undesirable (e.g. on a
22717 StrongARM), but still allow this if optimizing for size. */
22718 else if (GET_CODE (scanbody
) == return_code
22719 && !use_return_insn (TRUE
, NULL
)
22722 else if (GET_CODE (scanbody
) == return_code
)
22724 arm_ccfsm_state
= 2;
22727 else if (GET_CODE (scanbody
) == PARALLEL
)
22729 switch (get_attr_conds (this_insn
))
22739 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
22744 /* Instructions using or affecting the condition codes make it
22746 scanbody
= PATTERN (this_insn
);
22747 if (!(GET_CODE (scanbody
) == SET
22748 || GET_CODE (scanbody
) == PARALLEL
)
22749 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
/* NOTE(review): success path -- apparently records the branch target (label
   number or target insn) and derives arm_current_cc from the comparison,
   inverting it when REVERSE or when the label sits in the else arm; several
   enclosing braces are missing from this extraction (numbering gap
   22749 -> 22759).  */
22759 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
22760 arm_target_label
= CODE_LABEL_NUMBER (label
);
22763 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
22765 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
22767 this_insn
= next_nonnote_insn (this_insn
);
22768 gcc_assert (!this_insn
22769 || (!BARRIER_P (this_insn
)
22770 && !LABEL_P (this_insn
)));
22774 /* Oh, dear! we ran off the end.. give up. */
22775 extract_constrain_insn_cached (insn
);
22776 arm_ccfsm_state
= 0;
22777 arm_target_insn
= NULL
;
22780 arm_target_insn
= this_insn
;
22783 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22786 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
22788 if (reverse
|| then_not_else
)
22789 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
22792 /* Restore recog_data (getting the attributes of other insns can
22793 destroy this array, but final.c assumes that it remains intact
22794 across this call. */
22795 extract_constrain_insn_cached (insn
);
22799 /* Output IT instructions. */
22801 thumb2_asm_output_opcode (FILE * stream
)
22806 if (arm_condexec_mask
)
22808 for (n
= 0; n
< arm_condexec_masklen
; n
++)
22809 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
22811 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
22812 arm_condition_codes
[arm_current_cc
]);
22813 arm_condexec_mask
= 0;
22817 /* Returns true if REGNO is a valid register
22818 for holding a quantity of type MODE. */
22820 arm_hard_regno_mode_ok (unsigned int regno
, enum machine_mode mode
)
22822 if (GET_MODE_CLASS (mode
) == MODE_CC
)
22823 return (regno
== CC_REGNUM
22824 || (TARGET_HARD_FLOAT
&& TARGET_VFP
22825 && regno
== VFPCC_REGNUM
));
22828 /* For the Thumb we only allow values bigger than SImode in
22829 registers 0 - 6, so that there is always a second low
22830 register available to hold the upper part of the value.
22831 We probably we ought to ensure that the register is the
22832 start of an even numbered register pair. */
22833 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
22835 if (TARGET_HARD_FLOAT
&& TARGET_VFP
22836 && IS_VFP_REGNUM (regno
))
22838 if (mode
== SFmode
|| mode
== SImode
)
22839 return VFP_REGNO_OK_FOR_SINGLE (regno
);
22841 if (mode
== DFmode
)
22842 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
22844 /* VFP registers can hold HFmode values, but there is no point in
22845 putting them there unless we have hardware conversion insns. */
22846 if (mode
== HFmode
)
22847 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
22850 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
22851 || (VALID_NEON_QREG_MODE (mode
)
22852 && NEON_REGNO_OK_FOR_QUAD (regno
))
22853 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
22854 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
22855 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
22856 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
22857 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
22862 if (TARGET_REALLY_IWMMXT
)
22864 if (IS_IWMMXT_GR_REGNUM (regno
))
22865 return mode
== SImode
;
22867 if (IS_IWMMXT_REGNUM (regno
))
22868 return VALID_IWMMXT_REG_MODE (mode
);
22871 /* We allow almost any value to be stored in the general registers.
22872 Restrict doubleword quantities to even register pairs in ARM state
22873 so that we can use ldrd. Do not allow very large Neon structure
22874 opaque modes in general registers; they would use too many. */
22875 if (regno
<= LAST_ARM_REGNUM
)
22877 if (ARM_NUM_REGS (mode
) > 4)
22883 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
22886 if (regno
== FRAME_POINTER_REGNUM
22887 || regno
== ARG_POINTER_REGNUM
)
22888 /* We only allow integers in the fake hard registers. */
22889 return GET_MODE_CLASS (mode
) == MODE_INT
;
22894 /* Implement MODES_TIEABLE_P. */
22897 arm_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
22899 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
22902 /* We specifically want to allow elements of "structure" modes to
22903 be tieable to the structure. This more general condition allows
22904 other rarer situations too. */
22906 && (VALID_NEON_DREG_MODE (mode1
)
22907 || VALID_NEON_QREG_MODE (mode1
)
22908 || VALID_NEON_STRUCT_MODE (mode1
))
22909 && (VALID_NEON_DREG_MODE (mode2
)
22910 || VALID_NEON_QREG_MODE (mode2
)
22911 || VALID_NEON_STRUCT_MODE (mode2
)))
22917 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22918 not used in arm mode. */
22921 arm_regno_class (int regno
)
22925 if (regno
== STACK_POINTER_REGNUM
)
22927 if (regno
== CC_REGNUM
)
22934 if (TARGET_THUMB2
&& regno
< 8)
22937 if ( regno
<= LAST_ARM_REGNUM
22938 || regno
== FRAME_POINTER_REGNUM
22939 || regno
== ARG_POINTER_REGNUM
)
22940 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
22942 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
22943 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
22945 if (IS_VFP_REGNUM (regno
))
22947 if (regno
<= D7_VFP_REGNUM
)
22948 return VFP_D0_D7_REGS
;
22949 else if (regno
<= LAST_LO_VFP_REGNUM
)
22950 return VFP_LO_REGS
;
22952 return VFP_HI_REGS
;
22955 if (IS_IWMMXT_REGNUM (regno
))
22956 return IWMMXT_REGS
;
22958 if (IS_IWMMXT_GR_REGNUM (regno
))
22959 return IWMMXT_GR_REGS
;
22964 /* Handle a special case when computing the offset
22965 of an argument from the frame pointer. */
22967 arm_debugger_arg_offset (int value
, rtx addr
)
22971 /* We are only interested if dbxout_parms() failed to compute the offset. */
22975 /* We can only cope with the case where the address is held in a register. */
22979 /* If we are using the frame pointer to point at the argument, then
22980 an offset of 0 is correct. */
22981 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
22984 /* If we are using the stack pointer to point at the
22985 argument, then an offset of 0 is correct. */
22986 /* ??? Check this is consistent with thumb2 frame layout. */
22987 if ((TARGET_THUMB
|| !frame_pointer_needed
)
22988 && REGNO (addr
) == SP_REGNUM
)
22991 /* Oh dear. The argument is pointed to by a register rather
22992 than being held in a register, or being stored at a known
22993 offset from the frame pointer. Since GDB only understands
22994 those two kinds of argument we must translate the address
22995 held in the register into an offset from the frame pointer.
22996 We do this by searching through the insns for the function
22997 looking to see where this register gets its value. If the
22998 register is initialized from the frame pointer plus an offset
22999 then we are in luck and we can continue, otherwise we give up.
23001 This code is exercised by producing debugging information
23002 for a function with arguments like this:
23004 double func (double a, double b, int c, double d) {return d;}
23006 Without this code the stab for parameter 'd' will be set to
23007 an offset of 0 from the frame pointer, rather than 8. */
23009 /* The if() statement says:
23011 If the insn is a normal instruction
23012 and if the insn is setting the value in a register
23013 and if the register being set is the register holding the address of the argument
23014 and if the address is computing by an addition
23015 that involves adding to a register
23016 which is the frame pointer
23021 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23023 if ( NONJUMP_INSN_P (insn
)
23024 && GET_CODE (PATTERN (insn
)) == SET
23025 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23026 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23027 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23028 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23029 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23032 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23041 warning (0, "unable to compute real location of stacked parameter");
23042 value
= 8; /* XXX magic hack */
23063 T_MAX
/* Size of enum. Keep last. */
23064 } neon_builtin_type_mode
;
23066 #define TYPE_MODE_BIT(X) (1 << (X))
23068 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23069 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23070 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23071 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23072 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23073 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23075 #define v8qi_UP T_V8QI
23076 #define v4hi_UP T_V4HI
23077 #define v4hf_UP T_V4HF
23078 #define v2si_UP T_V2SI
23079 #define v2sf_UP T_V2SF
23081 #define v16qi_UP T_V16QI
23082 #define v8hi_UP T_V8HI
23083 #define v4si_UP T_V4SI
23084 #define v4sf_UP T_V4SF
23085 #define v2di_UP T_V2DI
23090 #define UP(X) X##_UP
23126 NEON_LOADSTRUCTLANE
,
23128 NEON_STORESTRUCTLANE
,
23137 const neon_itype itype
;
23138 const neon_builtin_type_mode mode
;
23139 const enum insn_code code
;
23140 unsigned int fcode
;
23141 } neon_builtin_datum
;
23143 #define CF(N,X) CODE_FOR_neon_##N##X
23145 #define VAR1(T, N, A) \
23146 {#N, NEON_##T, UP (A), CF (N, A), 0}
23147 #define VAR2(T, N, A, B) \
23149 {#N, NEON_##T, UP (B), CF (N, B), 0}
23150 #define VAR3(T, N, A, B, C) \
23151 VAR2 (T, N, A, B), \
23152 {#N, NEON_##T, UP (C), CF (N, C), 0}
23153 #define VAR4(T, N, A, B, C, D) \
23154 VAR3 (T, N, A, B, C), \
23155 {#N, NEON_##T, UP (D), CF (N, D), 0}
23156 #define VAR5(T, N, A, B, C, D, E) \
23157 VAR4 (T, N, A, B, C, D), \
23158 {#N, NEON_##T, UP (E), CF (N, E), 0}
23159 #define VAR6(T, N, A, B, C, D, E, F) \
23160 VAR5 (T, N, A, B, C, D, E), \
23161 {#N, NEON_##T, UP (F), CF (N, F), 0}
23162 #define VAR7(T, N, A, B, C, D, E, F, G) \
23163 VAR6 (T, N, A, B, C, D, E, F), \
23164 {#N, NEON_##T, UP (G), CF (N, G), 0}
23165 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23166 VAR7 (T, N, A, B, C, D, E, F, G), \
23167 {#N, NEON_##T, UP (H), CF (N, H), 0}
23168 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23169 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23170 {#N, NEON_##T, UP (I), CF (N, I), 0}
23171 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23172 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23173 {#N, NEON_##T, UP (J), CF (N, J), 0}
23175 /* The NEON builtin data can be found in arm_neon_builtins.def.
23176 The mode entries in the following table correspond to the "key" type of the
23177 instruction variant, i.e. equivalent to that which would be specified after
23178 the assembler mnemonic, which usually refers to the last vector operand.
23179 (Signed/unsigned/polynomial types are not differentiated between though, and
23180 are all mapped onto the same mode for a given element size.) The modes
23181 listed per instruction should be the same as those defined for that
23182 instruction's pattern in neon.md. */
23184 static neon_builtin_datum neon_builtin_data
[] =
23186 #include "arm_neon_builtins.def"
23201 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23202 #define VAR1(T, N, A) \
23204 #define VAR2(T, N, A, B) \
23207 #define VAR3(T, N, A, B, C) \
23208 VAR2 (T, N, A, B), \
23210 #define VAR4(T, N, A, B, C, D) \
23211 VAR3 (T, N, A, B, C), \
23213 #define VAR5(T, N, A, B, C, D, E) \
23214 VAR4 (T, N, A, B, C, D), \
23216 #define VAR6(T, N, A, B, C, D, E, F) \
23217 VAR5 (T, N, A, B, C, D, E), \
23219 #define VAR7(T, N, A, B, C, D, E, F, G) \
23220 VAR6 (T, N, A, B, C, D, E, F), \
23222 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23223 VAR7 (T, N, A, B, C, D, E, F, G), \
23225 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23226 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23228 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23229 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23233 ARM_BUILTIN_GETWCGR0
,
23234 ARM_BUILTIN_GETWCGR1
,
23235 ARM_BUILTIN_GETWCGR2
,
23236 ARM_BUILTIN_GETWCGR3
,
23238 ARM_BUILTIN_SETWCGR0
,
23239 ARM_BUILTIN_SETWCGR1
,
23240 ARM_BUILTIN_SETWCGR2
,
23241 ARM_BUILTIN_SETWCGR3
,
23245 ARM_BUILTIN_WAVG2BR
,
23246 ARM_BUILTIN_WAVG2HR
,
23247 ARM_BUILTIN_WAVG2B
,
23248 ARM_BUILTIN_WAVG2H
,
23255 ARM_BUILTIN_WMACSZ
,
23257 ARM_BUILTIN_WMACUZ
,
23260 ARM_BUILTIN_WSADBZ
,
23262 ARM_BUILTIN_WSADHZ
,
23264 ARM_BUILTIN_WALIGNI
,
23265 ARM_BUILTIN_WALIGNR0
,
23266 ARM_BUILTIN_WALIGNR1
,
23267 ARM_BUILTIN_WALIGNR2
,
23268 ARM_BUILTIN_WALIGNR3
,
23271 ARM_BUILTIN_TMIAPH
,
23272 ARM_BUILTIN_TMIABB
,
23273 ARM_BUILTIN_TMIABT
,
23274 ARM_BUILTIN_TMIATB
,
23275 ARM_BUILTIN_TMIATT
,
23277 ARM_BUILTIN_TMOVMSKB
,
23278 ARM_BUILTIN_TMOVMSKH
,
23279 ARM_BUILTIN_TMOVMSKW
,
23281 ARM_BUILTIN_TBCSTB
,
23282 ARM_BUILTIN_TBCSTH
,
23283 ARM_BUILTIN_TBCSTW
,
23285 ARM_BUILTIN_WMADDS
,
23286 ARM_BUILTIN_WMADDU
,
23288 ARM_BUILTIN_WPACKHSS
,
23289 ARM_BUILTIN_WPACKWSS
,
23290 ARM_BUILTIN_WPACKDSS
,
23291 ARM_BUILTIN_WPACKHUS
,
23292 ARM_BUILTIN_WPACKWUS
,
23293 ARM_BUILTIN_WPACKDUS
,
23298 ARM_BUILTIN_WADDSSB
,
23299 ARM_BUILTIN_WADDSSH
,
23300 ARM_BUILTIN_WADDSSW
,
23301 ARM_BUILTIN_WADDUSB
,
23302 ARM_BUILTIN_WADDUSH
,
23303 ARM_BUILTIN_WADDUSW
,
23307 ARM_BUILTIN_WSUBSSB
,
23308 ARM_BUILTIN_WSUBSSH
,
23309 ARM_BUILTIN_WSUBSSW
,
23310 ARM_BUILTIN_WSUBUSB
,
23311 ARM_BUILTIN_WSUBUSH
,
23312 ARM_BUILTIN_WSUBUSW
,
23319 ARM_BUILTIN_WCMPEQB
,
23320 ARM_BUILTIN_WCMPEQH
,
23321 ARM_BUILTIN_WCMPEQW
,
23322 ARM_BUILTIN_WCMPGTUB
,
23323 ARM_BUILTIN_WCMPGTUH
,
23324 ARM_BUILTIN_WCMPGTUW
,
23325 ARM_BUILTIN_WCMPGTSB
,
23326 ARM_BUILTIN_WCMPGTSH
,
23327 ARM_BUILTIN_WCMPGTSW
,
23329 ARM_BUILTIN_TEXTRMSB
,
23330 ARM_BUILTIN_TEXTRMSH
,
23331 ARM_BUILTIN_TEXTRMSW
,
23332 ARM_BUILTIN_TEXTRMUB
,
23333 ARM_BUILTIN_TEXTRMUH
,
23334 ARM_BUILTIN_TEXTRMUW
,
23335 ARM_BUILTIN_TINSRB
,
23336 ARM_BUILTIN_TINSRH
,
23337 ARM_BUILTIN_TINSRW
,
23339 ARM_BUILTIN_WMAXSW
,
23340 ARM_BUILTIN_WMAXSH
,
23341 ARM_BUILTIN_WMAXSB
,
23342 ARM_BUILTIN_WMAXUW
,
23343 ARM_BUILTIN_WMAXUH
,
23344 ARM_BUILTIN_WMAXUB
,
23345 ARM_BUILTIN_WMINSW
,
23346 ARM_BUILTIN_WMINSH
,
23347 ARM_BUILTIN_WMINSB
,
23348 ARM_BUILTIN_WMINUW
,
23349 ARM_BUILTIN_WMINUH
,
23350 ARM_BUILTIN_WMINUB
,
23352 ARM_BUILTIN_WMULUM
,
23353 ARM_BUILTIN_WMULSM
,
23354 ARM_BUILTIN_WMULUL
,
23356 ARM_BUILTIN_PSADBH
,
23357 ARM_BUILTIN_WSHUFH
,
23371 ARM_BUILTIN_WSLLHI
,
23372 ARM_BUILTIN_WSLLWI
,
23373 ARM_BUILTIN_WSLLDI
,
23374 ARM_BUILTIN_WSRAHI
,
23375 ARM_BUILTIN_WSRAWI
,
23376 ARM_BUILTIN_WSRADI
,
23377 ARM_BUILTIN_WSRLHI
,
23378 ARM_BUILTIN_WSRLWI
,
23379 ARM_BUILTIN_WSRLDI
,
23380 ARM_BUILTIN_WRORHI
,
23381 ARM_BUILTIN_WRORWI
,
23382 ARM_BUILTIN_WRORDI
,
23384 ARM_BUILTIN_WUNPCKIHB
,
23385 ARM_BUILTIN_WUNPCKIHH
,
23386 ARM_BUILTIN_WUNPCKIHW
,
23387 ARM_BUILTIN_WUNPCKILB
,
23388 ARM_BUILTIN_WUNPCKILH
,
23389 ARM_BUILTIN_WUNPCKILW
,
23391 ARM_BUILTIN_WUNPCKEHSB
,
23392 ARM_BUILTIN_WUNPCKEHSH
,
23393 ARM_BUILTIN_WUNPCKEHSW
,
23394 ARM_BUILTIN_WUNPCKEHUB
,
23395 ARM_BUILTIN_WUNPCKEHUH
,
23396 ARM_BUILTIN_WUNPCKEHUW
,
23397 ARM_BUILTIN_WUNPCKELSB
,
23398 ARM_BUILTIN_WUNPCKELSH
,
23399 ARM_BUILTIN_WUNPCKELSW
,
23400 ARM_BUILTIN_WUNPCKELUB
,
23401 ARM_BUILTIN_WUNPCKELUH
,
23402 ARM_BUILTIN_WUNPCKELUW
,
23408 ARM_BUILTIN_WADDSUBHX
,
23409 ARM_BUILTIN_WSUBADDHX
,
23411 ARM_BUILTIN_WABSDIFFB
,
23412 ARM_BUILTIN_WABSDIFFH
,
23413 ARM_BUILTIN_WABSDIFFW
,
23415 ARM_BUILTIN_WADDCH
,
23416 ARM_BUILTIN_WADDCW
,
23419 ARM_BUILTIN_WAVG4R
,
23421 ARM_BUILTIN_WMADDSX
,
23422 ARM_BUILTIN_WMADDUX
,
23424 ARM_BUILTIN_WMADDSN
,
23425 ARM_BUILTIN_WMADDUN
,
23427 ARM_BUILTIN_WMULWSM
,
23428 ARM_BUILTIN_WMULWUM
,
23430 ARM_BUILTIN_WMULWSMR
,
23431 ARM_BUILTIN_WMULWUMR
,
23433 ARM_BUILTIN_WMULWL
,
23435 ARM_BUILTIN_WMULSMR
,
23436 ARM_BUILTIN_WMULUMR
,
23438 ARM_BUILTIN_WQMULM
,
23439 ARM_BUILTIN_WQMULMR
,
23441 ARM_BUILTIN_WQMULWM
,
23442 ARM_BUILTIN_WQMULWMR
,
23444 ARM_BUILTIN_WADDBHUSM
,
23445 ARM_BUILTIN_WADDBHUSL
,
23447 ARM_BUILTIN_WQMIABB
,
23448 ARM_BUILTIN_WQMIABT
,
23449 ARM_BUILTIN_WQMIATB
,
23450 ARM_BUILTIN_WQMIATT
,
23452 ARM_BUILTIN_WQMIABBN
,
23453 ARM_BUILTIN_WQMIABTN
,
23454 ARM_BUILTIN_WQMIATBN
,
23455 ARM_BUILTIN_WQMIATTN
,
23457 ARM_BUILTIN_WMIABB
,
23458 ARM_BUILTIN_WMIABT
,
23459 ARM_BUILTIN_WMIATB
,
23460 ARM_BUILTIN_WMIATT
,
23462 ARM_BUILTIN_WMIABBN
,
23463 ARM_BUILTIN_WMIABTN
,
23464 ARM_BUILTIN_WMIATBN
,
23465 ARM_BUILTIN_WMIATTN
,
23467 ARM_BUILTIN_WMIAWBB
,
23468 ARM_BUILTIN_WMIAWBT
,
23469 ARM_BUILTIN_WMIAWTB
,
23470 ARM_BUILTIN_WMIAWTT
,
23472 ARM_BUILTIN_WMIAWBBN
,
23473 ARM_BUILTIN_WMIAWBTN
,
23474 ARM_BUILTIN_WMIAWTBN
,
23475 ARM_BUILTIN_WMIAWTTN
,
23477 ARM_BUILTIN_WMERGE
,
23479 ARM_BUILTIN_CRC32B
,
23480 ARM_BUILTIN_CRC32H
,
23481 ARM_BUILTIN_CRC32W
,
23482 ARM_BUILTIN_CRC32CB
,
23483 ARM_BUILTIN_CRC32CH
,
23484 ARM_BUILTIN_CRC32CW
,
23486 ARM_BUILTIN_GET_FPSCR
,
23487 ARM_BUILTIN_SET_FPSCR
,
23493 #define CRYPTO1(L, U, M1, M2) \
23494 ARM_BUILTIN_CRYPTO_##U,
23495 #define CRYPTO2(L, U, M1, M2, M3) \
23496 ARM_BUILTIN_CRYPTO_##U,
23497 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23498 ARM_BUILTIN_CRYPTO_##U,
23500 #include "crypto.def"
23506 #include "arm_neon_builtins.def"
23511 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23525 static GTY(()) tree arm_builtin_decls
[ARM_BUILTIN_MAX
];
23527 #define NUM_DREG_TYPES 5
23528 #define NUM_QREG_TYPES 6
23531 arm_init_neon_builtins (void)
23533 unsigned int i
, fcode
;
23536 tree neon_intQI_type_node
;
23537 tree neon_intHI_type_node
;
23538 tree neon_floatHF_type_node
;
23539 tree neon_polyQI_type_node
;
23540 tree neon_polyHI_type_node
;
23541 tree neon_intSI_type_node
;
23542 tree neon_intDI_type_node
;
23543 tree neon_intUTI_type_node
;
23544 tree neon_float_type_node
;
23546 tree intQI_pointer_node
;
23547 tree intHI_pointer_node
;
23548 tree intSI_pointer_node
;
23549 tree intDI_pointer_node
;
23550 tree float_pointer_node
;
23552 tree const_intQI_node
;
23553 tree const_intHI_node
;
23554 tree const_intSI_node
;
23555 tree const_intDI_node
;
23556 tree const_float_node
;
23558 tree const_intQI_pointer_node
;
23559 tree const_intHI_pointer_node
;
23560 tree const_intSI_pointer_node
;
23561 tree const_intDI_pointer_node
;
23562 tree const_float_pointer_node
;
23564 tree V8QI_type_node
;
23565 tree V4HI_type_node
;
23566 tree V4UHI_type_node
;
23567 tree V4HF_type_node
;
23568 tree V2SI_type_node
;
23569 tree V2USI_type_node
;
23570 tree V2SF_type_node
;
23571 tree V16QI_type_node
;
23572 tree V8HI_type_node
;
23573 tree V8UHI_type_node
;
23574 tree V4SI_type_node
;
23575 tree V4USI_type_node
;
23576 tree V4SF_type_node
;
23577 tree V2DI_type_node
;
23578 tree V2UDI_type_node
;
23580 tree intUQI_type_node
;
23581 tree intUHI_type_node
;
23582 tree intUSI_type_node
;
23583 tree intUDI_type_node
;
23585 tree intEI_type_node
;
23586 tree intOI_type_node
;
23587 tree intCI_type_node
;
23588 tree intXI_type_node
;
23590 tree reinterp_ftype_dreg
[NUM_DREG_TYPES
][NUM_DREG_TYPES
];
23591 tree reinterp_ftype_qreg
[NUM_QREG_TYPES
][NUM_QREG_TYPES
];
23592 tree dreg_types
[NUM_DREG_TYPES
], qreg_types
[NUM_QREG_TYPES
];
23594 /* Create distinguished type nodes for NEON vector element types,
23595 and pointers to values of such types, so we can detect them later. */
23596 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23597 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23598 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23599 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23600 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
23601 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
23602 neon_float_type_node
= make_node (REAL_TYPE
);
23603 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
23604 layout_type (neon_float_type_node
);
23605 neon_floatHF_type_node
= make_node (REAL_TYPE
);
23606 TYPE_PRECISION (neon_floatHF_type_node
) = GET_MODE_PRECISION (HFmode
);
23607 layout_type (neon_floatHF_type_node
);
23609 /* Define typedefs which exactly correspond to the modes we are basing vector
23610 types on. If you change these names you'll need to change
23611 the table used by arm_mangle_type too. */
23612 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
23613 "__builtin_neon_qi");
23614 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
23615 "__builtin_neon_hi");
23616 (*lang_hooks
.types
.register_builtin_type
) (neon_floatHF_type_node
,
23617 "__builtin_neon_hf");
23618 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
23619 "__builtin_neon_si");
23620 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
23621 "__builtin_neon_sf");
23622 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
23623 "__builtin_neon_di");
23624 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
23625 "__builtin_neon_poly8");
23626 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
23627 "__builtin_neon_poly16");
23629 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
23630 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
23631 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
23632 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
23633 float_pointer_node
= build_pointer_type (neon_float_type_node
);
23635 /* Next create constant-qualified versions of the above types. */
23636 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
23638 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
23640 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
23642 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
23644 const_float_node
= build_qualified_type (neon_float_type_node
,
23647 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
23648 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
23649 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
23650 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
23651 const_float_pointer_node
= build_pointer_type (const_float_node
);
23653 /* Unsigned integer types for various mode sizes. */
23654 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
23655 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
23656 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
23657 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
23658 neon_intUTI_type_node
= make_unsigned_type (GET_MODE_PRECISION (TImode
));
23659 /* Now create vector types based on our NEON element types. */
23660 /* 64-bit vectors. */
23662 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
23664 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
23666 build_vector_type_for_mode (intUHI_type_node
, V4HImode
);
23668 build_vector_type_for_mode (neon_floatHF_type_node
, V4HFmode
);
23670 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
23672 build_vector_type_for_mode (intUSI_type_node
, V2SImode
);
23674 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
23675 /* 128-bit vectors. */
23677 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
23679 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
23681 build_vector_type_for_mode (intUHI_type_node
, V8HImode
);
23683 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
23685 build_vector_type_for_mode (intUSI_type_node
, V4SImode
);
23687 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
23689 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
23691 build_vector_type_for_mode (intUDI_type_node
, V2DImode
);
23694 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
23695 "__builtin_neon_uqi");
23696 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
23697 "__builtin_neon_uhi");
23698 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
23699 "__builtin_neon_usi");
23700 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23701 "__builtin_neon_udi");
23702 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23703 "__builtin_neon_poly64");
23704 (*lang_hooks
.types
.register_builtin_type
) (neon_intUTI_type_node
,
23705 "__builtin_neon_poly128");
23707 /* Opaque integer types for structures of vectors. */
23708 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
23709 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
23710 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
23711 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
23713 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
23714 "__builtin_neon_ti");
23715 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
23716 "__builtin_neon_ei");
23717 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
23718 "__builtin_neon_oi");
23719 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
23720 "__builtin_neon_ci");
23721 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
23722 "__builtin_neon_xi");
23724 if (TARGET_CRYPTO
&& TARGET_HARD_FLOAT
)
23727 tree V16UQI_type_node
=
23728 build_vector_type_for_mode (intUQI_type_node
, V16QImode
);
23730 tree v16uqi_ftype_v16uqi
23731 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
, NULL_TREE
);
23733 tree v16uqi_ftype_v16uqi_v16uqi
23734 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
,
23735 V16UQI_type_node
, NULL_TREE
);
23737 tree v4usi_ftype_v4usi
23738 = build_function_type_list (V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23740 tree v4usi_ftype_v4usi_v4usi
23741 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23742 V4USI_type_node
, NULL_TREE
);
23744 tree v4usi_ftype_v4usi_v4usi_v4usi
23745 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23746 V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23748 tree uti_ftype_udi_udi
23749 = build_function_type_list (neon_intUTI_type_node
, intUDI_type_node
,
23750 intUDI_type_node
, NULL_TREE
);
23763 ARM_BUILTIN_CRYPTO_##U
23765 "__builtin_arm_crypto_"#L
23766 #define FT1(R, A) \
23768 #define FT2(R, A1, A2) \
23769 R##_ftype_##A1##_##A2
23770 #define FT3(R, A1, A2, A3) \
23771 R##_ftype_##A1##_##A2##_##A3
23772 #define CRYPTO1(L, U, R, A) \
23773 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23774 C (U), BUILT_IN_MD, \
23776 #define CRYPTO2(L, U, R, A1, A2) \
23777 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23778 C (U), BUILT_IN_MD, \
23781 #define CRYPTO3(L, U, R, A1, A2, A3) \
23782 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23783 C (U), BUILT_IN_MD, \
23785 #include "crypto.def"
23796 dreg_types
[0] = V8QI_type_node
;
23797 dreg_types
[1] = V4HI_type_node
;
23798 dreg_types
[2] = V2SI_type_node
;
23799 dreg_types
[3] = V2SF_type_node
;
23800 dreg_types
[4] = neon_intDI_type_node
;
23802 qreg_types
[0] = V16QI_type_node
;
23803 qreg_types
[1] = V8HI_type_node
;
23804 qreg_types
[2] = V4SI_type_node
;
23805 qreg_types
[3] = V4SF_type_node
;
23806 qreg_types
[4] = V2DI_type_node
;
23807 qreg_types
[5] = neon_intUTI_type_node
;
23809 for (i
= 0; i
< NUM_QREG_TYPES
; i
++)
23812 for (j
= 0; j
< NUM_QREG_TYPES
; j
++)
23814 if (i
< NUM_DREG_TYPES
&& j
< NUM_DREG_TYPES
)
23815 reinterp_ftype_dreg
[i
][j
]
23816 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
23818 reinterp_ftype_qreg
[i
][j
]
23819 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
23823 for (i
= 0, fcode
= ARM_BUILTIN_NEON_BASE
;
23824 i
< ARRAY_SIZE (neon_builtin_data
);
23827 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
23829 const char* const modenames
[] = {
23830 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23831 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23836 int is_load
= 0, is_store
= 0;
23838 gcc_assert (ARRAY_SIZE (modenames
) == T_MAX
);
23845 case NEON_LOAD1LANE
:
23846 case NEON_LOADSTRUCT
:
23847 case NEON_LOADSTRUCTLANE
:
23849 /* Fall through. */
23851 case NEON_STORE1LANE
:
23852 case NEON_STORESTRUCT
:
23853 case NEON_STORESTRUCTLANE
:
23856 /* Fall through. */
23860 case NEON_LOGICBINOP
:
23861 case NEON_SHIFTINSERT
:
23868 case NEON_SHIFTIMM
:
23869 case NEON_SHIFTACC
:
23875 case NEON_LANEMULL
:
23876 case NEON_LANEMULH
:
23878 case NEON_SCALARMUL
:
23879 case NEON_SCALARMULL
:
23880 case NEON_SCALARMULH
:
23881 case NEON_SCALARMAC
:
23887 tree return_type
= void_type_node
, args
= void_list_node
;
23889 /* Build a function type directly from the insn_data for
23890 this builtin. The build_function_type() function takes
23891 care of removing duplicates for us. */
23892 for (k
= insn_data
[d
->code
].n_generator_args
- 1; k
>= 0; k
--)
23896 if (is_load
&& k
== 1)
23898 /* Neon load patterns always have the memory
23899 operand in the operand 1 position. */
23900 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
23901 == neon_struct_operand
);
23907 eltype
= const_intQI_pointer_node
;
23912 eltype
= const_intHI_pointer_node
;
23917 eltype
= const_intSI_pointer_node
;
23922 eltype
= const_float_pointer_node
;
23927 eltype
= const_intDI_pointer_node
;
23930 default: gcc_unreachable ();
23933 else if (is_store
&& k
== 0)
23935 /* Similarly, Neon store patterns use operand 0 as
23936 the memory location to store to. */
23937 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
23938 == neon_struct_operand
);
23944 eltype
= intQI_pointer_node
;
23949 eltype
= intHI_pointer_node
;
23954 eltype
= intSI_pointer_node
;
23959 eltype
= float_pointer_node
;
23964 eltype
= intDI_pointer_node
;
23967 default: gcc_unreachable ();
23972 switch (insn_data
[d
->code
].operand
[k
].mode
)
23974 case VOIDmode
: eltype
= void_type_node
; break;
23976 case QImode
: eltype
= neon_intQI_type_node
; break;
23977 case HImode
: eltype
= neon_intHI_type_node
; break;
23978 case SImode
: eltype
= neon_intSI_type_node
; break;
23979 case SFmode
: eltype
= neon_float_type_node
; break;
23980 case DImode
: eltype
= neon_intDI_type_node
; break;
23981 case TImode
: eltype
= intTI_type_node
; break;
23982 case EImode
: eltype
= intEI_type_node
; break;
23983 case OImode
: eltype
= intOI_type_node
; break;
23984 case CImode
: eltype
= intCI_type_node
; break;
23985 case XImode
: eltype
= intXI_type_node
; break;
23986 /* 64-bit vectors. */
23987 case V8QImode
: eltype
= V8QI_type_node
; break;
23988 case V4HImode
: eltype
= V4HI_type_node
; break;
23989 case V2SImode
: eltype
= V2SI_type_node
; break;
23990 case V2SFmode
: eltype
= V2SF_type_node
; break;
23991 /* 128-bit vectors. */
23992 case V16QImode
: eltype
= V16QI_type_node
; break;
23993 case V8HImode
: eltype
= V8HI_type_node
; break;
23994 case V4SImode
: eltype
= V4SI_type_node
; break;
23995 case V4SFmode
: eltype
= V4SF_type_node
; break;
23996 case V2DImode
: eltype
= V2DI_type_node
; break;
23997 default: gcc_unreachable ();
24001 if (k
== 0 && !is_store
)
24002 return_type
= eltype
;
24004 args
= tree_cons (NULL_TREE
, eltype
, args
);
24007 ftype
= build_function_type (return_type
, args
);
24011 case NEON_REINTERP
:
24013 /* We iterate over NUM_DREG_TYPES doubleword types,
24014 then NUM_QREG_TYPES quadword types.
24015 V4HF is not a type used in reinterpret, so we translate
24016 d->mode to the correct index in reinterp_ftype_dreg. */
24018 = GET_MODE_SIZE (insn_data
[d
->code
].operand
[0].mode
) > 8;
24019 int rhs
= (d
->mode
- ((!qreg_p
&& (d
->mode
> T_V4HF
)) ? 1 : 0))
24021 switch (insn_data
[d
->code
].operand
[0].mode
)
24023 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
24024 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
24025 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
24026 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
24027 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
24028 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
24029 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
24030 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
24031 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
24032 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
24033 case TImode
: ftype
= reinterp_ftype_qreg
[5][rhs
]; break;
24034 default: gcc_unreachable ();
24038 case NEON_FLOAT_WIDEN
:
24040 tree eltype
= NULL_TREE
;
24041 tree return_type
= NULL_TREE
;
24043 switch (insn_data
[d
->code
].operand
[1].mode
)
24046 eltype
= V4HF_type_node
;
24047 return_type
= V4SF_type_node
;
24049 default: gcc_unreachable ();
24051 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
24054 case NEON_FLOAT_NARROW
:
24056 tree eltype
= NULL_TREE
;
24057 tree return_type
= NULL_TREE
;
24059 switch (insn_data
[d
->code
].operand
[1].mode
)
24062 eltype
= V4SF_type_node
;
24063 return_type
= V4HF_type_node
;
24065 default: gcc_unreachable ();
24067 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
24072 tree eltype
= NULL_TREE
;
24073 switch (insn_data
[d
->code
].operand
[1].mode
)
24076 eltype
= V4UHI_type_node
;
24079 eltype
= V8UHI_type_node
;
24082 eltype
= V2USI_type_node
;
24085 eltype
= V4USI_type_node
;
24088 eltype
= V2UDI_type_node
;
24090 default: gcc_unreachable ();
24092 ftype
= build_function_type_list (eltype
, eltype
, NULL
);
24096 gcc_unreachable ();
24099 gcc_assert (ftype
!= NULL
);
24101 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[d
->mode
]);
24103 decl
= add_builtin_function (namebuf
, ftype
, fcode
, BUILT_IN_MD
, NULL
,
24105 arm_builtin_decls
[fcode
] = decl
;
24109 #undef NUM_DREG_TYPES
24110 #undef NUM_QREG_TYPES
24112 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24115 if ((MASK) & insn_flags) \
24118 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24119 BUILT_IN_MD, NULL, NULL_TREE); \
24120 arm_builtin_decls[CODE] = bdecl; \
24125 struct builtin_description
24127 const unsigned int mask
;
24128 const enum insn_code icode
;
24129 const char * const name
;
24130 const enum arm_builtins code
;
24131 const enum rtx_code comparison
;
24132 const unsigned int flag
;
24135 static const struct builtin_description bdesc_2arg
[] =
24137 #define IWMMXT_BUILTIN(code, string, builtin) \
24138 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24139 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24141 #define IWMMXT2_BUILTIN(code, string, builtin) \
24142 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24143 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24145 IWMMXT_BUILTIN (addv8qi3
, "waddb", WADDB
)
24146 IWMMXT_BUILTIN (addv4hi3
, "waddh", WADDH
)
24147 IWMMXT_BUILTIN (addv2si3
, "waddw", WADDW
)
24148 IWMMXT_BUILTIN (subv8qi3
, "wsubb", WSUBB
)
24149 IWMMXT_BUILTIN (subv4hi3
, "wsubh", WSUBH
)
24150 IWMMXT_BUILTIN (subv2si3
, "wsubw", WSUBW
)
24151 IWMMXT_BUILTIN (ssaddv8qi3
, "waddbss", WADDSSB
)
24152 IWMMXT_BUILTIN (ssaddv4hi3
, "waddhss", WADDSSH
)
24153 IWMMXT_BUILTIN (ssaddv2si3
, "waddwss", WADDSSW
)
24154 IWMMXT_BUILTIN (sssubv8qi3
, "wsubbss", WSUBSSB
)
24155 IWMMXT_BUILTIN (sssubv4hi3
, "wsubhss", WSUBSSH
)
24156 IWMMXT_BUILTIN (sssubv2si3
, "wsubwss", WSUBSSW
)
24157 IWMMXT_BUILTIN (usaddv8qi3
, "waddbus", WADDUSB
)
24158 IWMMXT_BUILTIN (usaddv4hi3
, "waddhus", WADDUSH
)
24159 IWMMXT_BUILTIN (usaddv2si3
, "waddwus", WADDUSW
)
24160 IWMMXT_BUILTIN (ussubv8qi3
, "wsubbus", WSUBUSB
)
24161 IWMMXT_BUILTIN (ussubv4hi3
, "wsubhus", WSUBUSH
)
24162 IWMMXT_BUILTIN (ussubv2si3
, "wsubwus", WSUBUSW
)
24163 IWMMXT_BUILTIN (mulv4hi3
, "wmulul", WMULUL
)
24164 IWMMXT_BUILTIN (smulv4hi3_highpart
, "wmulsm", WMULSM
)
24165 IWMMXT_BUILTIN (umulv4hi3_highpart
, "wmulum", WMULUM
)
24166 IWMMXT_BUILTIN (eqv8qi3
, "wcmpeqb", WCMPEQB
)
24167 IWMMXT_BUILTIN (eqv4hi3
, "wcmpeqh", WCMPEQH
)
24168 IWMMXT_BUILTIN (eqv2si3
, "wcmpeqw", WCMPEQW
)
24169 IWMMXT_BUILTIN (gtuv8qi3
, "wcmpgtub", WCMPGTUB
)
24170 IWMMXT_BUILTIN (gtuv4hi3
, "wcmpgtuh", WCMPGTUH
)
24171 IWMMXT_BUILTIN (gtuv2si3
, "wcmpgtuw", WCMPGTUW
)
24172 IWMMXT_BUILTIN (gtv8qi3
, "wcmpgtsb", WCMPGTSB
)
24173 IWMMXT_BUILTIN (gtv4hi3
, "wcmpgtsh", WCMPGTSH
)
24174 IWMMXT_BUILTIN (gtv2si3
, "wcmpgtsw", WCMPGTSW
)
24175 IWMMXT_BUILTIN (umaxv8qi3
, "wmaxub", WMAXUB
)
24176 IWMMXT_BUILTIN (smaxv8qi3
, "wmaxsb", WMAXSB
)
24177 IWMMXT_BUILTIN (umaxv4hi3
, "wmaxuh", WMAXUH
)
24178 IWMMXT_BUILTIN (smaxv4hi3
, "wmaxsh", WMAXSH
)
24179 IWMMXT_BUILTIN (umaxv2si3
, "wmaxuw", WMAXUW
)
24180 IWMMXT_BUILTIN (smaxv2si3
, "wmaxsw", WMAXSW
)
24181 IWMMXT_BUILTIN (uminv8qi3
, "wminub", WMINUB
)
24182 IWMMXT_BUILTIN (sminv8qi3
, "wminsb", WMINSB
)
24183 IWMMXT_BUILTIN (uminv4hi3
, "wminuh", WMINUH
)
24184 IWMMXT_BUILTIN (sminv4hi3
, "wminsh", WMINSH
)
24185 IWMMXT_BUILTIN (uminv2si3
, "wminuw", WMINUW
)
24186 IWMMXT_BUILTIN (sminv2si3
, "wminsw", WMINSW
)
24187 IWMMXT_BUILTIN (iwmmxt_anddi3
, "wand", WAND
)
24188 IWMMXT_BUILTIN (iwmmxt_nanddi3
, "wandn", WANDN
)
24189 IWMMXT_BUILTIN (iwmmxt_iordi3
, "wor", WOR
)
24190 IWMMXT_BUILTIN (iwmmxt_xordi3
, "wxor", WXOR
)
24191 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3
, "wavg2b", WAVG2B
)
24192 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3
, "wavg2h", WAVG2H
)
24193 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3
, "wavg2br", WAVG2BR
)
24194 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3
, "wavg2hr", WAVG2HR
)
24195 IWMMXT_BUILTIN (iwmmxt_wunpckilb
, "wunpckilb", WUNPCKILB
)
24196 IWMMXT_BUILTIN (iwmmxt_wunpckilh
, "wunpckilh", WUNPCKILH
)
24197 IWMMXT_BUILTIN (iwmmxt_wunpckilw
, "wunpckilw", WUNPCKILW
)
24198 IWMMXT_BUILTIN (iwmmxt_wunpckihb
, "wunpckihb", WUNPCKIHB
)
24199 IWMMXT_BUILTIN (iwmmxt_wunpckihh
, "wunpckihh", WUNPCKIHH
)
24200 IWMMXT_BUILTIN (iwmmxt_wunpckihw
, "wunpckihw", WUNPCKIHW
)
24201 IWMMXT2_BUILTIN (iwmmxt_waddsubhx
, "waddsubhx", WADDSUBHX
)
24202 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx
, "wsubaddhx", WSUBADDHX
)
24203 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb
, "wabsdiffb", WABSDIFFB
)
24204 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh
, "wabsdiffh", WABSDIFFH
)
24205 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw
, "wabsdiffw", WABSDIFFW
)
24206 IWMMXT2_BUILTIN (iwmmxt_avg4
, "wavg4", WAVG4
)
24207 IWMMXT2_BUILTIN (iwmmxt_avg4r
, "wavg4r", WAVG4R
)
24208 IWMMXT2_BUILTIN (iwmmxt_wmulwsm
, "wmulwsm", WMULWSM
)
24209 IWMMXT2_BUILTIN (iwmmxt_wmulwum
, "wmulwum", WMULWUM
)
24210 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr
, "wmulwsmr", WMULWSMR
)
24211 IWMMXT2_BUILTIN (iwmmxt_wmulwumr
, "wmulwumr", WMULWUMR
)
24212 IWMMXT2_BUILTIN (iwmmxt_wmulwl
, "wmulwl", WMULWL
)
24213 IWMMXT2_BUILTIN (iwmmxt_wmulsmr
, "wmulsmr", WMULSMR
)
24214 IWMMXT2_BUILTIN (iwmmxt_wmulumr
, "wmulumr", WMULUMR
)
24215 IWMMXT2_BUILTIN (iwmmxt_wqmulm
, "wqmulm", WQMULM
)
24216 IWMMXT2_BUILTIN (iwmmxt_wqmulmr
, "wqmulmr", WQMULMR
)
24217 IWMMXT2_BUILTIN (iwmmxt_wqmulwm
, "wqmulwm", WQMULWM
)
24218 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr
, "wqmulwmr", WQMULWMR
)
24219 IWMMXT_BUILTIN (iwmmxt_walignr0
, "walignr0", WALIGNR0
)
24220 IWMMXT_BUILTIN (iwmmxt_walignr1
, "walignr1", WALIGNR1
)
24221 IWMMXT_BUILTIN (iwmmxt_walignr2
, "walignr2", WALIGNR2
)
24222 IWMMXT_BUILTIN (iwmmxt_walignr3
, "walignr3", WALIGNR3
)
24224 #define IWMMXT_BUILTIN2(code, builtin) \
24225 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24227 #define IWMMXT2_BUILTIN2(code, builtin) \
24228 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24230 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm
, WADDBHUSM
)
24231 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl
, WADDBHUSL
)
24232 IWMMXT_BUILTIN2 (iwmmxt_wpackhss
, WPACKHSS
)
24233 IWMMXT_BUILTIN2 (iwmmxt_wpackwss
, WPACKWSS
)
24234 IWMMXT_BUILTIN2 (iwmmxt_wpackdss
, WPACKDSS
)
24235 IWMMXT_BUILTIN2 (iwmmxt_wpackhus
, WPACKHUS
)
24236 IWMMXT_BUILTIN2 (iwmmxt_wpackwus
, WPACKWUS
)
24237 IWMMXT_BUILTIN2 (iwmmxt_wpackdus
, WPACKDUS
)
24238 IWMMXT_BUILTIN2 (iwmmxt_wmacuz
, WMACUZ
)
24239 IWMMXT_BUILTIN2 (iwmmxt_wmacsz
, WMACSZ
)
24242 #define FP_BUILTIN(L, U) \
24243 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24246 FP_BUILTIN (set_fpscr
, GET_FPSCR
)
24247 FP_BUILTIN (get_fpscr
, SET_FPSCR
)
24250 #define CRC32_BUILTIN(L, U) \
24251 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24253 CRC32_BUILTIN (crc32b
, CRC32B
)
24254 CRC32_BUILTIN (crc32h
, CRC32H
)
24255 CRC32_BUILTIN (crc32w
, CRC32W
)
24256 CRC32_BUILTIN (crc32cb
, CRC32CB
)
24257 CRC32_BUILTIN (crc32ch
, CRC32CH
)
24258 CRC32_BUILTIN (crc32cw
, CRC32CW
)
24259 #undef CRC32_BUILTIN
24262 #define CRYPTO_BUILTIN(L, U) \
24263 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24268 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24269 #define CRYPTO1(L, U, R, A)
24270 #define CRYPTO3(L, U, R, A1, A2, A3)
24271 #include "crypto.def"
24278 static const struct builtin_description bdesc_1arg
[] =
24280 IWMMXT_BUILTIN (iwmmxt_tmovmskb
, "tmovmskb", TMOVMSKB
)
24281 IWMMXT_BUILTIN (iwmmxt_tmovmskh
, "tmovmskh", TMOVMSKH
)
24282 IWMMXT_BUILTIN (iwmmxt_tmovmskw
, "tmovmskw", TMOVMSKW
)
24283 IWMMXT_BUILTIN (iwmmxt_waccb
, "waccb", WACCB
)
24284 IWMMXT_BUILTIN (iwmmxt_wacch
, "wacch", WACCH
)
24285 IWMMXT_BUILTIN (iwmmxt_waccw
, "waccw", WACCW
)
24286 IWMMXT_BUILTIN (iwmmxt_wunpckehub
, "wunpckehub", WUNPCKEHUB
)
24287 IWMMXT_BUILTIN (iwmmxt_wunpckehuh
, "wunpckehuh", WUNPCKEHUH
)
24288 IWMMXT_BUILTIN (iwmmxt_wunpckehuw
, "wunpckehuw", WUNPCKEHUW
)
24289 IWMMXT_BUILTIN (iwmmxt_wunpckehsb
, "wunpckehsb", WUNPCKEHSB
)
24290 IWMMXT_BUILTIN (iwmmxt_wunpckehsh
, "wunpckehsh", WUNPCKEHSH
)
24291 IWMMXT_BUILTIN (iwmmxt_wunpckehsw
, "wunpckehsw", WUNPCKEHSW
)
24292 IWMMXT_BUILTIN (iwmmxt_wunpckelub
, "wunpckelub", WUNPCKELUB
)
24293 IWMMXT_BUILTIN (iwmmxt_wunpckeluh
, "wunpckeluh", WUNPCKELUH
)
24294 IWMMXT_BUILTIN (iwmmxt_wunpckeluw
, "wunpckeluw", WUNPCKELUW
)
24295 IWMMXT_BUILTIN (iwmmxt_wunpckelsb
, "wunpckelsb", WUNPCKELSB
)
24296 IWMMXT_BUILTIN (iwmmxt_wunpckelsh
, "wunpckelsh", WUNPCKELSH
)
24297 IWMMXT_BUILTIN (iwmmxt_wunpckelsw
, "wunpckelsw", WUNPCKELSW
)
24298 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3
, "wabsb", WABSB
)
24299 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3
, "wabsh", WABSH
)
24300 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3
, "wabsw", WABSW
)
24301 IWMMXT_BUILTIN (tbcstv8qi
, "tbcstb", TBCSTB
)
24302 IWMMXT_BUILTIN (tbcstv4hi
, "tbcsth", TBCSTH
)
24303 IWMMXT_BUILTIN (tbcstv2si
, "tbcstw", TBCSTW
)
24305 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24306 #define CRYPTO2(L, U, R, A1, A2)
24307 #define CRYPTO3(L, U, R, A1, A2, A3)
24308 #include "crypto.def"
24314 static const struct builtin_description bdesc_3arg
[] =
24316 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24317 #define CRYPTO1(L, U, R, A)
24318 #define CRYPTO2(L, U, R, A1, A2)
24319 #include "crypto.def"
24324 #undef CRYPTO_BUILTIN
24326 /* Set up all the iWMMXt builtins. This is not called if
24327 TARGET_IWMMXT is zero. */
24330 arm_init_iwmmxt_builtins (void)
24332 const struct builtin_description
* d
;
24335 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
24336 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
24337 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
24339 tree v8qi_ftype_v8qi_v8qi_int
24340 = build_function_type_list (V8QI_type_node
,
24341 V8QI_type_node
, V8QI_type_node
,
24342 integer_type_node
, NULL_TREE
);
24343 tree v4hi_ftype_v4hi_int
24344 = build_function_type_list (V4HI_type_node
,
24345 V4HI_type_node
, integer_type_node
, NULL_TREE
);
24346 tree v2si_ftype_v2si_int
24347 = build_function_type_list (V2SI_type_node
,
24348 V2SI_type_node
, integer_type_node
, NULL_TREE
);
24349 tree v2si_ftype_di_di
24350 = build_function_type_list (V2SI_type_node
,
24351 long_long_integer_type_node
,
24352 long_long_integer_type_node
,
24354 tree di_ftype_di_int
24355 = build_function_type_list (long_long_integer_type_node
,
24356 long_long_integer_type_node
,
24357 integer_type_node
, NULL_TREE
);
24358 tree di_ftype_di_int_int
24359 = build_function_type_list (long_long_integer_type_node
,
24360 long_long_integer_type_node
,
24362 integer_type_node
, NULL_TREE
);
24363 tree int_ftype_v8qi
24364 = build_function_type_list (integer_type_node
,
24365 V8QI_type_node
, NULL_TREE
);
24366 tree int_ftype_v4hi
24367 = build_function_type_list (integer_type_node
,
24368 V4HI_type_node
, NULL_TREE
);
24369 tree int_ftype_v2si
24370 = build_function_type_list (integer_type_node
,
24371 V2SI_type_node
, NULL_TREE
);
24372 tree int_ftype_v8qi_int
24373 = build_function_type_list (integer_type_node
,
24374 V8QI_type_node
, integer_type_node
, NULL_TREE
);
24375 tree int_ftype_v4hi_int
24376 = build_function_type_list (integer_type_node
,
24377 V4HI_type_node
, integer_type_node
, NULL_TREE
);
24378 tree int_ftype_v2si_int
24379 = build_function_type_list (integer_type_node
,
24380 V2SI_type_node
, integer_type_node
, NULL_TREE
);
24381 tree v8qi_ftype_v8qi_int_int
24382 = build_function_type_list (V8QI_type_node
,
24383 V8QI_type_node
, integer_type_node
,
24384 integer_type_node
, NULL_TREE
);
24385 tree v4hi_ftype_v4hi_int_int
24386 = build_function_type_list (V4HI_type_node
,
24387 V4HI_type_node
, integer_type_node
,
24388 integer_type_node
, NULL_TREE
);
24389 tree v2si_ftype_v2si_int_int
24390 = build_function_type_list (V2SI_type_node
,
24391 V2SI_type_node
, integer_type_node
,
24392 integer_type_node
, NULL_TREE
);
24393 /* Miscellaneous. */
24394 tree v8qi_ftype_v4hi_v4hi
24395 = build_function_type_list (V8QI_type_node
,
24396 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24397 tree v4hi_ftype_v2si_v2si
24398 = build_function_type_list (V4HI_type_node
,
24399 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24400 tree v8qi_ftype_v4hi_v8qi
24401 = build_function_type_list (V8QI_type_node
,
24402 V4HI_type_node
, V8QI_type_node
, NULL_TREE
);
24403 tree v2si_ftype_v4hi_v4hi
24404 = build_function_type_list (V2SI_type_node
,
24405 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
24406 tree v2si_ftype_v8qi_v8qi
24407 = build_function_type_list (V2SI_type_node
,
24408 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24409 tree v4hi_ftype_v4hi_di
24410 = build_function_type_list (V4HI_type_node
,
24411 V4HI_type_node
, long_long_integer_type_node
,
24413 tree v2si_ftype_v2si_di
24414 = build_function_type_list (V2SI_type_node
,
24415 V2SI_type_node
, long_long_integer_type_node
,
24418 = build_function_type_list (long_long_unsigned_type_node
, NULL_TREE
);
24419 tree int_ftype_void
24420 = build_function_type_list (integer_type_node
, NULL_TREE
);
24422 = build_function_type_list (long_long_integer_type_node
,
24423 V8QI_type_node
, NULL_TREE
);
24425 = build_function_type_list (long_long_integer_type_node
,
24426 V4HI_type_node
, NULL_TREE
);
24428 = build_function_type_list (long_long_integer_type_node
,
24429 V2SI_type_node
, NULL_TREE
);
24430 tree v2si_ftype_v4hi
24431 = build_function_type_list (V2SI_type_node
,
24432 V4HI_type_node
, NULL_TREE
);
24433 tree v4hi_ftype_v8qi
24434 = build_function_type_list (V4HI_type_node
,
24435 V8QI_type_node
, NULL_TREE
);
24436 tree v8qi_ftype_v8qi
24437 = build_function_type_list (V8QI_type_node
,
24438 V8QI_type_node
, NULL_TREE
);
24439 tree v4hi_ftype_v4hi
24440 = build_function_type_list (V4HI_type_node
,
24441 V4HI_type_node
, NULL_TREE
);
24442 tree v2si_ftype_v2si
24443 = build_function_type_list (V2SI_type_node
,
24444 V2SI_type_node
, NULL_TREE
);
24446 tree di_ftype_di_v4hi_v4hi
24447 = build_function_type_list (long_long_unsigned_type_node
,
24448 long_long_unsigned_type_node
,
24449 V4HI_type_node
, V4HI_type_node
,
24452 tree di_ftype_v4hi_v4hi
24453 = build_function_type_list (long_long_unsigned_type_node
,
24454 V4HI_type_node
,V4HI_type_node
,
24457 tree v2si_ftype_v2si_v4hi_v4hi
24458 = build_function_type_list (V2SI_type_node
,
24459 V2SI_type_node
, V4HI_type_node
,
24460 V4HI_type_node
, NULL_TREE
);
24462 tree v2si_ftype_v2si_v8qi_v8qi
24463 = build_function_type_list (V2SI_type_node
,
24464 V2SI_type_node
, V8QI_type_node
,
24465 V8QI_type_node
, NULL_TREE
);
24467 tree di_ftype_di_v2si_v2si
24468 = build_function_type_list (long_long_unsigned_type_node
,
24469 long_long_unsigned_type_node
,
24470 V2SI_type_node
, V2SI_type_node
,
24473 tree di_ftype_di_di_int
24474 = build_function_type_list (long_long_unsigned_type_node
,
24475 long_long_unsigned_type_node
,
24476 long_long_unsigned_type_node
,
24477 integer_type_node
, NULL_TREE
);
24479 tree void_ftype_int
24480 = build_function_type_list (void_type_node
,
24481 integer_type_node
, NULL_TREE
);
24483 tree v8qi_ftype_char
24484 = build_function_type_list (V8QI_type_node
,
24485 signed_char_type_node
, NULL_TREE
);
24487 tree v4hi_ftype_short
24488 = build_function_type_list (V4HI_type_node
,
24489 short_integer_type_node
, NULL_TREE
);
24491 tree v2si_ftype_int
24492 = build_function_type_list (V2SI_type_node
,
24493 integer_type_node
, NULL_TREE
);
24495 /* Normal vector binops. */
24496 tree v8qi_ftype_v8qi_v8qi
24497 = build_function_type_list (V8QI_type_node
,
24498 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
24499 tree v4hi_ftype_v4hi_v4hi
24500 = build_function_type_list (V4HI_type_node
,
24501 V4HI_type_node
,V4HI_type_node
, NULL_TREE
);
24502 tree v2si_ftype_v2si_v2si
24503 = build_function_type_list (V2SI_type_node
,
24504 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
24505 tree di_ftype_di_di
24506 = build_function_type_list (long_long_unsigned_type_node
,
24507 long_long_unsigned_type_node
,
24508 long_long_unsigned_type_node
,
24511 /* Add all builtins that are more or less simple operations on two
24513 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
24515 /* Use one of the operands; the target can have a different mode for
24516 mask-generating compares. */
24517 enum machine_mode mode
;
24520 if (d
->name
== 0 || !(d
->mask
== FL_IWMMXT
|| d
->mask
== FL_IWMMXT2
))
24523 mode
= insn_data
[d
->icode
].operand
[1].mode
;
24528 type
= v8qi_ftype_v8qi_v8qi
;
24531 type
= v4hi_ftype_v4hi_v4hi
;
24534 type
= v2si_ftype_v2si_v2si
;
24537 type
= di_ftype_di_di
;
24541 gcc_unreachable ();
24544 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
24547 /* Add the remaining MMX insns with somewhat more complicated types. */
24548 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24549 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24550 ARM_BUILTIN_ ## CODE)
24552 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24553 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24554 ARM_BUILTIN_ ## CODE)
24556 iwmmx_mbuiltin ("wzero", di_ftype_void
, WZERO
);
24557 iwmmx_mbuiltin ("setwcgr0", void_ftype_int
, SETWCGR0
);
24558 iwmmx_mbuiltin ("setwcgr1", void_ftype_int
, SETWCGR1
);
24559 iwmmx_mbuiltin ("setwcgr2", void_ftype_int
, SETWCGR2
);
24560 iwmmx_mbuiltin ("setwcgr3", void_ftype_int
, SETWCGR3
);
24561 iwmmx_mbuiltin ("getwcgr0", int_ftype_void
, GETWCGR0
);
24562 iwmmx_mbuiltin ("getwcgr1", int_ftype_void
, GETWCGR1
);
24563 iwmmx_mbuiltin ("getwcgr2", int_ftype_void
, GETWCGR2
);
24564 iwmmx_mbuiltin ("getwcgr3", int_ftype_void
, GETWCGR3
);
24566 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di
, WSLLH
);
24567 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di
, WSLLW
);
24568 iwmmx_mbuiltin ("wslld", di_ftype_di_di
, WSLLD
);
24569 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int
, WSLLHI
);
24570 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int
, WSLLWI
);
24571 iwmmx_mbuiltin ("wslldi", di_ftype_di_int
, WSLLDI
);
24573 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di
, WSRLH
);
24574 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di
, WSRLW
);
24575 iwmmx_mbuiltin ("wsrld", di_ftype_di_di
, WSRLD
);
24576 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int
, WSRLHI
);
24577 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int
, WSRLWI
);
24578 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int
, WSRLDI
);
24580 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di
, WSRAH
);
24581 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di
, WSRAW
);
24582 iwmmx_mbuiltin ("wsrad", di_ftype_di_di
, WSRAD
);
24583 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int
, WSRAHI
);
24584 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int
, WSRAWI
);
24585 iwmmx_mbuiltin ("wsradi", di_ftype_di_int
, WSRADI
);
24587 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di
, WRORH
);
24588 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di
, WRORW
);
24589 iwmmx_mbuiltin ("wrord", di_ftype_di_di
, WRORD
);
24590 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int
, WRORHI
);
24591 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int
, WRORWI
);
24592 iwmmx_mbuiltin ("wrordi", di_ftype_di_int
, WRORDI
);
24594 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int
, WSHUFH
);
24596 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi
, WSADB
);
24597 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi
, WSADH
);
24598 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi
, WMADDS
);
24599 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi
, WMADDSX
);
24600 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi
, WMADDSN
);
24601 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi
, WMADDU
);
24602 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi
, WMADDUX
);
24603 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi
, WMADDUN
);
24604 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi
, WSADBZ
);
24605 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi
, WSADHZ
);
24607 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int
, TEXTRMSB
);
24608 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int
, TEXTRMSH
);
24609 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int
, TEXTRMSW
);
24610 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int
, TEXTRMUB
);
24611 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int
, TEXTRMUH
);
24612 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int
, TEXTRMUW
);
24613 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int
, TINSRB
);
24614 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int
, TINSRH
);
24615 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int
, TINSRW
);
24617 iwmmx_mbuiltin ("waccb", di_ftype_v8qi
, WACCB
);
24618 iwmmx_mbuiltin ("wacch", di_ftype_v4hi
, WACCH
);
24619 iwmmx_mbuiltin ("waccw", di_ftype_v2si
, WACCW
);
24621 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi
, TMOVMSKB
);
24622 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi
, TMOVMSKH
);
24623 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si
, TMOVMSKW
);
24625 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi
, WADDBHUSM
);
24626 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi
, WADDBHUSL
);
24628 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi
, WPACKHSS
);
24629 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi
, WPACKHUS
);
24630 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si
, WPACKWUS
);
24631 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si
, WPACKWSS
);
24632 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di
, WPACKDUS
);
24633 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di
, WPACKDSS
);
24635 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi
, WUNPCKEHUB
);
24636 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi
, WUNPCKEHUH
);
24637 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si
, WUNPCKEHUW
);
24638 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi
, WUNPCKEHSB
);
24639 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi
, WUNPCKEHSH
);
24640 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si
, WUNPCKEHSW
);
24641 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi
, WUNPCKELUB
);
24642 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi
, WUNPCKELUH
);
24643 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si
, WUNPCKELUW
);
24644 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi
, WUNPCKELSB
);
24645 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi
, WUNPCKELSH
);
24646 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si
, WUNPCKELSW
);
24648 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi
, WMACS
);
24649 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi
, WMACSZ
);
24650 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi
, WMACU
);
24651 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi
, WMACUZ
);
24653 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int
, WALIGNI
);
24654 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int
, TMIA
);
24655 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int
, TMIAPH
);
24656 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int
, TMIABB
);
24657 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int
, TMIABT
);
24658 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int
, TMIATB
);
24659 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int
, TMIATT
);
24661 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi
, WABSB
);
24662 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi
, WABSH
);
24663 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si
, WABSW
);
24665 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi
, WQMIABB
);
24666 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi
, WQMIABT
);
24667 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi
, WQMIATB
);
24668 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi
, WQMIATT
);
24670 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABBN
);
24671 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi
, WQMIABTN
);
24672 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATBN
);
24673 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi
, WQMIATTN
);
24675 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi
, WMIABB
);
24676 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi
, WMIABT
);
24677 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi
, WMIATB
);
24678 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi
, WMIATT
);
24680 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi
, WMIABBN
);
24681 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi
, WMIABTN
);
24682 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi
, WMIATBN
);
24683 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi
, WMIATTN
);
24685 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si
, WMIAWBB
);
24686 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si
, WMIAWBT
);
24687 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si
, WMIAWTB
);
24688 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si
, WMIAWTT
);
24690 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si
, WMIAWBBN
);
24691 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si
, WMIAWBTN
);
24692 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si
, WMIAWTBN
);
24693 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si
, WMIAWTTN
);
24695 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int
, WMERGE
);
24697 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char
, TBCSTB
);
24698 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short
, TBCSTH
);
24699 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int
, TBCSTW
);
24701 #undef iwmmx_mbuiltin
24702 #undef iwmmx2_mbuiltin
24706 arm_init_fp16_builtins (void)
24708 tree fp16_type
= make_node (REAL_TYPE
);
24709 TYPE_PRECISION (fp16_type
) = 16;
24710 layout_type (fp16_type
);
24711 (*lang_hooks
.types
.register_builtin_type
) (fp16_type
, "__fp16");
24715 arm_init_crc32_builtins ()
24717 tree si_ftype_si_qi
24718 = build_function_type_list (unsigned_intSI_type_node
,
24719 unsigned_intSI_type_node
,
24720 unsigned_intQI_type_node
, NULL_TREE
);
24721 tree si_ftype_si_hi
24722 = build_function_type_list (unsigned_intSI_type_node
,
24723 unsigned_intSI_type_node
,
24724 unsigned_intHI_type_node
, NULL_TREE
);
24725 tree si_ftype_si_si
24726 = build_function_type_list (unsigned_intSI_type_node
,
24727 unsigned_intSI_type_node
,
24728 unsigned_intSI_type_node
, NULL_TREE
);
24730 arm_builtin_decls
[ARM_BUILTIN_CRC32B
]
24731 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi
,
24732 ARM_BUILTIN_CRC32B
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24733 arm_builtin_decls
[ARM_BUILTIN_CRC32H
]
24734 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi
,
24735 ARM_BUILTIN_CRC32H
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24736 arm_builtin_decls
[ARM_BUILTIN_CRC32W
]
24737 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si
,
24738 ARM_BUILTIN_CRC32W
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24739 arm_builtin_decls
[ARM_BUILTIN_CRC32CB
]
24740 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi
,
24741 ARM_BUILTIN_CRC32CB
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24742 arm_builtin_decls
[ARM_BUILTIN_CRC32CH
]
24743 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi
,
24744 ARM_BUILTIN_CRC32CH
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24745 arm_builtin_decls
[ARM_BUILTIN_CRC32CW
]
24746 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si
,
24747 ARM_BUILTIN_CRC32CW
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24751 arm_init_builtins (void)
24753 if (TARGET_REALLY_IWMMXT
)
24754 arm_init_iwmmxt_builtins ();
24757 arm_init_neon_builtins ();
24759 if (arm_fp16_format
)
24760 arm_init_fp16_builtins ();
24763 arm_init_crc32_builtins ();
24767 tree ftype_set_fpscr
24768 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL
);
24769 tree ftype_get_fpscr
24770 = build_function_type_list (unsigned_type_node
, NULL
);
24772 arm_builtin_decls
[ARM_BUILTIN_GET_FPSCR
]
24773 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr
,
24774 ARM_BUILTIN_GET_FPSCR
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24775 arm_builtin_decls
[ARM_BUILTIN_SET_FPSCR
]
24776 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr
,
24777 ARM_BUILTIN_SET_FPSCR
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24781 /* Return the ARM builtin for CODE. */
24784 arm_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
24786 if (code
>= ARM_BUILTIN_MAX
)
24787 return error_mark_node
;
24789 return arm_builtin_decls
[code
];
24792 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24794 static const char *
24795 arm_invalid_parameter_type (const_tree t
)
24797 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24798 return N_("function parameters cannot have __fp16 type");
24802 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24804 static const char *
24805 arm_invalid_return_type (const_tree t
)
24807 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24808 return N_("functions cannot return __fp16 type");
24812 /* Implement TARGET_PROMOTED_TYPE. */
24815 arm_promoted_type (const_tree t
)
24817 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24818 return float_type_node
;
24822 /* Implement TARGET_CONVERT_TO_TYPE.
24823 Specifically, this hook implements the peculiarity of the ARM
24824 half-precision floating-point C semantics that requires conversions between
24825 __fp16 to or from double to do an intermediate conversion to float. */
24828 arm_convert_to_type (tree type
, tree expr
)
24830 tree fromtype
= TREE_TYPE (expr
);
24831 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
24833 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
24834 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
24835 return convert (type
, convert (float_type_node
, expr
));
24839 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24840 This simply adds HFmode as a supported mode; even though we don't
24841 implement arithmetic on this type directly, it's supported by
24842 optabs conversions, much the way the double-word arithmetic is
24843 special-cased in the default hook. */
24846 arm_scalar_mode_supported_p (enum machine_mode mode
)
24848 if (mode
== HFmode
)
24849 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
24850 else if (ALL_FIXED_POINT_MODE_P (mode
))
24853 return default_scalar_mode_supported_p (mode
);
24856 /* Errors in the source file can cause expand_expr to return const0_rtx
24857 where we expect a vector. To avoid crashing, use one of the vector
24858 clear instructions. */
24861 safe_vector_operand (rtx x
, enum machine_mode mode
)
24863 if (x
!= const0_rtx
)
24865 x
= gen_reg_rtx (mode
);
24867 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
24868 : gen_rtx_SUBREG (DImode
, x
, 0)));
24872 /* Function to expand ternary builtins. */
24874 arm_expand_ternop_builtin (enum insn_code icode
,
24875 tree exp
, rtx target
)
24878 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24879 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24880 tree arg2
= CALL_EXPR_ARG (exp
, 2);
24882 rtx op0
= expand_normal (arg0
);
24883 rtx op1
= expand_normal (arg1
);
24884 rtx op2
= expand_normal (arg2
);
24885 rtx op3
= NULL_RTX
;
24887 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24888 lane operand depending on endianness. */
24889 bool builtin_sha1cpm_p
= false;
24891 if (insn_data
[icode
].n_operands
== 5)
24893 gcc_assert (icode
== CODE_FOR_crypto_sha1c
24894 || icode
== CODE_FOR_crypto_sha1p
24895 || icode
== CODE_FOR_crypto_sha1m
);
24896 builtin_sha1cpm_p
= true;
24898 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24899 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24900 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
24901 enum machine_mode mode2
= insn_data
[icode
].operand
[3].mode
;
24904 if (VECTOR_MODE_P (mode0
))
24905 op0
= safe_vector_operand (op0
, mode0
);
24906 if (VECTOR_MODE_P (mode1
))
24907 op1
= safe_vector_operand (op1
, mode1
);
24908 if (VECTOR_MODE_P (mode2
))
24909 op2
= safe_vector_operand (op2
, mode2
);
24912 || GET_MODE (target
) != tmode
24913 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24914 target
= gen_reg_rtx (tmode
);
24916 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
24917 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
)
24918 && (GET_MODE (op2
) == mode2
|| GET_MODE (op2
) == VOIDmode
));
24920 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24921 op0
= copy_to_mode_reg (mode0
, op0
);
24922 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24923 op1
= copy_to_mode_reg (mode1
, op1
);
24924 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
24925 op2
= copy_to_mode_reg (mode2
, op2
);
24926 if (builtin_sha1cpm_p
)
24927 op3
= GEN_INT (TARGET_BIG_END
? 1 : 0);
24929 if (builtin_sha1cpm_p
)
24930 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
24932 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
24939 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24942 arm_expand_binop_builtin (enum insn_code icode
,
24943 tree exp
, rtx target
)
24946 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24947 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24948 rtx op0
= expand_normal (arg0
);
24949 rtx op1
= expand_normal (arg1
);
24950 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24951 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24952 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
24954 if (VECTOR_MODE_P (mode0
))
24955 op0
= safe_vector_operand (op0
, mode0
);
24956 if (VECTOR_MODE_P (mode1
))
24957 op1
= safe_vector_operand (op1
, mode1
);
24960 || GET_MODE (target
) != tmode
24961 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24962 target
= gen_reg_rtx (tmode
);
24964 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
24965 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
24967 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24968 op0
= copy_to_mode_reg (mode0
, op0
);
24969 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24970 op1
= copy_to_mode_reg (mode1
, op1
);
24972 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
24979 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24982 arm_expand_unop_builtin (enum insn_code icode
,
24983 tree exp
, rtx target
, int do_load
)
24986 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24987 rtx op0
= expand_normal (arg0
);
24988 rtx op1
= NULL_RTX
;
24989 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24990 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24991 bool builtin_sha1h_p
= false;
24993 if (insn_data
[icode
].n_operands
== 3)
24995 gcc_assert (icode
== CODE_FOR_crypto_sha1h
);
24996 builtin_sha1h_p
= true;
25000 || GET_MODE (target
) != tmode
25001 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25002 target
= gen_reg_rtx (tmode
);
25004 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
25007 if (VECTOR_MODE_P (mode0
))
25008 op0
= safe_vector_operand (op0
, mode0
);
25010 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25011 op0
= copy_to_mode_reg (mode0
, op0
);
25013 if (builtin_sha1h_p
)
25014 op1
= GEN_INT (TARGET_BIG_END
? 1 : 0);
25016 if (builtin_sha1h_p
)
25017 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25019 pat
= GEN_FCN (icode
) (target
, op0
);
25027 NEON_ARG_COPY_TO_REG
,
25033 #define NEON_MAX_BUILTIN_ARGS 5
25035 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25036 and return an expression for the accessed memory.
25038 The intrinsic function operates on a block of registers that has
25039 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25040 function references the memory at EXP of type TYPE and in mode
25041 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25045 neon_dereference_pointer (tree exp
, tree type
, enum machine_mode mem_mode
,
25046 enum machine_mode reg_mode
,
25047 neon_builtin_type_mode type_mode
)
25049 HOST_WIDE_INT reg_size
, vector_size
, nvectors
, nelems
;
25050 tree elem_type
, upper_bound
, array_type
;
25052 /* Work out the size of the register block in bytes. */
25053 reg_size
= GET_MODE_SIZE (reg_mode
);
25055 /* Work out the size of each vector in bytes. */
25056 gcc_assert (TYPE_MODE_BIT (type_mode
) & (TB_DREG
| TB_QREG
));
25057 vector_size
= (TYPE_MODE_BIT (type_mode
) & TB_QREG
? 16 : 8);
25059 /* Work out how many vectors there are. */
25060 gcc_assert (reg_size
% vector_size
== 0);
25061 nvectors
= reg_size
/ vector_size
;
25063 /* Work out the type of each element. */
25064 gcc_assert (POINTER_TYPE_P (type
));
25065 elem_type
= TREE_TYPE (type
);
25067 /* Work out how many elements are being loaded or stored.
25068 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25069 and memory elements; anything else implies a lane load or store. */
25070 if (mem_mode
== reg_mode
)
25071 nelems
= vector_size
* nvectors
/ int_size_in_bytes (elem_type
);
25075 /* Create a type that describes the full access. */
25076 upper_bound
= build_int_cst (size_type_node
, nelems
- 1);
25077 array_type
= build_array_type (elem_type
, build_index_type (upper_bound
));
25079 /* Dereference EXP using that type. */
25080 return fold_build2 (MEM_REF
, array_type
, exp
,
25081 build_int_cst (build_pointer_type (array_type
), 0));
25084 /* Expand a Neon builtin. */
25086 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
25087 neon_builtin_type_mode type_mode
,
25088 tree exp
, int fcode
, ...)
25092 tree arg
[NEON_MAX_BUILTIN_ARGS
];
25093 rtx op
[NEON_MAX_BUILTIN_ARGS
];
25096 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
25097 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
25098 enum machine_mode other_mode
;
25104 || GET_MODE (target
) != tmode
25105 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
25106 target
= gen_reg_rtx (tmode
);
25108 va_start (ap
, fcode
);
25110 formals
= TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls
[fcode
]));
25114 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
25116 if (thisarg
== NEON_ARG_STOP
)
25120 opno
= argc
+ have_retval
;
25121 mode
[argc
] = insn_data
[icode
].operand
[opno
].mode
;
25122 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
25123 arg_type
= TREE_VALUE (formals
);
25124 if (thisarg
== NEON_ARG_MEMORY
)
25126 other_mode
= insn_data
[icode
].operand
[1 - opno
].mode
;
25127 arg
[argc
] = neon_dereference_pointer (arg
[argc
], arg_type
,
25128 mode
[argc
], other_mode
,
25132 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
25134 op
[argc
] = expand_expr (arg
[argc
], NULL_RTX
, VOIDmode
,
25135 (thisarg
== NEON_ARG_MEMORY
25136 ? EXPAND_MEMORY
: EXPAND_NORMAL
));
25140 case NEON_ARG_COPY_TO_REG
:
25141 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25142 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
25143 (op
[argc
], mode
[argc
]))
25144 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
25147 case NEON_ARG_CONSTANT
:
25148 /* FIXME: This error message is somewhat unhelpful. */
25149 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
25150 (op
[argc
], mode
[argc
]))
25151 error ("argument must be a constant");
25154 case NEON_ARG_MEMORY
:
25155 /* Check if expand failed. */
25156 if (op
[argc
] == const0_rtx
)
25158 gcc_assert (MEM_P (op
[argc
]));
25159 PUT_MODE (op
[argc
], mode
[argc
]);
25160 /* ??? arm_neon.h uses the same built-in functions for signed
25161 and unsigned accesses, casting where necessary. This isn't
25163 set_mem_alias_set (op
[argc
], 0);
25164 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
25165 (op
[argc
], mode
[argc
]))
25166 op
[argc
] = (replace_equiv_address
25167 (op
[argc
], force_reg (Pmode
, XEXP (op
[argc
], 0))));
25170 case NEON_ARG_STOP
:
25171 gcc_unreachable ();
25175 formals
= TREE_CHAIN (formals
);
25185 pat
= GEN_FCN (icode
) (target
, op
[0]);
25189 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
25193 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
25197 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
25201 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
25205 gcc_unreachable ();
25211 pat
= GEN_FCN (icode
) (op
[0]);
25215 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
25219 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
25223 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
25227 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
25231 gcc_unreachable ();
25242 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25243 constants defined per-instruction or per instruction-variant. Instead, the
25244 required info is looked up in the table neon_builtin_data. */
25246 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
25248 neon_builtin_datum
*d
= &neon_builtin_data
[fcode
- ARM_BUILTIN_NEON_BASE
];
25249 neon_itype itype
= d
->itype
;
25250 enum insn_code icode
= d
->code
;
25251 neon_builtin_type_mode type_mode
= d
->mode
;
25258 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25259 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25263 case NEON_SCALARMUL
:
25264 case NEON_SCALARMULL
:
25265 case NEON_SCALARMULH
:
25266 case NEON_SHIFTINSERT
:
25267 case NEON_LOGICBINOP
:
25268 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25269 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25273 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25274 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25275 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25279 case NEON_SHIFTIMM
:
25280 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25281 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
25285 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25286 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25291 case NEON_FLOAT_WIDEN
:
25292 case NEON_FLOAT_NARROW
:
25294 case NEON_REINTERP
:
25295 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25296 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25300 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25301 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25304 case NEON_LANEMULL
:
25305 case NEON_LANEMULH
:
25306 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25307 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25308 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25311 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25312 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25313 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25315 case NEON_SHIFTACC
:
25316 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25317 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25318 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25320 case NEON_SCALARMAC
:
25321 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25322 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25323 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25327 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25328 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25332 case NEON_LOADSTRUCT
:
25333 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25334 NEON_ARG_MEMORY
, NEON_ARG_STOP
);
25336 case NEON_LOAD1LANE
:
25337 case NEON_LOADSTRUCTLANE
:
25338 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25339 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25343 case NEON_STORESTRUCT
:
25344 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25345 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25347 case NEON_STORE1LANE
:
25348 case NEON_STORESTRUCTLANE
:
25349 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25350 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25354 gcc_unreachable ();
25357 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25359 neon_reinterpret (rtx dest
, rtx src
)
25361 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
25364 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25365 not to early-clobber SRC registers in the process.
25367 We assume that the operands described by SRC and DEST represent a
25368 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25369 number of components into which the copy has been decomposed. */
25371 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
25375 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
25376 || REGNO (operands
[0]) < REGNO (operands
[1]))
25378 for (i
= 0; i
< count
; i
++)
25380 operands
[2 * i
] = dest
[i
];
25381 operands
[2 * i
+ 1] = src
[i
];
25386 for (i
= 0; i
< count
; i
++)
25388 operands
[2 * i
] = dest
[count
- i
- 1];
25389 operands
[2 * i
+ 1] = src
[count
- i
- 1];
25394 /* Split operands into moves from op[1] + op[2] into op[0]. */
25397 neon_split_vcombine (rtx operands
[3])
25399 unsigned int dest
= REGNO (operands
[0]);
25400 unsigned int src1
= REGNO (operands
[1]);
25401 unsigned int src2
= REGNO (operands
[2]);
25402 enum machine_mode halfmode
= GET_MODE (operands
[1]);
25403 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
25404 rtx destlo
, desthi
;
25406 if (src1
== dest
&& src2
== dest
+ halfregs
)
25408 /* No-op move. Can't split to nothing; emit something. */
25409 emit_note (NOTE_INSN_DELETED
);
25413 /* Preserve register attributes for variable tracking. */
25414 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
25415 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
25416 GET_MODE_SIZE (halfmode
));
25418 /* Special case of reversed high/low parts. Use VSWP. */
25419 if (src2
== dest
&& src1
== dest
+ halfregs
)
25421 rtx x
= gen_rtx_SET (VOIDmode
, destlo
, operands
[1]);
25422 rtx y
= gen_rtx_SET (VOIDmode
, desthi
, operands
[2]);
25423 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
25427 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
25429 /* Try to avoid unnecessary moves if part of the result
25430 is in the right place already. */
25432 emit_move_insn (destlo
, operands
[1]);
25433 if (src2
!= dest
+ halfregs
)
25434 emit_move_insn (desthi
, operands
[2]);
25438 if (src2
!= dest
+ halfregs
)
25439 emit_move_insn (desthi
, operands
[2]);
25441 emit_move_insn (destlo
, operands
[1]);
25445 /* Expand an expression EXP that calls a built-in function,
25446 with result going to TARGET if that's convenient
25447 (and in mode MODE if that's convenient).
25448 SUBTARGET may be used as the target for computing one of EXP's operands.
25449 IGNORE is nonzero if the value is to be ignored. */
25452 arm_expand_builtin (tree exp
,
25454 rtx subtarget ATTRIBUTE_UNUSED
,
25455 enum machine_mode mode ATTRIBUTE_UNUSED
,
25456 int ignore ATTRIBUTE_UNUSED
)
25458 const struct builtin_description
* d
;
25459 enum insn_code icode
;
25460 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
25468 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
25470 enum machine_mode tmode
;
25471 enum machine_mode mode0
;
25472 enum machine_mode mode1
;
25473 enum machine_mode mode2
;
25479 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
25480 return arm_expand_neon_builtin (fcode
, exp
, target
);
25484 case ARM_BUILTIN_GET_FPSCR
:
25485 case ARM_BUILTIN_SET_FPSCR
:
25486 if (fcode
== ARM_BUILTIN_GET_FPSCR
)
25488 icode
= CODE_FOR_get_fpscr
;
25489 target
= gen_reg_rtx (SImode
);
25490 pat
= GEN_FCN (icode
) (target
);
25495 icode
= CODE_FOR_set_fpscr
;
25496 arg0
= CALL_EXPR_ARG (exp
, 0);
25497 op0
= expand_normal (arg0
);
25498 pat
= GEN_FCN (icode
) (op0
);
25503 case ARM_BUILTIN_TEXTRMSB
:
25504 case ARM_BUILTIN_TEXTRMUB
:
25505 case ARM_BUILTIN_TEXTRMSH
:
25506 case ARM_BUILTIN_TEXTRMUH
:
25507 case ARM_BUILTIN_TEXTRMSW
:
25508 case ARM_BUILTIN_TEXTRMUW
:
25509 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
25510 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
25511 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
25512 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
25513 : CODE_FOR_iwmmxt_textrmw
);
25515 arg0
= CALL_EXPR_ARG (exp
, 0);
25516 arg1
= CALL_EXPR_ARG (exp
, 1);
25517 op0
= expand_normal (arg0
);
25518 op1
= expand_normal (arg1
);
25519 tmode
= insn_data
[icode
].operand
[0].mode
;
25520 mode0
= insn_data
[icode
].operand
[1].mode
;
25521 mode1
= insn_data
[icode
].operand
[2].mode
;
25523 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25524 op0
= copy_to_mode_reg (mode0
, op0
);
25525 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25527 /* @@@ better error message */
25528 error ("selector must be an immediate");
25529 return gen_reg_rtx (tmode
);
25532 opint
= INTVAL (op1
);
25533 if (fcode
== ARM_BUILTIN_TEXTRMSB
|| fcode
== ARM_BUILTIN_TEXTRMUB
)
25535 if (opint
> 7 || opint
< 0)
25536 error ("the range of selector should be in 0 to 7");
25538 else if (fcode
== ARM_BUILTIN_TEXTRMSH
|| fcode
== ARM_BUILTIN_TEXTRMUH
)
25540 if (opint
> 3 || opint
< 0)
25541 error ("the range of selector should be in 0 to 3");
25543 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25545 if (opint
> 1 || opint
< 0)
25546 error ("the range of selector should be in 0 to 1");
25550 || GET_MODE (target
) != tmode
25551 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25552 target
= gen_reg_rtx (tmode
);
25553 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25559 case ARM_BUILTIN_WALIGNI
:
25560 /* If op2 is immediate, call walighi, else call walighr. */
25561 arg0
= CALL_EXPR_ARG (exp
, 0);
25562 arg1
= CALL_EXPR_ARG (exp
, 1);
25563 arg2
= CALL_EXPR_ARG (exp
, 2);
25564 op0
= expand_normal (arg0
);
25565 op1
= expand_normal (arg1
);
25566 op2
= expand_normal (arg2
);
25567 if (CONST_INT_P (op2
))
25569 icode
= CODE_FOR_iwmmxt_waligni
;
25570 tmode
= insn_data
[icode
].operand
[0].mode
;
25571 mode0
= insn_data
[icode
].operand
[1].mode
;
25572 mode1
= insn_data
[icode
].operand
[2].mode
;
25573 mode2
= insn_data
[icode
].operand
[3].mode
;
25574 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25575 op0
= copy_to_mode_reg (mode0
, op0
);
25576 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25577 op1
= copy_to_mode_reg (mode1
, op1
);
25578 gcc_assert ((*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
));
25579 selector
= INTVAL (op2
);
25580 if (selector
> 7 || selector
< 0)
25581 error ("the range of selector should be in 0 to 7");
25585 icode
= CODE_FOR_iwmmxt_walignr
;
25586 tmode
= insn_data
[icode
].operand
[0].mode
;
25587 mode0
= insn_data
[icode
].operand
[1].mode
;
25588 mode1
= insn_data
[icode
].operand
[2].mode
;
25589 mode2
= insn_data
[icode
].operand
[3].mode
;
25590 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25591 op0
= copy_to_mode_reg (mode0
, op0
);
25592 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25593 op1
= copy_to_mode_reg (mode1
, op1
);
25594 if (!(*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25595 op2
= copy_to_mode_reg (mode2
, op2
);
25598 || GET_MODE (target
) != tmode
25599 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25600 target
= gen_reg_rtx (tmode
);
25601 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25607 case ARM_BUILTIN_TINSRB
:
25608 case ARM_BUILTIN_TINSRH
:
25609 case ARM_BUILTIN_TINSRW
:
25610 case ARM_BUILTIN_WMERGE
:
25611 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
25612 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
25613 : fcode
== ARM_BUILTIN_WMERGE
? CODE_FOR_iwmmxt_wmerge
25614 : CODE_FOR_iwmmxt_tinsrw
);
25615 arg0
= CALL_EXPR_ARG (exp
, 0);
25616 arg1
= CALL_EXPR_ARG (exp
, 1);
25617 arg2
= CALL_EXPR_ARG (exp
, 2);
25618 op0
= expand_normal (arg0
);
25619 op1
= expand_normal (arg1
);
25620 op2
= expand_normal (arg2
);
25621 tmode
= insn_data
[icode
].operand
[0].mode
;
25622 mode0
= insn_data
[icode
].operand
[1].mode
;
25623 mode1
= insn_data
[icode
].operand
[2].mode
;
25624 mode2
= insn_data
[icode
].operand
[3].mode
;
25626 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25627 op0
= copy_to_mode_reg (mode0
, op0
);
25628 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25629 op1
= copy_to_mode_reg (mode1
, op1
);
25630 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25632 error ("selector must be an immediate");
25635 if (icode
== CODE_FOR_iwmmxt_wmerge
)
25637 selector
= INTVAL (op2
);
25638 if (selector
> 7 || selector
< 0)
25639 error ("the range of selector should be in 0 to 7");
25641 if ((icode
== CODE_FOR_iwmmxt_tinsrb
)
25642 || (icode
== CODE_FOR_iwmmxt_tinsrh
)
25643 || (icode
== CODE_FOR_iwmmxt_tinsrw
))
25646 selector
= INTVAL (op2
);
25647 if (icode
== CODE_FOR_iwmmxt_tinsrb
&& (selector
< 0 || selector
> 7))
25648 error ("the range of selector should be in 0 to 7");
25649 else if (icode
== CODE_FOR_iwmmxt_tinsrh
&& (selector
< 0 ||selector
> 3))
25650 error ("the range of selector should be in 0 to 3");
25651 else if (icode
== CODE_FOR_iwmmxt_tinsrw
&& (selector
< 0 ||selector
> 1))
25652 error ("the range of selector should be in 0 to 1");
25654 op2
= GEN_INT (mask
);
25657 || GET_MODE (target
) != tmode
25658 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25659 target
= gen_reg_rtx (tmode
);
25660 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25666 case ARM_BUILTIN_SETWCGR0
:
25667 case ARM_BUILTIN_SETWCGR1
:
25668 case ARM_BUILTIN_SETWCGR2
:
25669 case ARM_BUILTIN_SETWCGR3
:
25670 icode
= (fcode
== ARM_BUILTIN_SETWCGR0
? CODE_FOR_iwmmxt_setwcgr0
25671 : fcode
== ARM_BUILTIN_SETWCGR1
? CODE_FOR_iwmmxt_setwcgr1
25672 : fcode
== ARM_BUILTIN_SETWCGR2
? CODE_FOR_iwmmxt_setwcgr2
25673 : CODE_FOR_iwmmxt_setwcgr3
);
25674 arg0
= CALL_EXPR_ARG (exp
, 0);
25675 op0
= expand_normal (arg0
);
25676 mode0
= insn_data
[icode
].operand
[0].mode
;
25677 if (!(*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
25678 op0
= copy_to_mode_reg (mode0
, op0
);
25679 pat
= GEN_FCN (icode
) (op0
);
25685 case ARM_BUILTIN_GETWCGR0
:
25686 case ARM_BUILTIN_GETWCGR1
:
25687 case ARM_BUILTIN_GETWCGR2
:
25688 case ARM_BUILTIN_GETWCGR3
:
25689 icode
= (fcode
== ARM_BUILTIN_GETWCGR0
? CODE_FOR_iwmmxt_getwcgr0
25690 : fcode
== ARM_BUILTIN_GETWCGR1
? CODE_FOR_iwmmxt_getwcgr1
25691 : fcode
== ARM_BUILTIN_GETWCGR2
? CODE_FOR_iwmmxt_getwcgr2
25692 : CODE_FOR_iwmmxt_getwcgr3
);
25693 tmode
= insn_data
[icode
].operand
[0].mode
;
25695 || GET_MODE (target
) != tmode
25696 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25697 target
= gen_reg_rtx (tmode
);
25698 pat
= GEN_FCN (icode
) (target
);
25704 case ARM_BUILTIN_WSHUFH
:
25705 icode
= CODE_FOR_iwmmxt_wshufh
;
25706 arg0
= CALL_EXPR_ARG (exp
, 0);
25707 arg1
= CALL_EXPR_ARG (exp
, 1);
25708 op0
= expand_normal (arg0
);
25709 op1
= expand_normal (arg1
);
25710 tmode
= insn_data
[icode
].operand
[0].mode
;
25711 mode1
= insn_data
[icode
].operand
[1].mode
;
25712 mode2
= insn_data
[icode
].operand
[2].mode
;
25714 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
25715 op0
= copy_to_mode_reg (mode1
, op0
);
25716 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
25718 error ("mask must be an immediate");
25721 selector
= INTVAL (op1
);
25722 if (selector
< 0 || selector
> 255)
25723 error ("the range of mask should be in 0 to 255");
25725 || GET_MODE (target
) != tmode
25726 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25727 target
= gen_reg_rtx (tmode
);
25728 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25734 case ARM_BUILTIN_WMADDS
:
25735 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds
, exp
, target
);
25736 case ARM_BUILTIN_WMADDSX
:
25737 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx
, exp
, target
);
25738 case ARM_BUILTIN_WMADDSN
:
25739 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn
, exp
, target
);
25740 case ARM_BUILTIN_WMADDU
:
25741 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu
, exp
, target
);
25742 case ARM_BUILTIN_WMADDUX
:
25743 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux
, exp
, target
);
25744 case ARM_BUILTIN_WMADDUN
:
25745 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun
, exp
, target
);
25746 case ARM_BUILTIN_WSADBZ
:
25747 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
25748 case ARM_BUILTIN_WSADHZ
:
25749 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
25751 /* Several three-argument builtins. */
25752 case ARM_BUILTIN_WMACS
:
25753 case ARM_BUILTIN_WMACU
:
25754 case ARM_BUILTIN_TMIA
:
25755 case ARM_BUILTIN_TMIAPH
:
25756 case ARM_BUILTIN_TMIATT
:
25757 case ARM_BUILTIN_TMIATB
:
25758 case ARM_BUILTIN_TMIABT
:
25759 case ARM_BUILTIN_TMIABB
:
25760 case ARM_BUILTIN_WQMIABB
:
25761 case ARM_BUILTIN_WQMIABT
:
25762 case ARM_BUILTIN_WQMIATB
:
25763 case ARM_BUILTIN_WQMIATT
:
25764 case ARM_BUILTIN_WQMIABBN
:
25765 case ARM_BUILTIN_WQMIABTN
:
25766 case ARM_BUILTIN_WQMIATBN
:
25767 case ARM_BUILTIN_WQMIATTN
:
25768 case ARM_BUILTIN_WMIABB
:
25769 case ARM_BUILTIN_WMIABT
:
25770 case ARM_BUILTIN_WMIATB
:
25771 case ARM_BUILTIN_WMIATT
:
25772 case ARM_BUILTIN_WMIABBN
:
25773 case ARM_BUILTIN_WMIABTN
:
25774 case ARM_BUILTIN_WMIATBN
:
25775 case ARM_BUILTIN_WMIATTN
:
25776 case ARM_BUILTIN_WMIAWBB
:
25777 case ARM_BUILTIN_WMIAWBT
:
25778 case ARM_BUILTIN_WMIAWTB
:
25779 case ARM_BUILTIN_WMIAWTT
:
25780 case ARM_BUILTIN_WMIAWBBN
:
25781 case ARM_BUILTIN_WMIAWBTN
:
25782 case ARM_BUILTIN_WMIAWTBN
:
25783 case ARM_BUILTIN_WMIAWTTN
:
25784 case ARM_BUILTIN_WSADB
:
25785 case ARM_BUILTIN_WSADH
:
25786 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
25787 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
25788 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
25789 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
25790 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
25791 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
25792 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
25793 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
25794 : fcode
== ARM_BUILTIN_WQMIABB
? CODE_FOR_iwmmxt_wqmiabb
25795 : fcode
== ARM_BUILTIN_WQMIABT
? CODE_FOR_iwmmxt_wqmiabt
25796 : fcode
== ARM_BUILTIN_WQMIATB
? CODE_FOR_iwmmxt_wqmiatb
25797 : fcode
== ARM_BUILTIN_WQMIATT
? CODE_FOR_iwmmxt_wqmiatt
25798 : fcode
== ARM_BUILTIN_WQMIABBN
? CODE_FOR_iwmmxt_wqmiabbn
25799 : fcode
== ARM_BUILTIN_WQMIABTN
? CODE_FOR_iwmmxt_wqmiabtn
25800 : fcode
== ARM_BUILTIN_WQMIATBN
? CODE_FOR_iwmmxt_wqmiatbn
25801 : fcode
== ARM_BUILTIN_WQMIATTN
? CODE_FOR_iwmmxt_wqmiattn
25802 : fcode
== ARM_BUILTIN_WMIABB
? CODE_FOR_iwmmxt_wmiabb
25803 : fcode
== ARM_BUILTIN_WMIABT
? CODE_FOR_iwmmxt_wmiabt
25804 : fcode
== ARM_BUILTIN_WMIATB
? CODE_FOR_iwmmxt_wmiatb
25805 : fcode
== ARM_BUILTIN_WMIATT
? CODE_FOR_iwmmxt_wmiatt
25806 : fcode
== ARM_BUILTIN_WMIABBN
? CODE_FOR_iwmmxt_wmiabbn
25807 : fcode
== ARM_BUILTIN_WMIABTN
? CODE_FOR_iwmmxt_wmiabtn
25808 : fcode
== ARM_BUILTIN_WMIATBN
? CODE_FOR_iwmmxt_wmiatbn
25809 : fcode
== ARM_BUILTIN_WMIATTN
? CODE_FOR_iwmmxt_wmiattn
25810 : fcode
== ARM_BUILTIN_WMIAWBB
? CODE_FOR_iwmmxt_wmiawbb
25811 : fcode
== ARM_BUILTIN_WMIAWBT
? CODE_FOR_iwmmxt_wmiawbt
25812 : fcode
== ARM_BUILTIN_WMIAWTB
? CODE_FOR_iwmmxt_wmiawtb
25813 : fcode
== ARM_BUILTIN_WMIAWTT
? CODE_FOR_iwmmxt_wmiawtt
25814 : fcode
== ARM_BUILTIN_WMIAWBBN
? CODE_FOR_iwmmxt_wmiawbbn
25815 : fcode
== ARM_BUILTIN_WMIAWBTN
? CODE_FOR_iwmmxt_wmiawbtn
25816 : fcode
== ARM_BUILTIN_WMIAWTBN
? CODE_FOR_iwmmxt_wmiawtbn
25817 : fcode
== ARM_BUILTIN_WMIAWTTN
? CODE_FOR_iwmmxt_wmiawttn
25818 : fcode
== ARM_BUILTIN_WSADB
? CODE_FOR_iwmmxt_wsadb
25819 : CODE_FOR_iwmmxt_wsadh
);
25820 arg0
= CALL_EXPR_ARG (exp
, 0);
25821 arg1
= CALL_EXPR_ARG (exp
, 1);
25822 arg2
= CALL_EXPR_ARG (exp
, 2);
25823 op0
= expand_normal (arg0
);
25824 op1
= expand_normal (arg1
);
25825 op2
= expand_normal (arg2
);
25826 tmode
= insn_data
[icode
].operand
[0].mode
;
25827 mode0
= insn_data
[icode
].operand
[1].mode
;
25828 mode1
= insn_data
[icode
].operand
[2].mode
;
25829 mode2
= insn_data
[icode
].operand
[3].mode
;
25831 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25832 op0
= copy_to_mode_reg (mode0
, op0
);
25833 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25834 op1
= copy_to_mode_reg (mode1
, op1
);
25835 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25836 op2
= copy_to_mode_reg (mode2
, op2
);
25838 || GET_MODE (target
) != tmode
25839 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25840 target
= gen_reg_rtx (tmode
);
25841 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25847 case ARM_BUILTIN_WZERO
:
25848 target
= gen_reg_rtx (DImode
);
25849 emit_insn (gen_iwmmxt_clrdi (target
));
25852 case ARM_BUILTIN_WSRLHI
:
25853 case ARM_BUILTIN_WSRLWI
:
25854 case ARM_BUILTIN_WSRLDI
:
25855 case ARM_BUILTIN_WSLLHI
:
25856 case ARM_BUILTIN_WSLLWI
:
25857 case ARM_BUILTIN_WSLLDI
:
25858 case ARM_BUILTIN_WSRAHI
:
25859 case ARM_BUILTIN_WSRAWI
:
25860 case ARM_BUILTIN_WSRADI
:
25861 case ARM_BUILTIN_WRORHI
:
25862 case ARM_BUILTIN_WRORWI
:
25863 case ARM_BUILTIN_WRORDI
:
25864 case ARM_BUILTIN_WSRLH
:
25865 case ARM_BUILTIN_WSRLW
:
25866 case ARM_BUILTIN_WSRLD
:
25867 case ARM_BUILTIN_WSLLH
:
25868 case ARM_BUILTIN_WSLLW
:
25869 case ARM_BUILTIN_WSLLD
:
25870 case ARM_BUILTIN_WSRAH
:
25871 case ARM_BUILTIN_WSRAW
:
25872 case ARM_BUILTIN_WSRAD
:
25873 case ARM_BUILTIN_WRORH
:
25874 case ARM_BUILTIN_WRORW
:
25875 case ARM_BUILTIN_WRORD
:
25876 icode
= (fcode
== ARM_BUILTIN_WSRLHI
? CODE_FOR_lshrv4hi3_iwmmxt
25877 : fcode
== ARM_BUILTIN_WSRLWI
? CODE_FOR_lshrv2si3_iwmmxt
25878 : fcode
== ARM_BUILTIN_WSRLDI
? CODE_FOR_lshrdi3_iwmmxt
25879 : fcode
== ARM_BUILTIN_WSLLHI
? CODE_FOR_ashlv4hi3_iwmmxt
25880 : fcode
== ARM_BUILTIN_WSLLWI
? CODE_FOR_ashlv2si3_iwmmxt
25881 : fcode
== ARM_BUILTIN_WSLLDI
? CODE_FOR_ashldi3_iwmmxt
25882 : fcode
== ARM_BUILTIN_WSRAHI
? CODE_FOR_ashrv4hi3_iwmmxt
25883 : fcode
== ARM_BUILTIN_WSRAWI
? CODE_FOR_ashrv2si3_iwmmxt
25884 : fcode
== ARM_BUILTIN_WSRADI
? CODE_FOR_ashrdi3_iwmmxt
25885 : fcode
== ARM_BUILTIN_WRORHI
? CODE_FOR_rorv4hi3
25886 : fcode
== ARM_BUILTIN_WRORWI
? CODE_FOR_rorv2si3
25887 : fcode
== ARM_BUILTIN_WRORDI
? CODE_FOR_rordi3
25888 : fcode
== ARM_BUILTIN_WSRLH
? CODE_FOR_lshrv4hi3_di
25889 : fcode
== ARM_BUILTIN_WSRLW
? CODE_FOR_lshrv2si3_di
25890 : fcode
== ARM_BUILTIN_WSRLD
? CODE_FOR_lshrdi3_di
25891 : fcode
== ARM_BUILTIN_WSLLH
? CODE_FOR_ashlv4hi3_di
25892 : fcode
== ARM_BUILTIN_WSLLW
? CODE_FOR_ashlv2si3_di
25893 : fcode
== ARM_BUILTIN_WSLLD
? CODE_FOR_ashldi3_di
25894 : fcode
== ARM_BUILTIN_WSRAH
? CODE_FOR_ashrv4hi3_di
25895 : fcode
== ARM_BUILTIN_WSRAW
? CODE_FOR_ashrv2si3_di
25896 : fcode
== ARM_BUILTIN_WSRAD
? CODE_FOR_ashrdi3_di
25897 : fcode
== ARM_BUILTIN_WRORH
? CODE_FOR_rorv4hi3_di
25898 : fcode
== ARM_BUILTIN_WRORW
? CODE_FOR_rorv2si3_di
25899 : fcode
== ARM_BUILTIN_WRORD
? CODE_FOR_rordi3_di
25900 : CODE_FOR_nothing
);
25901 arg1
= CALL_EXPR_ARG (exp
, 1);
25902 op1
= expand_normal (arg1
);
25903 if (GET_MODE (op1
) == VOIDmode
)
25905 imm
= INTVAL (op1
);
25906 if ((fcode
== ARM_BUILTIN_WRORHI
|| fcode
== ARM_BUILTIN_WRORWI
25907 || fcode
== ARM_BUILTIN_WRORH
|| fcode
== ARM_BUILTIN_WRORW
)
25908 && (imm
< 0 || imm
> 32))
25910 if (fcode
== ARM_BUILTIN_WRORHI
)
25911 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
25912 else if (fcode
== ARM_BUILTIN_WRORWI
)
25913 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
25914 else if (fcode
== ARM_BUILTIN_WRORH
)
25915 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
25917 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
25919 else if ((fcode
== ARM_BUILTIN_WRORDI
|| fcode
== ARM_BUILTIN_WRORD
)
25920 && (imm
< 0 || imm
> 64))
25922 if (fcode
== ARM_BUILTIN_WRORDI
)
25923 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
25925 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
25929 if (fcode
== ARM_BUILTIN_WSRLHI
)
25930 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
25931 else if (fcode
== ARM_BUILTIN_WSRLWI
)
25932 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
25933 else if (fcode
== ARM_BUILTIN_WSRLDI
)
25934 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
25935 else if (fcode
== ARM_BUILTIN_WSLLHI
)
25936 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
25937 else if (fcode
== ARM_BUILTIN_WSLLWI
)
25938 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
25939 else if (fcode
== ARM_BUILTIN_WSLLDI
)
25940 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
25941 else if (fcode
== ARM_BUILTIN_WSRAHI
)
25942 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
25943 else if (fcode
== ARM_BUILTIN_WSRAWI
)
25944 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
25945 else if (fcode
== ARM_BUILTIN_WSRADI
)
25946 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
25947 else if (fcode
== ARM_BUILTIN_WSRLH
)
25948 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
25949 else if (fcode
== ARM_BUILTIN_WSRLW
)
25950 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
25951 else if (fcode
== ARM_BUILTIN_WSRLD
)
25952 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
25953 else if (fcode
== ARM_BUILTIN_WSLLH
)
25954 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
25955 else if (fcode
== ARM_BUILTIN_WSLLW
)
25956 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
25957 else if (fcode
== ARM_BUILTIN_WSLLD
)
25958 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
25959 else if (fcode
== ARM_BUILTIN_WSRAH
)
25960 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
25961 else if (fcode
== ARM_BUILTIN_WSRAW
)
25962 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
25964 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
25967 return arm_expand_binop_builtin (icode
, exp
, target
);
25973 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
25974 if (d
->code
== (const enum arm_builtins
) fcode
)
25975 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
25977 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
25978 if (d
->code
== (const enum arm_builtins
) fcode
)
25979 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
25981 for (i
= 0, d
= bdesc_3arg
; i
< ARRAY_SIZE (bdesc_3arg
); i
++, d
++)
25982 if (d
->code
== (const enum arm_builtins
) fcode
)
25983 return arm_expand_ternop_builtin (d
->icode
, exp
, target
);
25985 /* @@@ Should really do something sensible here. */
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static int
number_of_first_bit_set (unsigned mask)
{
  /* ctz_hwi returns the count of trailing zero bits, which is exactly
     the index of the lowest set bit.  MASK must be nonzero.  */
  return ctz_hwi (mask);
}
25998 /* Like emit_multi_reg_push, but allowing for a different set of
25999 registers to be described as saved. MASK is the set of registers
26000 to be saved; REAL_REGS is the set of registers to be described as
26001 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26004 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
26006 unsigned long regno
;
26007 rtx par
[10], tmp
, reg
, insn
;
26010 /* Build the parallel of the registers actually being stored. */
26011 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
26013 regno
= ctz_hwi (mask
);
26014 reg
= gen_rtx_REG (SImode
, regno
);
26017 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
26019 tmp
= gen_rtx_USE (VOIDmode
, reg
);
26024 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
26025 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
26026 tmp
= gen_frame_mem (BLKmode
, tmp
);
26027 tmp
= gen_rtx_SET (VOIDmode
, tmp
, par
[0]);
26030 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
26031 insn
= emit_insn (tmp
);
26033 /* Always build the stack adjustment note for unwind info. */
26034 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
26035 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
);
26038 /* Build the parallel of the registers recorded as saved for unwind. */
26039 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
26041 regno
= ctz_hwi (real_regs
);
26042 reg
= gen_rtx_REG (SImode
, regno
);
26044 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
26045 tmp
= gen_frame_mem (SImode
, tmp
);
26046 tmp
= gen_rtx_SET (VOIDmode
, tmp
, reg
);
26047 RTX_FRAME_RELATED_P (tmp
) = 1;
26055 RTX_FRAME_RELATED_P (par
[0]) = 1;
26056 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
26059 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
26064 /* Emit code to push or pop registers to or from the stack. F is the
26065 assembly file. MASK is the registers to pop. */
26067 thumb_pop (FILE *f
, unsigned long mask
)
26070 int lo_mask
= mask
& 0xFF;
26071 int pushed_words
= 0;
26075 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
26077 /* Special case. Do not generate a POP PC statement here, do it in
26079 thumb_exit (f
, -1);
26083 fprintf (f
, "\tpop\t{");
26085 /* Look at the low registers first. */
26086 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
26090 asm_fprintf (f
, "%r", regno
);
26092 if ((lo_mask
& ~1) != 0)
26099 if (mask
& (1 << PC_REGNUM
))
26101 /* Catch popping the PC. */
26102 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
26103 || crtl
->calls_eh_return
)
26105 /* The PC is never poped directly, instead
26106 it is popped into r3 and then BX is used. */
26107 fprintf (f
, "}\n");
26109 thumb_exit (f
, -1);
26118 asm_fprintf (f
, "%r", PC_REGNUM
);
26122 fprintf (f
, "}\n");
26125 /* Generate code to return from a thumb function.
26126 If 'reg_containing_return_addr' is -1, then the return address is
26127 actually on the stack, at the stack pointer. */
26129 thumb_exit (FILE *f
, int reg_containing_return_addr
)
26131 unsigned regs_available_for_popping
;
26132 unsigned regs_to_pop
;
26134 unsigned available
;
26136 enum machine_mode mode
;
26138 int restore_a4
= FALSE
;
26140 /* Compute the registers we need to pop. */
26144 if (reg_containing_return_addr
== -1)
26146 regs_to_pop
|= 1 << LR_REGNUM
;
26150 if (TARGET_BACKTRACE
)
26152 /* Restore the (ARM) frame pointer and stack pointer. */
26153 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
26157 /* If there is nothing to pop then just emit the BX instruction and
26159 if (pops_needed
== 0)
26161 if (crtl
->calls_eh_return
)
26162 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26164 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26167 /* Otherwise if we are not supporting interworking and we have not created
26168 a backtrace structure and the function was not entered in ARM mode then
26169 just pop the return address straight into the PC. */
26170 else if (!TARGET_INTERWORK
26171 && !TARGET_BACKTRACE
26172 && !is_called_in_ARM_mode (current_function_decl
)
26173 && !crtl
->calls_eh_return
)
26175 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
26179 /* Find out how many of the (return) argument registers we can corrupt. */
26180 regs_available_for_popping
= 0;
26182 /* If returning via __builtin_eh_return, the bottom three registers
26183 all contain information needed for the return. */
26184 if (crtl
->calls_eh_return
)
26188 /* If we can deduce the registers used from the function's
26189 return value. This is more reliable that examining
26190 df_regs_ever_live_p () because that will be set if the register is
26191 ever used in the function, not just if the register is used
26192 to hold a return value. */
26194 if (crtl
->return_rtx
!= 0)
26195 mode
= GET_MODE (crtl
->return_rtx
);
26197 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
26199 size
= GET_MODE_SIZE (mode
);
26203 /* In a void function we can use any argument register.
26204 In a function that returns a structure on the stack
26205 we can use the second and third argument registers. */
26206 if (mode
== VOIDmode
)
26207 regs_available_for_popping
=
26208 (1 << ARG_REGISTER (1))
26209 | (1 << ARG_REGISTER (2))
26210 | (1 << ARG_REGISTER (3));
26212 regs_available_for_popping
=
26213 (1 << ARG_REGISTER (2))
26214 | (1 << ARG_REGISTER (3));
26216 else if (size
<= 4)
26217 regs_available_for_popping
=
26218 (1 << ARG_REGISTER (2))
26219 | (1 << ARG_REGISTER (3));
26220 else if (size
<= 8)
26221 regs_available_for_popping
=
26222 (1 << ARG_REGISTER (3));
26225 /* Match registers to be popped with registers into which we pop them. */
26226 for (available
= regs_available_for_popping
,
26227 required
= regs_to_pop
;
26228 required
!= 0 && available
!= 0;
26229 available
&= ~(available
& - available
),
26230 required
&= ~(required
& - required
))
26233 /* If we have any popping registers left over, remove them. */
26235 regs_available_for_popping
&= ~available
;
26237 /* Otherwise if we need another popping register we can use
26238 the fourth argument register. */
26239 else if (pops_needed
)
26241 /* If we have not found any free argument registers and
26242 reg a4 contains the return address, we must move it. */
26243 if (regs_available_for_popping
== 0
26244 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
26246 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26247 reg_containing_return_addr
= LR_REGNUM
;
26249 else if (size
> 12)
26251 /* Register a4 is being used to hold part of the return value,
26252 but we have dire need of a free, low register. */
26255 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
26258 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
26260 /* The fourth argument register is available. */
26261 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
26267 /* Pop as many registers as we can. */
26268 thumb_pop (f
, regs_available_for_popping
);
26270 /* Process the registers we popped. */
26271 if (reg_containing_return_addr
== -1)
26273 /* The return address was popped into the lowest numbered register. */
26274 regs_to_pop
&= ~(1 << LR_REGNUM
);
26276 reg_containing_return_addr
=
26277 number_of_first_bit_set (regs_available_for_popping
);
26279 /* Remove this register for the mask of available registers, so that
26280 the return address will not be corrupted by further pops. */
26281 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
26284 /* If we popped other registers then handle them here. */
26285 if (regs_available_for_popping
)
26289 /* Work out which register currently contains the frame pointer. */
26290 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26292 /* Move it into the correct place. */
26293 asm_fprintf (f
, "\tmov\t%r, %r\n",
26294 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
26296 /* (Temporarily) remove it from the mask of popped registers. */
26297 regs_available_for_popping
&= ~(1 << frame_pointer
);
26298 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
26300 if (regs_available_for_popping
)
26304 /* We popped the stack pointer as well,
26305 find the register that contains it. */
26306 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26308 /* Move it into the stack register. */
26309 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
26311 /* At this point we have popped all necessary registers, so
26312 do not worry about restoring regs_available_for_popping
26313 to its correct value:
26315 assert (pops_needed == 0)
26316 assert (regs_available_for_popping == (1 << frame_pointer))
26317 assert (regs_to_pop == (1 << STACK_POINTER)) */
26321 /* Since we have just move the popped value into the frame
26322 pointer, the popping register is available for reuse, and
26323 we know that we still have the stack pointer left to pop. */
26324 regs_available_for_popping
|= (1 << frame_pointer
);
26328 /* If we still have registers left on the stack, but we no longer have
26329 any registers into which we can pop them, then we must move the return
26330 address into the link register and make available the register that
26332 if (regs_available_for_popping
== 0 && pops_needed
> 0)
26334 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
26336 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
26337 reg_containing_return_addr
);
26339 reg_containing_return_addr
= LR_REGNUM
;
26342 /* If we have registers left on the stack then pop some more.
26343 We know that at most we will want to pop FP and SP. */
26344 if (pops_needed
> 0)
26349 thumb_pop (f
, regs_available_for_popping
);
26351 /* We have popped either FP or SP.
26352 Move whichever one it is into the correct register. */
26353 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26354 move_to
= number_of_first_bit_set (regs_to_pop
);
26356 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
26358 regs_to_pop
&= ~(1 << move_to
);
26363 /* If we still have not popped everything then we must have only
26364 had one register available to us and we are now popping the SP. */
26365 if (pops_needed
> 0)
26369 thumb_pop (f
, regs_available_for_popping
);
26371 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26373 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
26375 assert (regs_to_pop == (1 << STACK_POINTER))
26376 assert (pops_needed == 1)
26380 /* If necessary restore the a4 register. */
26383 if (reg_containing_return_addr
!= LR_REGNUM
)
26385 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26386 reg_containing_return_addr
= LR_REGNUM
;
26389 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
26392 if (crtl
->calls_eh_return
)
26393 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26395 /* Return to caller. */
26396 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26399 /* Scan INSN just before assembler is output for it.
26400 For Thumb-1, we track the status of the condition codes; this
26401 information is used in the cbranchsi4_insn pattern. */
26403 thumb1_final_prescan_insn (rtx insn
)
26405 if (flag_print_asm_name
)
26406 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
26407 INSN_ADDRESSES (INSN_UID (insn
)));
26408 /* Don't overwrite the previous setter when we get to a cbranch. */
26409 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
26411 enum attr_conds conds
;
26413 if (cfun
->machine
->thumb1_cc_insn
)
26415 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
26416 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
26419 conds
= get_attr_conds (insn
);
26420 if (conds
== CONDS_SET
)
26422 rtx set
= single_set (insn
);
26423 cfun
->machine
->thumb1_cc_insn
= insn
;
26424 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
26425 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
26426 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
26427 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
26429 rtx src1
= XEXP (SET_SRC (set
), 1);
26430 if (src1
== const0_rtx
)
26431 cfun
->machine
->thumb1_cc_mode
= CCmode
;
26433 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
26435 /* Record the src register operand instead of dest because
26436 cprop_hardreg pass propagates src. */
26437 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
26440 else if (conds
!= CONDS_NOCOND
)
26441 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
26444 /* Check if unexpected far jump is used. */
26445 if (cfun
->machine
->lr_save_eliminated
26446 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26447 internal_error("Unexpected thumb1 far jump");
26451 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
26453 unsigned HOST_WIDE_INT mask
= 0xff;
26456 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
26457 if (val
== 0) /* XXX */
26460 for (i
= 0; i
< 25; i
++)
26461 if ((val
& (mask
<< i
)) == val
)
26467 /* Returns nonzero if the current function contains,
26468 or might contain a far jump. */
26470 thumb_far_jump_used_p (void)
26473 bool far_jump
= false;
26474 unsigned int func_size
= 0;
26476 /* This test is only important for leaf functions. */
26477 /* assert (!leaf_function_p ()); */
26479 /* If we have already decided that far jumps may be used,
26480 do not bother checking again, and always return true even if
26481 it turns out that they are not being used. Once we have made
26482 the decision that far jumps are present (and that hence the link
26483 register will be pushed onto the stack) we cannot go back on it. */
26484 if (cfun
->machine
->far_jump_used
)
26487 /* If this function is not being called from the prologue/epilogue
26488 generation code then it must be being called from the
26489 INITIAL_ELIMINATION_OFFSET macro. */
26490 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
26492 /* In this case we know that we are being asked about the elimination
26493 of the arg pointer register. If that register is not being used,
26494 then there are no arguments on the stack, and we do not have to
26495 worry that a far jump might force the prologue to push the link
26496 register, changing the stack offsets. In this case we can just
26497 return false, since the presence of far jumps in the function will
26498 not affect stack offsets.
26500 If the arg pointer is live (or if it was live, but has now been
26501 eliminated and so set to dead) then we do have to test to see if
26502 the function might contain a far jump. This test can lead to some
26503 false negatives, since before reload is completed, then length of
26504 branch instructions is not known, so gcc defaults to returning their
26505 longest length, which in turn sets the far jump attribute to true.
26507 A false negative will not result in bad code being generated, but it
26508 will result in a needless push and pop of the link register. We
26509 hope that this does not occur too often.
26511 If we need doubleword stack alignment this could affect the other
26512 elimination offsets so we can't risk getting it wrong. */
26513 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
26514 cfun
->machine
->arg_pointer_live
= 1;
26515 else if (!cfun
->machine
->arg_pointer_live
)
26519 /* We should not change far_jump_used during or after reload, as there is
26520 no chance to change stack frame layout. */
26521 if (reload_in_progress
|| reload_completed
)
26524 /* Check to see if the function contains a branch
26525 insn with the far jump attribute set. */
26526 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
26528 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26532 func_size
+= get_attr_length (insn
);
26535 /* Attribute far_jump will always be true for thumb1 before
26536 shorten_branch pass. So checking far_jump attribute before
26537 shorten_branch isn't much useful.
26539 Following heuristic tries to estimate more accurately if a far jump
26540 may finally be used. The heuristic is very conservative as there is
26541 no chance to roll-back the decision of not to use far jump.
26543 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26544 2-byte insn is associated with a 4 byte constant pool. Using
26545 function size 2048/3 as the threshold is conservative enough. */
26548 if ((func_size
* 3) >= 2048)
26550 /* Record the fact that we have decided that
26551 the function does use far jumps. */
26552 cfun
->machine
->far_jump_used
= 1;
26560 /* Return nonzero if FUNC must be entered in ARM mode. */
26562 is_called_in_ARM_mode (tree func
)
26564 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
26566 /* Ignore the problem about functions whose address is taken. */
26567 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
26571 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
26577 /* Given the stack offsets and register mask in OFFSETS, decide how
26578 many additional registers to push instead of subtracting a constant
26579 from SP. For epilogues the principle is the same except we use pop.
26580 FOR_PROLOGUE indicates which we're generating. */
26582 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
26584 HOST_WIDE_INT amount
;
26585 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
26586 /* Extract a mask of the ones we can give to the Thumb's push/pop
26588 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
26589 /* Then count how many other high registers will need to be pushed. */
26590 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26591 int n_free
, reg_base
, size
;
26593 if (!for_prologue
&& frame_pointer_needed
)
26594 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26596 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26598 /* If the stack frame size is 512 exactly, we can save one load
26599 instruction, which should make this a win even when optimizing
26601 if (!optimize_size
&& amount
!= 512)
26604 /* Can't do this if there are high registers to push. */
26605 if (high_regs_pushed
!= 0)
26608 /* Shouldn't do it in the prologue if no registers would normally
26609 be pushed at all. In the epilogue, also allow it if we'll have
26610 a pop insn for the PC. */
26613 || TARGET_BACKTRACE
26614 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
26615 || TARGET_INTERWORK
26616 || crtl
->args
.pretend_args_size
!= 0))
26619 /* Don't do this if thumb_expand_prologue wants to emit instructions
26620 between the push and the stack frame allocation. */
26622 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26623 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
26630 size
= arm_size_return_regs ();
26631 reg_base
= ARM_NUM_INTS (size
);
26632 live_regs_mask
>>= reg_base
;
26635 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
26636 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
26638 live_regs_mask
>>= 1;
26644 gcc_assert (amount
/ 4 * 4 == amount
);
26646 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
26647 return (amount
- 508) / 4;
26648 if (amount
<= n_free
* 4)
26653 /* The bits which aren't usefully expanded as rtl. */
26655 thumb1_unexpanded_epilogue (void)
26657 arm_stack_offsets
*offsets
;
26659 unsigned long live_regs_mask
= 0;
26660 int high_regs_pushed
= 0;
26662 int had_to_push_lr
;
26665 if (cfun
->machine
->return_used_this_function
!= 0)
26668 if (IS_NAKED (arm_current_func_type ()))
26671 offsets
= arm_get_frame_offsets ();
26672 live_regs_mask
= offsets
->saved_regs_mask
;
26673 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26675 /* If we can deduce the registers used from the function's return value.
26676 This is more reliable that examining df_regs_ever_live_p () because that
26677 will be set if the register is ever used in the function, not just if
26678 the register is used to hold a return value. */
26679 size
= arm_size_return_regs ();
26681 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
26684 unsigned long extra_mask
= (1 << extra_pop
) - 1;
26685 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
26688 /* The prolog may have pushed some high registers to use as
26689 work registers. e.g. the testsuite file:
26690 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26691 compiles to produce:
26692 push {r4, r5, r6, r7, lr}
26696 as part of the prolog. We have to undo that pushing here. */
26698 if (high_regs_pushed
)
26700 unsigned long mask
= live_regs_mask
& 0xff;
26703 /* The available low registers depend on the size of the value we are
26711 /* Oh dear! We have no low registers into which we can pop
26714 ("no low registers available for popping high registers");
26716 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
26717 if (live_regs_mask
& (1 << next_hi_reg
))
26720 while (high_regs_pushed
)
26722 /* Find lo register(s) into which the high register(s) can
26724 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
26726 if (mask
& (1 << regno
))
26727 high_regs_pushed
--;
26728 if (high_regs_pushed
== 0)
26732 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
26734 /* Pop the values into the low register(s). */
26735 thumb_pop (asm_out_file
, mask
);
26737 /* Move the value(s) into the high registers. */
26738 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
26740 if (mask
& (1 << regno
))
26742 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
26745 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
26746 if (live_regs_mask
& (1 << next_hi_reg
))
26751 live_regs_mask
&= ~0x0f00;
26754 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
26755 live_regs_mask
&= 0xff;
26757 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
26759 /* Pop the return address into the PC. */
26760 if (had_to_push_lr
)
26761 live_regs_mask
|= 1 << PC_REGNUM
;
26763 /* Either no argument registers were pushed or a backtrace
26764 structure was created which includes an adjusted stack
26765 pointer, so just pop everything. */
26766 if (live_regs_mask
)
26767 thumb_pop (asm_out_file
, live_regs_mask
);
26769 /* We have either just popped the return address into the
26770 PC or it is was kept in LR for the entire function.
26771 Note that thumb_pop has already called thumb_exit if the
26772 PC was in the list. */
26773 if (!had_to_push_lr
)
26774 thumb_exit (asm_out_file
, LR_REGNUM
);
26778 /* Pop everything but the return address. */
26779 if (live_regs_mask
)
26780 thumb_pop (asm_out_file
, live_regs_mask
);
26782 if (had_to_push_lr
)
26786 /* We have no free low regs, so save one. */
26787 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
26791 /* Get the return address into a temporary register. */
26792 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
26796 /* Move the return address to lr. */
26797 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
26799 /* Restore the low register. */
26800 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
26805 regno
= LAST_ARG_REGNUM
;
26810 /* Remove the argument registers that were pushed onto the stack. */
26811 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
26812 SP_REGNUM
, SP_REGNUM
,
26813 crtl
->args
.pretend_args_size
);
26815 thumb_exit (asm_out_file
, regno
);
26821 /* Functions to save and restore machine-specific function data. */
26822 static struct machine_function
*
26823 arm_init_machine_status (void)
26825 struct machine_function
*machine
;
26826 machine
= ggc_cleared_alloc
<machine_function
> ();
26828 #if ARM_FT_UNKNOWN != 0
26829 machine
->func_type
= ARM_FT_UNKNOWN
;
26834 /* Return an RTX indicating where the return address to the
26835 calling function can be found. */
26837 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
26842 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
26845 /* Do anything needed before RTL is emitted for each function. */
26847 arm_init_expanders (void)
26849 /* Arrange to initialize and mark the machine per-function status. */
26850 init_machine_status
= arm_init_machine_status
;
26852 /* This is to stop the combine pass optimizing away the alignment
26853 adjustment of va_arg. */
26854 /* ??? It is claimed that this should not be necessary. */
26856 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
26860 /* Like arm_compute_initial_elimination offset. Simpler because there
26861 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26862 to point at the base of the local variables after static stack
26863 space for a function has been allocated. */
26866 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
26868 arm_stack_offsets
*offsets
;
26870 offsets
= arm_get_frame_offsets ();
26874 case ARG_POINTER_REGNUM
:
26877 case STACK_POINTER_REGNUM
:
26878 return offsets
->outgoing_args
- offsets
->saved_args
;
26880 case FRAME_POINTER_REGNUM
:
26881 return offsets
->soft_frame
- offsets
->saved_args
;
26883 case ARM_HARD_FRAME_POINTER_REGNUM
:
26884 return offsets
->saved_regs
- offsets
->saved_args
;
26886 case THUMB_HARD_FRAME_POINTER_REGNUM
:
26887 return offsets
->locals_base
- offsets
->saved_args
;
26890 gcc_unreachable ();
26894 case FRAME_POINTER_REGNUM
:
26897 case STACK_POINTER_REGNUM
:
26898 return offsets
->outgoing_args
- offsets
->soft_frame
;
26900 case ARM_HARD_FRAME_POINTER_REGNUM
:
26901 return offsets
->saved_regs
- offsets
->soft_frame
;
26903 case THUMB_HARD_FRAME_POINTER_REGNUM
:
26904 return offsets
->locals_base
- offsets
->soft_frame
;
26907 gcc_unreachable ();
26912 gcc_unreachable ();
26916 /* Generate the function's prologue. */
26919 thumb1_expand_prologue (void)
26923 HOST_WIDE_INT amount
;
26924 arm_stack_offsets
*offsets
;
26925 unsigned long func_type
;
26927 unsigned long live_regs_mask
;
26928 unsigned long l_mask
;
26929 unsigned high_regs_pushed
= 0;
26931 func_type
= arm_current_func_type ();
26933 /* Naked functions don't have prologues. */
26934 if (IS_NAKED (func_type
))
26937 if (IS_INTERRUPT (func_type
))
26939 error ("interrupt Service Routines cannot be coded in Thumb mode");
26943 if (is_called_in_ARM_mode (current_function_decl
))
26944 emit_insn (gen_prologue_thumb1_interwork ());
26946 offsets
= arm_get_frame_offsets ();
26947 live_regs_mask
= offsets
->saved_regs_mask
;
26949 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26950 l_mask
= live_regs_mask
& 0x40ff;
26951 /* Then count how many other high registers will need to be pushed. */
26952 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26954 if (crtl
->args
.pretend_args_size
)
26956 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
26958 if (cfun
->machine
->uses_anonymous_args
)
26960 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
26961 unsigned long mask
;
26963 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
26964 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
26966 insn
= thumb1_emit_multi_reg_push (mask
, 0);
26970 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26971 stack_pointer_rtx
, x
));
26973 RTX_FRAME_RELATED_P (insn
) = 1;
26976 if (TARGET_BACKTRACE
)
26978 HOST_WIDE_INT offset
= 0;
26979 unsigned work_register
;
26980 rtx work_reg
, x
, arm_hfp_rtx
;
26982 /* We have been asked to create a stack backtrace structure.
26983 The code looks like this:
26987 0 sub SP, #16 Reserve space for 4 registers.
26988 2 push {R7} Push low registers.
26989 4 add R7, SP, #20 Get the stack pointer before the push.
26990 6 str R7, [SP, #8] Store the stack pointer
26991 (before reserving the space).
26992 8 mov R7, PC Get hold of the start of this code + 12.
26993 10 str R7, [SP, #16] Store it.
26994 12 mov R7, FP Get hold of the current frame pointer.
26995 14 str R7, [SP, #4] Store it.
26996 16 mov R7, LR Get hold of the current return address.
26997 18 str R7, [SP, #12] Store it.
26998 20 add R7, SP, #16 Point at the start of the
26999 backtrace structure.
27000 22 mov FP, R7 Put this value into the frame pointer. */
27002 work_register
= thumb_find_work_register (live_regs_mask
);
27003 work_reg
= gen_rtx_REG (SImode
, work_register
);
27004 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
27006 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27007 stack_pointer_rtx
, GEN_INT (-16)));
27008 RTX_FRAME_RELATED_P (insn
) = 1;
27012 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
27013 RTX_FRAME_RELATED_P (insn
) = 1;
27015 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
27018 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
27019 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27021 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
27022 x
= gen_frame_mem (SImode
, x
);
27023 emit_move_insn (x
, work_reg
);
27025 /* Make sure that the instruction fetching the PC is in the right place
27026 to calculate "start of backtrace creation code + 12". */
27027 /* ??? The stores using the common WORK_REG ought to be enough to
27028 prevent the scheduler from doing anything weird. Failing that
27029 we could always move all of the following into an UNSPEC_VOLATILE. */
27032 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27033 emit_move_insn (work_reg
, x
);
27035 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27036 x
= gen_frame_mem (SImode
, x
);
27037 emit_move_insn (x
, work_reg
);
27039 emit_move_insn (work_reg
, arm_hfp_rtx
);
27041 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27042 x
= gen_frame_mem (SImode
, x
);
27043 emit_move_insn (x
, work_reg
);
27047 emit_move_insn (work_reg
, arm_hfp_rtx
);
27049 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27050 x
= gen_frame_mem (SImode
, x
);
27051 emit_move_insn (x
, work_reg
);
27053 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27054 emit_move_insn (work_reg
, x
);
27056 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27057 x
= gen_frame_mem (SImode
, x
);
27058 emit_move_insn (x
, work_reg
);
27061 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
27062 emit_move_insn (work_reg
, x
);
27064 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
27065 x
= gen_frame_mem (SImode
, x
);
27066 emit_move_insn (x
, work_reg
);
27068 x
= GEN_INT (offset
+ 12);
27069 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27071 emit_move_insn (arm_hfp_rtx
, work_reg
);
27073 /* Optimization: If we are not pushing any low registers but we are going
27074 to push some high registers then delay our first push. This will just
27075 be a push of LR and we can combine it with the push of the first high
27077 else if ((l_mask
& 0xff) != 0
27078 || (high_regs_pushed
== 0 && l_mask
))
27080 unsigned long mask
= l_mask
;
27081 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
27082 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
27083 RTX_FRAME_RELATED_P (insn
) = 1;
27086 if (high_regs_pushed
)
27088 unsigned pushable_regs
;
27089 unsigned next_hi_reg
;
27090 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
27091 : crtl
->args
.info
.nregs
;
27092 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
27094 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
27095 if (live_regs_mask
& (1 << next_hi_reg
))
27098 /* Here we need to mask out registers used for passing arguments
27099 even if they can be pushed. This is to avoid using them to stash the high
27100 registers. Such kind of stash may clobber the use of arguments. */
27101 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
27103 if (pushable_regs
== 0)
27104 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
27106 while (high_regs_pushed
> 0)
27108 unsigned long real_regs_mask
= 0;
27110 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
27112 if (pushable_regs
& (1 << regno
))
27114 emit_move_insn (gen_rtx_REG (SImode
, regno
),
27115 gen_rtx_REG (SImode
, next_hi_reg
));
27117 high_regs_pushed
--;
27118 real_regs_mask
|= (1 << next_hi_reg
);
27120 if (high_regs_pushed
)
27122 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
27124 if (live_regs_mask
& (1 << next_hi_reg
))
27129 pushable_regs
&= ~((1 << regno
) - 1);
27135 /* If we had to find a work register and we have not yet
27136 saved the LR then add it to the list of regs to push. */
27137 if (l_mask
== (1 << LR_REGNUM
))
27139 pushable_regs
|= l_mask
;
27140 real_regs_mask
|= l_mask
;
27144 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
27145 RTX_FRAME_RELATED_P (insn
) = 1;
27149 /* Load the pic register before setting the frame pointer,
27150 so we can use r7 as a temporary work register. */
27151 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
27152 arm_load_pic_register (live_regs_mask
);
27154 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
27155 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
27156 stack_pointer_rtx
);
27158 if (flag_stack_usage_info
)
27159 current_function_static_stack_size
27160 = offsets
->outgoing_args
- offsets
->saved_args
;
27162 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27163 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
27168 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27169 GEN_INT (- amount
)));
27170 RTX_FRAME_RELATED_P (insn
) = 1;
27176 /* The stack decrement is too big for an immediate value in a single
27177 insn. In theory we could issue multiple subtracts, but after
27178 three of them it becomes more space efficient to place the full
27179 value in the constant pool and load into a register. (Also the
27180 ARM debugger really likes to see only one stack decrement per
27181 function). So instead we look for a scratch register into which
27182 we can load the decrement, and then we subtract this from the
27183 stack pointer. Unfortunately on the thumb the only available
27184 scratch registers are the argument registers, and we cannot use
27185 these as they may hold arguments to the function. Instead we
27186 attempt to locate a call preserved register which is used by this
27187 function. If we can find one, then we know that it will have
27188 been pushed at the start of the prologue and so we can corrupt
27190 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
27191 if (live_regs_mask
& (1 << regno
))
27194 gcc_assert(regno
<= LAST_LO_REGNUM
);
27196 reg
= gen_rtx_REG (SImode
, regno
);
27198 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
27200 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27201 stack_pointer_rtx
, reg
));
27203 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
27204 plus_constant (Pmode
, stack_pointer_rtx
,
27206 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
27207 RTX_FRAME_RELATED_P (insn
) = 1;
27211 if (frame_pointer_needed
)
27212 thumb_set_frame_pointer (offsets
);
27214 /* If we are profiling, make sure no instructions are scheduled before
27215 the call to mcount. Similarly if the user has requested no
27216 scheduling in the prolog. Similarly if we want non-call exceptions
27217 using the EABI unwinder, to prevent faulting instructions from being
27218 swapped with a stack adjustment. */
27219 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
27220 || (arm_except_unwind_info (&global_options
) == UI_TARGET
27221 && cfun
->can_throw_non_call_exceptions
))
27222 emit_insn (gen_blockage ());
27224 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
27225 if (live_regs_mask
& 0xff)
27226 cfun
->machine
->lr_save_eliminated
= 0;
27229 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
27230 POP instruction can be generated. LR should be replaced by PC. All
27231 the checks required are already done by USE_RETURN_INSN (). Hence,
27232 all we really need to check here is if single register is to be
27233 returned, or multiple register return. */
27235 thumb2_expand_return (bool simple_return
)
27238 unsigned long saved_regs_mask
;
27239 arm_stack_offsets
*offsets
;
27241 offsets
= arm_get_frame_offsets ();
27242 saved_regs_mask
= offsets
->saved_regs_mask
;
27244 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27245 if (saved_regs_mask
& (1 << i
))
27248 if (!simple_return
&& saved_regs_mask
)
27252 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27253 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
27254 rtx addr
= gen_rtx_MEM (SImode
,
27255 gen_rtx_POST_INC (SImode
,
27256 stack_pointer_rtx
));
27257 set_mem_alias_set (addr
, get_frame_alias_set ());
27258 XVECEXP (par
, 0, 0) = ret_rtx
;
27259 XVECEXP (par
, 0, 1) = gen_rtx_SET (SImode
, reg
, addr
);
27260 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
27261 emit_jump_insn (par
);
27265 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
27266 saved_regs_mask
|= (1 << PC_REGNUM
);
27267 arm_emit_multi_reg_pop (saved_regs_mask
);
27272 emit_jump_insn (simple_return_rtx
);
27277 thumb1_expand_epilogue (void)
27279 HOST_WIDE_INT amount
;
27280 arm_stack_offsets
*offsets
;
27283 /* Naked functions don't have prologues. */
27284 if (IS_NAKED (arm_current_func_type ()))
27287 offsets
= arm_get_frame_offsets ();
27288 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27290 if (frame_pointer_needed
)
27292 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
27293 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27295 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
27297 gcc_assert (amount
>= 0);
27300 emit_insn (gen_blockage ());
27303 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27304 GEN_INT (amount
)));
27307 /* r3 is always free in the epilogue. */
27308 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
27310 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
27311 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
27315 /* Emit a USE (stack_pointer_rtx), so that
27316 the stack adjustment will not be deleted. */
27317 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27319 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
27320 emit_insn (gen_blockage ());
27322 /* Emit a clobber for each insn that will be restored in the epilogue,
27323 so that flow2 will get register lifetimes correct. */
27324 for (regno
= 0; regno
< 13; regno
++)
27325 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
27326 emit_clobber (gen_rtx_REG (SImode
, regno
));
27328 if (! df_regs_ever_live_p (LR_REGNUM
))
27329 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
27332 /* Epilogue code for APCS frame. */
27334 arm_expand_epilogue_apcs_frame (bool really_return
)
27336 unsigned long func_type
;
27337 unsigned long saved_regs_mask
;
27340 int floats_from_frame
= 0;
27341 arm_stack_offsets
*offsets
;
27343 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
27344 func_type
= arm_current_func_type ();
27346 /* Get frame offsets for ARM. */
27347 offsets
= arm_get_frame_offsets ();
27348 saved_regs_mask
= offsets
->saved_regs_mask
;
27350 /* Find the offset of the floating-point save area in the frame. */
27352 = (offsets
->saved_args
27353 + arm_compute_static_chain_stack_bytes ()
27356 /* Compute how many core registers saved and how far away the floats are. */
27357 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27358 if (saved_regs_mask
& (1 << i
))
27361 floats_from_frame
+= 4;
27364 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27367 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
27369 /* The offset is from IP_REGNUM. */
27370 int saved_size
= arm_get_vfp_saved_size ();
27371 if (saved_size
> 0)
27374 floats_from_frame
+= saved_size
;
27375 insn
= emit_insn (gen_addsi3 (ip_rtx
,
27376 hard_frame_pointer_rtx
,
27377 GEN_INT (-floats_from_frame
)));
27378 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
27379 ip_rtx
, hard_frame_pointer_rtx
);
27382 /* Generate VFP register multi-pop. */
27383 start_reg
= FIRST_VFP_REGNUM
;
27385 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
27386 /* Look for a case where a reg does not need restoring. */
27387 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27388 && (!df_regs_ever_live_p (i
+ 1)
27389 || call_used_regs
[i
+ 1]))
27391 if (start_reg
!= i
)
27392 arm_emit_vfp_multi_reg_pop (start_reg
,
27393 (i
- start_reg
) / 2,
27394 gen_rtx_REG (SImode
,
27399 /* Restore the remaining regs that we have discovered (or possibly
27400 even all of them, if the conditional in the for loop never
27402 if (start_reg
!= i
)
27403 arm_emit_vfp_multi_reg_pop (start_reg
,
27404 (i
- start_reg
) / 2,
27405 gen_rtx_REG (SImode
, IP_REGNUM
));
27410 /* The frame pointer is guaranteed to be non-double-word aligned, as
27411 it is set to double-word-aligned old_stack_pointer - 4. */
27413 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
27415 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
27416 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27418 rtx addr
= gen_frame_mem (V2SImode
,
27419 plus_constant (Pmode
, hard_frame_pointer_rtx
,
27421 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27422 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27423 gen_rtx_REG (V2SImode
, i
),
27429 /* saved_regs_mask should contain IP which contains old stack pointer
27430 at the time of activation creation. Since SP and IP are adjacent registers,
27431 we can restore the value directly into SP. */
27432 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
27433 saved_regs_mask
&= ~(1 << IP_REGNUM
);
27434 saved_regs_mask
|= (1 << SP_REGNUM
);
27436 /* There are two registers left in saved_regs_mask - LR and PC. We
27437 only need to restore LR (the return address), but to
27438 save time we can load it directly into PC, unless we need a
27439 special function exit sequence, or we are not really returning. */
27441 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
27442 && !crtl
->calls_eh_return
)
27443 /* Delete LR from the register mask, so that LR on
27444 the stack is loaded into the PC in the register mask. */
27445 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27447 saved_regs_mask
&= ~(1 << PC_REGNUM
);
27449 num_regs
= bit_count (saved_regs_mask
);
27450 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
27453 emit_insn (gen_blockage ());
27454 /* Unwind the stack to just below the saved registers. */
27455 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27456 hard_frame_pointer_rtx
,
27457 GEN_INT (- 4 * num_regs
)));
27459 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
27460 stack_pointer_rtx
, hard_frame_pointer_rtx
);
27463 arm_emit_multi_reg_pop (saved_regs_mask
);
27465 if (IS_INTERRUPT (func_type
))
27467 /* Interrupt handlers will have pushed the
27468 IP onto the stack, so restore it now. */
27470 rtx addr
= gen_rtx_MEM (SImode
,
27471 gen_rtx_POST_INC (SImode
,
27472 stack_pointer_rtx
));
27473 set_mem_alias_set (addr
, get_frame_alias_set ());
27474 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
27475 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27476 gen_rtx_REG (SImode
, IP_REGNUM
),
27480 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
27483 if (crtl
->calls_eh_return
)
27484 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27486 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27488 if (IS_STACKALIGN (func_type
))
27489 /* Restore the original stack pointer. Before prologue, the stack was
27490 realigned and the original stack pointer saved in r0. For details,
27491 see comment in arm_expand_prologue. */
27492 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
27494 emit_jump_insn (simple_return_rtx
);
27497 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27498 function is not a sibcall. */
27500 arm_expand_epilogue (bool really_return
)
27502 unsigned long func_type
;
27503 unsigned long saved_regs_mask
;
27507 arm_stack_offsets
*offsets
;
27509 func_type
= arm_current_func_type ();
27511 /* Naked functions don't have epilogue. Hence, generate return pattern, and
27512 let output_return_instruction take care of instruction emission if any. */
27513 if (IS_NAKED (func_type
)
27514 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
27517 emit_jump_insn (simple_return_rtx
);
27521 /* If we are throwing an exception, then we really must be doing a
27522 return, so we can't tail-call. */
27523 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
27525 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
27527 arm_expand_epilogue_apcs_frame (really_return
);
27531 /* Get frame offsets for ARM. */
27532 offsets
= arm_get_frame_offsets ();
27533 saved_regs_mask
= offsets
->saved_regs_mask
;
27534 num_regs
= bit_count (saved_regs_mask
);
27536 if (frame_pointer_needed
)
27539 /* Restore stack pointer if necessary. */
27542 /* In ARM mode, frame pointer points to first saved register.
27543 Restore stack pointer to last saved register. */
27544 amount
= offsets
->frame
- offsets
->saved_regs
;
27546 /* Force out any pending memory operations that reference stacked data
27547 before stack de-allocation occurs. */
27548 emit_insn (gen_blockage ());
27549 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27550 hard_frame_pointer_rtx
,
27551 GEN_INT (amount
)));
27552 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27554 hard_frame_pointer_rtx
);
27556 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27558 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27562 /* In Thumb-2 mode, the frame pointer points to the last saved
27564 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27567 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
27568 hard_frame_pointer_rtx
,
27569 GEN_INT (amount
)));
27570 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27571 hard_frame_pointer_rtx
,
27572 hard_frame_pointer_rtx
);
27575 /* Force out any pending memory operations that reference stacked data
27576 before stack de-allocation occurs. */
27577 emit_insn (gen_blockage ());
27578 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
27579 hard_frame_pointer_rtx
));
27580 arm_add_cfa_adjust_cfa_note (insn
, 0,
27582 hard_frame_pointer_rtx
);
27583 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27585 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27590 /* Pop off outgoing args and local frame to adjust stack pointer to
27591 last saved register. */
27592 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27596 /* Force out any pending memory operations that reference stacked data
27597 before stack de-allocation occurs. */
27598 emit_insn (gen_blockage ());
27599 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27601 GEN_INT (amount
)));
27602 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
27603 stack_pointer_rtx
, stack_pointer_rtx
);
27604 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27606 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27610 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27612 /* Generate VFP register multi-pop. */
27613 int end_reg
= LAST_VFP_REGNUM
+ 1;
27615 /* Scan the registers in reverse order. We need to match
27616 any groupings made in the prologue and generate matching
27617 vldm operations. The need to match groups is because,
27618 unlike pop, vldm can only do consecutive regs. */
27619 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
27620 /* Look for a case where a reg does not need restoring. */
27621 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27622 && (!df_regs_ever_live_p (i
+ 1)
27623 || call_used_regs
[i
+ 1]))
27625 /* Restore the regs discovered so far (from reg+2 to
27627 if (end_reg
> i
+ 2)
27628 arm_emit_vfp_multi_reg_pop (i
+ 2,
27629 (end_reg
- (i
+ 2)) / 2,
27630 stack_pointer_rtx
);
27634 /* Restore the remaining regs that we have discovered (or possibly
27635 even all of them, if the conditional in the for loop never
27637 if (end_reg
> i
+ 2)
27638 arm_emit_vfp_multi_reg_pop (i
+ 2,
27639 (end_reg
- (i
+ 2)) / 2,
27640 stack_pointer_rtx
);
27644 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
27645 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27648 rtx addr
= gen_rtx_MEM (V2SImode
,
27649 gen_rtx_POST_INC (SImode
,
27650 stack_pointer_rtx
));
27651 set_mem_alias_set (addr
, get_frame_alias_set ());
27652 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27653 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27654 gen_rtx_REG (V2SImode
, i
),
27656 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27657 stack_pointer_rtx
, stack_pointer_rtx
);
27660 if (saved_regs_mask
)
27663 bool return_in_pc
= false;
27665 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
27666 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
27667 && !IS_STACKALIGN (func_type
)
27669 && crtl
->args
.pretend_args_size
== 0
27670 && saved_regs_mask
& (1 << LR_REGNUM
)
27671 && !crtl
->calls_eh_return
)
27673 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27674 saved_regs_mask
|= (1 << PC_REGNUM
);
27675 return_in_pc
= true;
27678 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
27680 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27681 if (saved_regs_mask
& (1 << i
))
27683 rtx addr
= gen_rtx_MEM (SImode
,
27684 gen_rtx_POST_INC (SImode
,
27685 stack_pointer_rtx
));
27686 set_mem_alias_set (addr
, get_frame_alias_set ());
27688 if (i
== PC_REGNUM
)
27690 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27691 XVECEXP (insn
, 0, 0) = ret_rtx
;
27692 XVECEXP (insn
, 0, 1) = gen_rtx_SET (SImode
,
27693 gen_rtx_REG (SImode
, i
),
27695 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
27696 insn
= emit_jump_insn (insn
);
27700 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
27702 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27703 gen_rtx_REG (SImode
, i
),
27705 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27707 stack_pointer_rtx
);
27714 && current_tune
->prefer_ldrd_strd
27715 && !optimize_function_for_size_p (cfun
))
27718 thumb2_emit_ldrd_pop (saved_regs_mask
);
27719 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
27720 arm_emit_ldrd_pop (saved_regs_mask
);
27722 arm_emit_multi_reg_pop (saved_regs_mask
);
27725 arm_emit_multi_reg_pop (saved_regs_mask
);
27728 if (return_in_pc
== true)
27732 if (crtl
->args
.pretend_args_size
)
27735 rtx dwarf
= NULL_RTX
;
27736 rtx tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27738 GEN_INT (crtl
->args
.pretend_args_size
)));
27740 RTX_FRAME_RELATED_P (tmp
) = 1;
27742 if (cfun
->machine
->uses_anonymous_args
)
27744 /* Restore pretend args. Refer arm_expand_prologue on how to save
27745 pretend_args in stack. */
27746 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
27747 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
27748 for (j
= 0, i
= 0; j
< num_regs
; i
++)
27749 if (saved_regs_mask
& (1 << i
))
27751 rtx reg
= gen_rtx_REG (SImode
, i
);
27752 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
27755 REG_NOTES (tmp
) = dwarf
;
27757 arm_add_cfa_adjust_cfa_note (tmp
, crtl
->args
.pretend_args_size
,
27758 stack_pointer_rtx
, stack_pointer_rtx
);
27761 if (!really_return
)
27764 if (crtl
->calls_eh_return
)
27765 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27767 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27769 if (IS_STACKALIGN (func_type
))
27770 /* Restore the original stack pointer. Before prologue, the stack was
27771 realigned and the original stack pointer saved in r0. For details,
27772 see comment in arm_expand_prologue. */
27773 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
27775 emit_jump_insn (simple_return_rtx
);
27778 /* Implementation of insn prologue_thumb1_interwork. This is the first
27779 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27782 thumb1_output_interwork (void)
27785 FILE *f
= asm_out_file
;
27787 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
27788 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
27790 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
27792 /* Generate code sequence to switch us into Thumb mode. */
27793 /* The .code 32 directive has already been emitted by
27794 ASM_DECLARE_FUNCTION_NAME. */
27795 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
27796 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
27798 /* Generate a label, so that the debugger will notice the
27799 change in instruction sets. This label is also used by
27800 the assembler to bypass the ARM code when this function
27801 is called from a Thumb encoded function elsewhere in the
27802 same file. Hence the definition of STUB_NAME here must
27803 agree with the definition in gas/config/tc-arm.c. */
27805 #define STUB_NAME ".real_start_of"
27807 fprintf (f
, "\t.code\t16\n");
27809 if (arm_dllexport_name_p (name
))
27810 name
= arm_strip_name_encoding (name
);
27812 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
27813 fprintf (f
, "\t.thumb_func\n");
27814 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
27819 /* Handle the case of a double word load into a low register from
27820 a computed memory address. The computed address may involve a
27821 register which is overwritten by the load. */
27823 thumb_load_double_from_address (rtx
*operands
)
27831 gcc_assert (REG_P (operands
[0]));
27832 gcc_assert (MEM_P (operands
[1]));
27834 /* Get the memory address. */
27835 addr
= XEXP (operands
[1], 0);
27837 /* Work out how the memory address is computed. */
27838 switch (GET_CODE (addr
))
27841 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27843 if (REGNO (operands
[0]) == REGNO (addr
))
27845 output_asm_insn ("ldr\t%H0, %2", operands
);
27846 output_asm_insn ("ldr\t%0, %1", operands
);
27850 output_asm_insn ("ldr\t%0, %1", operands
);
27851 output_asm_insn ("ldr\t%H0, %2", operands
);
27856 /* Compute <address> + 4 for the high order load. */
27857 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27859 output_asm_insn ("ldr\t%0, %1", operands
);
27860 output_asm_insn ("ldr\t%H0, %2", operands
);
27864 arg1
= XEXP (addr
, 0);
27865 arg2
= XEXP (addr
, 1);
27867 if (CONSTANT_P (arg1
))
27868 base
= arg2
, offset
= arg1
;
27870 base
= arg1
, offset
= arg2
;
27872 gcc_assert (REG_P (base
));
27874 /* Catch the case of <address> = <reg> + <reg> */
27875 if (REG_P (offset
))
27877 int reg_offset
= REGNO (offset
);
27878 int reg_base
= REGNO (base
);
27879 int reg_dest
= REGNO (operands
[0]);
27881 /* Add the base and offset registers together into the
27882 higher destination register. */
27883 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
27884 reg_dest
+ 1, reg_base
, reg_offset
);
27886 /* Load the lower destination register from the address in
27887 the higher destination register. */
27888 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
27889 reg_dest
, reg_dest
+ 1);
27891 /* Load the higher destination register from its own address
27893 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
27894 reg_dest
+ 1, reg_dest
+ 1);
27898 /* Compute <address> + 4 for the high order load. */
27899 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27901 /* If the computed address is held in the low order register
27902 then load the high order register first, otherwise always
27903 load the low order register first. */
27904 if (REGNO (operands
[0]) == REGNO (base
))
27906 output_asm_insn ("ldr\t%H0, %2", operands
);
27907 output_asm_insn ("ldr\t%0, %1", operands
);
27911 output_asm_insn ("ldr\t%0, %1", operands
);
27912 output_asm_insn ("ldr\t%H0, %2", operands
);
27918 /* With no registers to worry about we can just load the value
27920 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27922 output_asm_insn ("ldr\t%H0, %2", operands
);
27923 output_asm_insn ("ldr\t%0, %1", operands
);
27927 gcc_unreachable ();
27934 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
27941 if (REGNO (operands
[4]) > REGNO (operands
[5]))
27944 operands
[4] = operands
[5];
27947 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
27948 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
27952 if (REGNO (operands
[4]) > REGNO (operands
[5]))
27955 operands
[4] = operands
[5];
27958 if (REGNO (operands
[5]) > REGNO (operands
[6]))
27961 operands
[5] = operands
[6];
27964 if (REGNO (operands
[4]) > REGNO (operands
[5]))
27967 operands
[4] = operands
[5];
27971 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
27972 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
27976 gcc_unreachable ();
27982 /* Output a call-via instruction for thumb state. */
27984 thumb_call_via_reg (rtx reg
)
27986 int regno
= REGNO (reg
);
27989 gcc_assert (regno
< LR_REGNUM
);
27991 /* If we are in the normal text section we can use a single instance
27992 per compilation unit. If we are doing function sections, then we need
27993 an entry per section, since we can't rely on reachability. */
27994 if (in_section
== text_section
)
27996 thumb_call_reg_needed
= 1;
27998 if (thumb_call_via_label
[regno
] == NULL
)
27999 thumb_call_via_label
[regno
] = gen_label_rtx ();
28000 labelp
= thumb_call_via_label
+ regno
;
28004 if (cfun
->machine
->call_via
[regno
] == NULL
)
28005 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
28006 labelp
= cfun
->machine
->call_via
+ regno
;
28009 output_asm_insn ("bl\t%a0", labelp
);
28013 /* Routines for generating rtl. */
28015 thumb_expand_movmemqi (rtx
*operands
)
28017 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
28018 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
28019 HOST_WIDE_INT len
= INTVAL (operands
[2]);
28020 HOST_WIDE_INT offset
= 0;
28024 emit_insn (gen_movmem12b (out
, in
, out
, in
));
28030 emit_insn (gen_movmem8b (out
, in
, out
, in
));
28036 rtx reg
= gen_reg_rtx (SImode
);
28037 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
28038 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
28045 rtx reg
= gen_reg_rtx (HImode
);
28046 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
28047 plus_constant (Pmode
, in
,
28049 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
28058 rtx reg
= gen_reg_rtx (QImode
);
28059 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
28060 plus_constant (Pmode
, in
,
28062 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
28069 thumb_reload_out_hi (rtx
*operands
)
28071 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
28074 /* Handle reading a half-word from memory during reload. */
28076 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
28078 gcc_unreachable ();
28081 /* Return the length of a function name prefix
28082 that starts with the character 'c'. */
28084 arm_get_strip_length (int c
)
28088 ARM_NAME_ENCODING_LENGTHS
/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  /* Keep skipping while NAME still starts with a recognized prefix.  */
  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}
/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
28127 /* This function is used to emit an EABI tag and its associated value.
28128 We emit the numerical value of the tag in case the assembler does not
28129 support textual tags. (Eg gas prior to 2.20). If requested we include
28130 the tag name in a comment so that anyone reading the assembler output
28131 will know which tag is being set.
28133 This function is not static because arm-c.c needs it too. */
28136 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
28138 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
28139 if (flag_verbose_asm
|| flag_debug_asm
)
28140 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
28141 asm_fprintf (asm_out_file
, "\n");
28145 arm_file_start (void)
28149 if (TARGET_UNIFIED_ASM
)
28150 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
28154 const char *fpu_name
;
28155 if (arm_selected_arch
)
28157 /* armv7ve doesn't support any extensions. */
28158 if (strcmp (arm_selected_arch
->name
, "armv7ve") == 0)
28160 /* Keep backward compatability for assemblers
28161 which don't support armv7ve. */
28162 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
28163 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
28164 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
28165 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
28166 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
28170 const char* pos
= strchr (arm_selected_arch
->name
, '+');
28174 gcc_assert (strlen (arm_selected_arch
->name
)
28175 <= sizeof (buf
) / sizeof (*pos
));
28176 strncpy (buf
, arm_selected_arch
->name
,
28177 (pos
- arm_selected_arch
->name
) * sizeof (*pos
));
28178 buf
[pos
- arm_selected_arch
->name
] = '\0';
28179 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
28180 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
28183 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
28186 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
28187 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
28190 const char* truncated_name
28191 = arm_rewrite_selected_cpu (arm_selected_cpu
->name
);
28192 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
28195 if (TARGET_SOFT_FLOAT
)
28197 fpu_name
= "softvfp";
28201 fpu_name
= arm_fpu_desc
->name
;
28202 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
28204 if (TARGET_HARD_FLOAT
)
28205 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28206 if (TARGET_HARD_FLOAT_ABI
)
28207 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28210 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
28212 /* Some of these attributes only apply when the corresponding features
28213 are used. However we don't have any easy way of figuring this out.
28214 Conservatively record the setting that would have been used. */
28216 if (flag_rounding_math
)
28217 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28219 if (!flag_unsafe_math_optimizations
)
28221 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28222 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28224 if (flag_signaling_nans
)
28225 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28227 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28228 flag_finite_math_only
? 1 : 3);
28230 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28231 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28232 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28233 flag_short_enums
? 1 : 2);
28235 /* Tag_ABI_optimization_goals. */
28238 else if (optimize
>= 2)
28244 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
28246 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28249 if (arm_fp16_format
)
28250 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28251 (int) arm_fp16_format
);
28253 if (arm_lang_output_object_attributes_hook
)
28254 arm_lang_output_object_attributes_hook();
28257 default_file_start ();
28261 arm_file_end (void)
28265 if (NEED_INDICATE_EXEC_STACK
)
28266 /* Add .note.GNU-stack. */
28267 file_end_indicate_exec_stack ();
28269 if (! thumb_call_reg_needed
)
28272 switch_to_section (text_section
);
28273 asm_fprintf (asm_out_file
, "\t.code 16\n");
28274 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
28276 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
28278 rtx label
= thumb_call_via_label
[regno
];
28282 targetm
.asm_out
.internal_label (asm_out_file
, "L",
28283 CODE_LABEL_NUMBER (label
));
28284 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
28290 /* Symbols in the text segment can be accessed without indirecting via the
28291 constant pool; it may take an extra binary operation, but this is still
28292 faster than indirecting via memory. Don't do this when not optimizing,
28293 since we won't be calculating al of the offsets necessary to do this
28297 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
28299 if (optimize
> 0 && TREE_CONSTANT (decl
))
28300 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
28302 default_encode_section_info (decl
, rtl
, first
);
28304 #endif /* !ARM_PE */
28307 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
28309 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
28310 && !strcmp (prefix
, "L"))
28312 arm_ccfsm_state
= 0;
28313 arm_target_insn
= NULL
;
28315 default_internal_label (stream
, prefix
, labelno
);
28318 /* Output code to add DELTA to the first argument, and then jump
28319 to FUNCTION. Used for C++ multiple inheritance. */
28321 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
28322 HOST_WIDE_INT delta
,
28323 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
28326 static int thunk_label
= 0;
28329 int mi_delta
= delta
;
28330 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
28332 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
28335 mi_delta
= - mi_delta
;
28337 final_start_function (emit_barrier (), file
, 1);
28341 int labelno
= thunk_label
++;
28342 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
28343 /* Thunks are entered in arm mode when avaiable. */
28344 if (TARGET_THUMB1_ONLY
)
28346 /* push r3 so we can use it as a temporary. */
28347 /* TODO: Omit this save if r3 is not used. */
28348 fputs ("\tpush {r3}\n", file
);
28349 fputs ("\tldr\tr3, ", file
);
28353 fputs ("\tldr\tr12, ", file
);
28355 assemble_name (file
, label
);
28356 fputc ('\n', file
);
28359 /* If we are generating PIC, the ldr instruction below loads
28360 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28361 the address of the add + 8, so we have:
28363 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28366 Note that we have "+ 1" because some versions of GNU ld
28367 don't set the low bit of the result for R_ARM_REL32
28368 relocations against thumb function symbols.
28369 On ARMv6M this is +4, not +8. */
28370 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
28371 assemble_name (file
, labelpc
);
28372 fputs (":\n", file
);
28373 if (TARGET_THUMB1_ONLY
)
28375 /* This is 2 insns after the start of the thunk, so we know it
28376 is 4-byte aligned. */
28377 fputs ("\tadd\tr3, pc, r3\n", file
);
28378 fputs ("\tmov r12, r3\n", file
);
28381 fputs ("\tadd\tr12, pc, r12\n", file
);
28383 else if (TARGET_THUMB1_ONLY
)
28384 fputs ("\tmov r12, r3\n", file
);
28386 if (TARGET_THUMB1_ONLY
)
28388 if (mi_delta
> 255)
28390 fputs ("\tldr\tr3, ", file
);
28391 assemble_name (file
, label
);
28392 fputs ("+4\n", file
);
28393 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
28394 mi_op
, this_regno
, this_regno
);
28396 else if (mi_delta
!= 0)
28398 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28399 mi_op
, this_regno
, this_regno
,
28405 /* TODO: Use movw/movt for large constants when available. */
28406 while (mi_delta
!= 0)
28408 if ((mi_delta
& (3 << shift
)) == 0)
28412 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28413 mi_op
, this_regno
, this_regno
,
28414 mi_delta
& (0xff << shift
));
28415 mi_delta
&= ~(0xff << shift
);
28422 if (TARGET_THUMB1_ONLY
)
28423 fputs ("\tpop\t{r3}\n", file
);
28425 fprintf (file
, "\tbx\tr12\n");
28426 ASM_OUTPUT_ALIGN (file
, 2);
28427 assemble_name (file
, label
);
28428 fputs (":\n", file
);
28431 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28432 rtx tem
= XEXP (DECL_RTL (function
), 0);
28433 tem
= plus_constant (GET_MODE (tem
), tem
, -7);
28434 tem
= gen_rtx_MINUS (GET_MODE (tem
),
28436 gen_rtx_SYMBOL_REF (Pmode
,
28437 ggc_strdup (labelpc
)));
28438 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
28441 /* Output ".word .LTHUNKn". */
28442 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
28444 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
28445 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
28449 fputs ("\tb\t", file
);
28450 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28451 if (NEED_PLT_RELOC
)
28452 fputs ("(PLT)", file
);
28453 fputc ('\n', file
);
28456 final_end_function ();
28460 arm_emit_vector_const (FILE *file
, rtx x
)
28463 const char * pattern
;
28465 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
28467 switch (GET_MODE (x
))
28469 case V2SImode
: pattern
= "%08x"; break;
28470 case V4HImode
: pattern
= "%04x"; break;
28471 case V8QImode
: pattern
= "%02x"; break;
28472 default: gcc_unreachable ();
28475 fprintf (file
, "0x");
28476 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
28480 element
= CONST_VECTOR_ELT (x
, i
);
28481 fprintf (file
, pattern
, INTVAL (element
));
28487 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28488 HFmode constant pool entries are actually loaded with ldr. */
28490 arm_emit_fp16_const (rtx c
)
28495 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
28496 bits
= real_to_target (NULL
, &r
, HFmode
);
28497 if (WORDS_BIG_ENDIAN
)
28498 assemble_zeros (2);
28499 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
28500 if (!WORDS_BIG_ENDIAN
)
28501 assemble_zeros (2);
28505 arm_output_load_gr (rtx
*operands
)
28512 if (!MEM_P (operands
[1])
28513 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
28514 || !REG_P (reg
= XEXP (sum
, 0))
28515 || !CONST_INT_P (offset
= XEXP (sum
, 1))
28516 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
28517 return "wldrw%?\t%0, %1";
28519 /* Fix up an out-of-range load of a GR register. */
28520 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
28521 wcgr
= operands
[0];
28523 output_asm_insn ("ldr%?\t%0, %1", operands
);
28525 operands
[0] = wcgr
;
28527 output_asm_insn ("tmcr%?\t%0, %1", operands
);
28528 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
28533 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28535 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28536 named arg and all anonymous args onto the stack.
28537 XXX I know the prologue shouldn't be pushing registers, but it is faster
28541 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
28542 enum machine_mode mode
,
28545 int second_time ATTRIBUTE_UNUSED
)
28547 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
28550 cfun
->machine
->uses_anonymous_args
= 1;
28551 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
28553 nregs
= pcum
->aapcs_ncrn
;
28554 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
28558 nregs
= pcum
->nregs
;
28560 if (nregs
< NUM_ARG_REGS
)
28561 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
28564 /* We can't rely on the caller doing the proper promotion when
28565 using APCS or ATPCS. */
28568 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
28570 return !TARGET_AAPCS_BASED
;
28573 static enum machine_mode
28574 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
28575 enum machine_mode mode
,
28576 int *punsignedp ATTRIBUTE_UNUSED
,
28577 const_tree fntype ATTRIBUTE_UNUSED
,
28578 int for_return ATTRIBUTE_UNUSED
)
28580 if (GET_MODE_CLASS (mode
) == MODE_INT
28581 && GET_MODE_SIZE (mode
) < 4)
28587 /* AAPCS based ABIs use short enums by default. */
28590 arm_default_short_enums (void)
28592 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
28596 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28599 arm_align_anon_bitfield (void)
28601 return TARGET_AAPCS_BASED
;
28605 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28608 arm_cxx_guard_type (void)
28610 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
28614 /* The EABI says test the least significant bit of a guard variable. */
28617 arm_cxx_guard_mask_bit (void)
28619 return TARGET_AAPCS_BASED
;
28623 /* The EABI specifies that all array cookies are 8 bytes long. */
28626 arm_get_cookie_size (tree type
)
28630 if (!TARGET_AAPCS_BASED
)
28631 return default_cxx_get_cookie_size (type
);
28633 size
= build_int_cst (sizetype
, 8);
28638 /* The EABI says that array cookies should also contain the element size. */
28641 arm_cookie_has_size (void)
28643 return TARGET_AAPCS_BASED
;
28647 /* The EABI says constructors and destructors should return a pointer to
28648 the object constructed/destroyed. */
28651 arm_cxx_cdtor_returns_this (void)
28653 return TARGET_AAPCS_BASED
;
28656 /* The EABI says that an inline function may never be the key
28660 arm_cxx_key_method_may_be_inline (void)
28662 return !TARGET_AAPCS_BASED
;
28666 arm_cxx_determine_class_data_visibility (tree decl
)
28668 if (!TARGET_AAPCS_BASED
28669 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
28672 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28673 is exported. However, on systems without dynamic vague linkage,
28674 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28675 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
28676 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
28678 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
28679 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
28683 arm_cxx_class_data_always_comdat (void)
28685 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28686 vague linkage if the class has no key function. */
28687 return !TARGET_AAPCS_BASED
;
28691 /* The EABI says __aeabi_atexit should be used to register static
28695 arm_cxx_use_aeabi_atexit (void)
28697 return TARGET_AAPCS_BASED
;
28702 arm_set_return_address (rtx source
, rtx scratch
)
28704 arm_stack_offsets
*offsets
;
28705 HOST_WIDE_INT delta
;
28707 unsigned long saved_regs
;
28709 offsets
= arm_get_frame_offsets ();
28710 saved_regs
= offsets
->saved_regs_mask
;
28712 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
28713 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
28716 if (frame_pointer_needed
)
28717 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
28720 /* LR will be the first saved register. */
28721 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
28726 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
28727 GEN_INT (delta
& ~4095)));
28732 addr
= stack_pointer_rtx
;
28734 addr
= plus_constant (Pmode
, addr
, delta
);
28736 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
28742 thumb_set_return_address (rtx source
, rtx scratch
)
28744 arm_stack_offsets
*offsets
;
28745 HOST_WIDE_INT delta
;
28746 HOST_WIDE_INT limit
;
28749 unsigned long mask
;
28753 offsets
= arm_get_frame_offsets ();
28754 mask
= offsets
->saved_regs_mask
;
28755 if (mask
& (1 << LR_REGNUM
))
28758 /* Find the saved regs. */
28759 if (frame_pointer_needed
)
28761 delta
= offsets
->soft_frame
- offsets
->saved_args
;
28762 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
28768 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
28771 /* Allow for the stack frame. */
28772 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
28774 /* The link register is always the first saved register. */
28777 /* Construct the address. */
28778 addr
= gen_rtx_REG (SImode
, reg
);
28781 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
28782 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
28786 addr
= plus_constant (Pmode
, addr
, delta
);
28788 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
28791 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
28794 /* Implements target hook vector_mode_supported_p. */
28796 arm_vector_mode_supported_p (enum machine_mode mode
)
28798 /* Neon also supports V2SImode, etc. listed in the clause below. */
28799 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
28800 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
28803 if ((TARGET_NEON
|| TARGET_IWMMXT
)
28804 && ((mode
== V2SImode
)
28805 || (mode
== V4HImode
)
28806 || (mode
== V8QImode
)))
28809 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
28810 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
28811 || mode
== V2HAmode
))
28817 /* Implements target hook array_mode_supported_p. */
28820 arm_array_mode_supported_p (enum machine_mode mode
,
28821 unsigned HOST_WIDE_INT nelems
)
28824 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
28825 && (nelems
>= 2 && nelems
<= 4))
28831 /* Use the option -mvectorize-with-neon-double to override the use of quardword
28832 registers when autovectorizing for Neon, at least until multiple vector
28833 widths are supported properly by the middle-end. */
28835 static enum machine_mode
28836 arm_preferred_simd_mode (enum machine_mode mode
)
28842 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
28844 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
28846 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
28848 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
28850 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
28857 if (TARGET_REALLY_IWMMXT
)
28873 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28875 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28876 using r0-r4 for function arguments, r7 for the stack frame and don't have
28877 enough left over to do doubleword arithmetic. For Thumb-2 all the
28878 potentially problematic instructions accept high registers so this is not
28879 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28880 that require many low registers. */
28882 arm_class_likely_spilled_p (reg_class_t rclass
)
28884 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
28885 || rclass
== CC_REG
)
28891 /* Implements target hook small_register_classes_for_mode_p. */
28893 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED
)
28895 return TARGET_THUMB1
;
28898 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28899 ARM insns and therefore guarantee that the shift count is modulo 256.
28900 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28901 guarantee no particular behavior for out-of-range counts. */
28903 static unsigned HOST_WIDE_INT
28904 arm_shift_truncation_mask (enum machine_mode mode
)
28906 return mode
== SImode
? 255 : 0;
28910 /* Map internal gcc register numbers to DWARF2 register numbers. */
28913 arm_dbx_register_number (unsigned int regno
)
28918 if (IS_VFP_REGNUM (regno
))
28920 /* See comment in arm_dwarf_register_span. */
28921 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
28922 return 64 + regno
- FIRST_VFP_REGNUM
;
28924 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
28927 if (IS_IWMMXT_GR_REGNUM (regno
))
28928 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
28930 if (IS_IWMMXT_REGNUM (regno
))
28931 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
28933 gcc_unreachable ();
28936 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28937 GCC models tham as 64 32-bit registers, so we need to describe this to
28938 the DWARF generation code. Other registers can use the default. */
28940 arm_dwarf_register_span (rtx rtl
)
28942 enum machine_mode mode
;
28948 regno
= REGNO (rtl
);
28949 if (!IS_VFP_REGNUM (regno
))
28952 /* XXX FIXME: The EABI defines two VFP register ranges:
28953 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28955 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28956 corresponding D register. Until GDB supports this, we shall use the
28957 legacy encodings. We also use these encodings for D0-D15 for
28958 compatibility with older debuggers. */
28959 mode
= GET_MODE (rtl
);
28960 if (GET_MODE_SIZE (mode
) < 8)
28963 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
28965 nregs
= GET_MODE_SIZE (mode
) / 4;
28966 for (i
= 0; i
< nregs
; i
+= 2)
28967 if (TARGET_BIG_END
)
28969 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
28970 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
28974 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
28975 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
28980 nregs
= GET_MODE_SIZE (mode
) / 8;
28981 for (i
= 0; i
< nregs
; i
++)
28982 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
28985 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
28988 #if ARM_UNWIND_INFO
28989 /* Emit unwind directives for a store-multiple instruction or stack pointer
28990 push during alignment.
28991 These should only ever be generated by the function prologue code, so
28992 expect them to have a particular form.
28993 The store-multiple instruction sometimes pushes pc as the last register,
28994 although it should not be tracked into unwind information, or for -Os
28995 sometimes pushes some dummy registers before first register that needs
28996 to be tracked in unwind information; such dummy registers are there just
28997 to avoid separate stack adjustment, and will not be restored in the
29001 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
29004 HOST_WIDE_INT offset
;
29005 HOST_WIDE_INT nregs
;
29009 unsigned padfirst
= 0, padlast
= 0;
29012 e
= XVECEXP (p
, 0, 0);
29013 gcc_assert (GET_CODE (e
) == SET
);
29015 /* First insn will adjust the stack pointer. */
29016 gcc_assert (GET_CODE (e
) == SET
29017 && REG_P (SET_DEST (e
))
29018 && REGNO (SET_DEST (e
)) == SP_REGNUM
29019 && GET_CODE (SET_SRC (e
)) == PLUS
);
29021 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
29022 nregs
= XVECLEN (p
, 0) - 1;
29023 gcc_assert (nregs
);
29025 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
29028 /* For -Os dummy registers can be pushed at the beginning to
29029 avoid separate stack pointer adjustment. */
29030 e
= XVECEXP (p
, 0, 1);
29031 e
= XEXP (SET_DEST (e
), 0);
29032 if (GET_CODE (e
) == PLUS
)
29033 padfirst
= INTVAL (XEXP (e
, 1));
29034 gcc_assert (padfirst
== 0 || optimize_size
);
29035 /* The function prologue may also push pc, but not annotate it as it is
29036 never restored. We turn this into a stack pointer adjustment. */
29037 e
= XVECEXP (p
, 0, nregs
);
29038 e
= XEXP (SET_DEST (e
), 0);
29039 if (GET_CODE (e
) == PLUS
)
29040 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
29042 padlast
= offset
- 4;
29043 gcc_assert (padlast
== 0 || padlast
== 4);
29045 fprintf (asm_out_file
, "\t.pad #4\n");
29047 fprintf (asm_out_file
, "\t.save {");
29049 else if (IS_VFP_REGNUM (reg
))
29052 fprintf (asm_out_file
, "\t.vsave {");
29055 /* Unknown register type. */
29056 gcc_unreachable ();
29058 /* If the stack increment doesn't match the size of the saved registers,
29059 something has gone horribly wrong. */
29060 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
29064 /* The remaining insns will describe the stores. */
29065 for (i
= 1; i
<= nregs
; i
++)
29067 /* Expect (set (mem <addr>) (reg)).
29068 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29069 e
= XVECEXP (p
, 0, i
);
29070 gcc_assert (GET_CODE (e
) == SET
29071 && MEM_P (SET_DEST (e
))
29072 && REG_P (SET_SRC (e
)));
29074 reg
= REGNO (SET_SRC (e
));
29075 gcc_assert (reg
>= lastreg
);
29078 fprintf (asm_out_file
, ", ");
29079 /* We can't use %r for vfp because we need to use the
29080 double precision register names. */
29081 if (IS_VFP_REGNUM (reg
))
29082 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
29084 asm_fprintf (asm_out_file
, "%r", reg
);
29086 #ifdef ENABLE_CHECKING
29087 /* Check that the addresses are consecutive. */
29088 e
= XEXP (SET_DEST (e
), 0);
29089 if (GET_CODE (e
) == PLUS
)
29090 gcc_assert (REG_P (XEXP (e
, 0))
29091 && REGNO (XEXP (e
, 0)) == SP_REGNUM
29092 && CONST_INT_P (XEXP (e
, 1))
29093 && offset
== INTVAL (XEXP (e
, 1)));
29097 && REGNO (e
) == SP_REGNUM
);
29098 offset
+= reg_size
;
29101 fprintf (asm_out_file
, "}\n");
29103 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
29106 /* Emit unwind directives for a SET. */
29109 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
29117 switch (GET_CODE (e0
))
29120 /* Pushing a single register. */
29121 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
29122 || !REG_P (XEXP (XEXP (e0
, 0), 0))
29123 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
29126 asm_fprintf (asm_out_file
, "\t.save ");
29127 if (IS_VFP_REGNUM (REGNO (e1
)))
29128 asm_fprintf(asm_out_file
, "{d%d}\n",
29129 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
29131 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
29135 if (REGNO (e0
) == SP_REGNUM
)
29137 /* A stack increment. */
29138 if (GET_CODE (e1
) != PLUS
29139 || !REG_P (XEXP (e1
, 0))
29140 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
29141 || !CONST_INT_P (XEXP (e1
, 1)))
29144 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
29145 -INTVAL (XEXP (e1
, 1)));
29147 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
29149 HOST_WIDE_INT offset
;
29151 if (GET_CODE (e1
) == PLUS
)
29153 if (!REG_P (XEXP (e1
, 0))
29154 || !CONST_INT_P (XEXP (e1
, 1)))
29156 reg
= REGNO (XEXP (e1
, 0));
29157 offset
= INTVAL (XEXP (e1
, 1));
29158 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
29159 HARD_FRAME_POINTER_REGNUM
, reg
,
29162 else if (REG_P (e1
))
29165 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
29166 HARD_FRAME_POINTER_REGNUM
, reg
);
29171 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
29173 /* Move from sp to reg. */
29174 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
29176 else if (GET_CODE (e1
) == PLUS
29177 && REG_P (XEXP (e1
, 0))
29178 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
29179 && CONST_INT_P (XEXP (e1
, 1)))
29181 /* Set reg to offset from sp. */
29182 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
29183 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
29195 /* Emit unwind directives for the given insn. */
29198 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
29201 bool handled_one
= false;
29203 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
29206 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
29207 && (TREE_NOTHROW (current_function_decl
)
29208 || crtl
->all_throwers_are_sibcalls
))
29211 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
29214 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
29216 switch (REG_NOTE_KIND (note
))
29218 case REG_FRAME_RELATED_EXPR
:
29219 pat
= XEXP (note
, 0);
29222 case REG_CFA_REGISTER
:
29223 pat
= XEXP (note
, 0);
29226 pat
= PATTERN (insn
);
29227 if (GET_CODE (pat
) == PARALLEL
)
29228 pat
= XVECEXP (pat
, 0, 0);
29231 /* Only emitted for IS_STACKALIGN re-alignment. */
29236 src
= SET_SRC (pat
);
29237 dest
= SET_DEST (pat
);
29239 gcc_assert (src
== stack_pointer_rtx
);
29240 reg
= REGNO (dest
);
29241 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29244 handled_one
= true;
29247 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
29248 to get correct dwarf information for shrink-wrap. We should not
29249 emit unwind information for it because these are used either for
29250 pretend arguments or notes to adjust sp and restore registers from
29252 case REG_CFA_DEF_CFA
:
29253 case REG_CFA_ADJUST_CFA
:
29254 case REG_CFA_RESTORE
:
29257 case REG_CFA_EXPRESSION
:
29258 case REG_CFA_OFFSET
:
29259 /* ??? Only handling here what we actually emit. */
29260 gcc_unreachable ();
29268 pat
= PATTERN (insn
);
29271 switch (GET_CODE (pat
))
29274 arm_unwind_emit_set (asm_out_file
, pat
);
29278 /* Store multiple. */
29279 arm_unwind_emit_sequence (asm_out_file
, pat
);
29288 /* Output a reference from a function exception table to the type_info
29289 object X. The EABI specifies that the symbol should be relocated by
29290 an R_ARM_TARGET2 relocation. */
29293 arm_output_ttype (rtx x
)
29295 fputs ("\t.word\t", asm_out_file
);
29296 output_addr_const (asm_out_file
, x
);
29297 /* Use special relocations for symbol references. */
29298 if (!CONST_INT_P (x
))
29299 fputs ("(TARGET2)", asm_out_file
);
29300 fputc ('\n', asm_out_file
);
29305 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29308 arm_asm_emit_except_personality (rtx personality
)
29310 fputs ("\t.personality\t", asm_out_file
);
29311 output_addr_const (asm_out_file
, personality
);
29312 fputc ('\n', asm_out_file
);
29315 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29318 arm_asm_init_sections (void)
29320 exception_section
= get_unnamed_section (0, output_section_asm_op
,
29323 #endif /* ARM_UNWIND_INFO */
29325 /* Output unwind directives for the start/end of a function. */
29328 arm_output_fn_unwind (FILE * f
, bool prologue
)
29330 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
29334 fputs ("\t.fnstart\n", f
);
29337 /* If this function will never be unwound, then mark it as such.
29338 The came condition is used in arm_unwind_emit to suppress
29339 the frame annotations. */
29340 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
29341 && (TREE_NOTHROW (current_function_decl
)
29342 || crtl
->all_throwers_are_sibcalls
))
29343 fputs("\t.cantunwind\n", f
);
29345 fputs ("\t.fnend\n", f
);
29350 arm_emit_tls_decoration (FILE *fp
, rtx x
)
29352 enum tls_reloc reloc
;
29355 val
= XVECEXP (x
, 0, 0);
29356 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
29358 output_addr_const (fp
, val
);
29363 fputs ("(tlsgd)", fp
);
29366 fputs ("(tlsldm)", fp
);
29369 fputs ("(tlsldo)", fp
);
29372 fputs ("(gottpoff)", fp
);
29375 fputs ("(tpoff)", fp
);
29378 fputs ("(tlsdesc)", fp
);
29381 gcc_unreachable ();
29390 fputs (" + (. - ", fp
);
29391 output_addr_const (fp
, XVECEXP (x
, 0, 2));
29392 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29393 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
29394 output_addr_const (fp
, XVECEXP (x
, 0, 3));
29404 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29407 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
29409 gcc_assert (size
== 4);
29410 fputs ("\t.word\t", file
);
29411 output_addr_const (file
, x
);
29412 fputs ("(tlsldo)", file
);
29415 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29418 arm_output_addr_const_extra (FILE *fp
, rtx x
)
29420 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
29421 return arm_emit_tls_decoration (fp
, x
);
29422 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
29425 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
29427 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
29428 assemble_name_raw (fp
, label
);
29432 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
29434 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
29438 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29442 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
29444 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29448 output_addr_const (fp
, XVECEXP (x
, 0, 1));
29452 else if (GET_CODE (x
) == CONST_VECTOR
)
29453 return arm_emit_vector_const (fp
, x
);
29458 /* Output assembly for a shift instruction.
29459 SET_FLAGS determines how the instruction modifies the condition codes.
29460 0 - Do not set condition codes.
29461 1 - Set condition codes.
29462 2 - Use smallest instruction. */
29464 arm_output_shift(rtx
* operands
, int set_flags
)
29467 static const char flag_chars
[3] = {'?', '.', '!'};
29472 c
= flag_chars
[set_flags
];
29473 if (TARGET_UNIFIED_ASM
)
29475 shift
= shift_op(operands
[3], &val
);
29479 operands
[2] = GEN_INT(val
);
29480 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
29483 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
29486 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
29487 output_asm_insn (pattern
, operands
);
29491 /* Output assembly for a WMMX immediate shift instruction. */
29493 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
29495 int shift
= INTVAL (operands
[2]);
29497 enum machine_mode opmode
= GET_MODE (operands
[0]);
29499 gcc_assert (shift
>= 0);
29501 /* If the shift value in the register versions is > 63 (for D qualifier),
29502 31 (for W qualifier) or 15 (for H qualifier). */
29503 if (((opmode
== V4HImode
) && (shift
> 15))
29504 || ((opmode
== V2SImode
) && (shift
> 31))
29505 || ((opmode
== DImode
) && (shift
> 63)))
29509 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29510 output_asm_insn (templ
, operands
);
29511 if (opmode
== DImode
)
29513 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
29514 output_asm_insn (templ
, operands
);
29519 /* The destination register will contain all zeros. */
29520 sprintf (templ
, "wzero\t%%0");
29521 output_asm_insn (templ
, operands
);
29526 if ((opmode
== DImode
) && (shift
> 32))
29528 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29529 output_asm_insn (templ
, operands
);
29530 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
29531 output_asm_insn (templ
, operands
);
29535 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
29536 output_asm_insn (templ
, operands
);
29541 /* Output assembly for a WMMX tinsr instruction. */
29543 arm_output_iwmmxt_tinsr (rtx
*operands
)
29545 int mask
= INTVAL (operands
[3]);
29548 int units
= mode_nunits
[GET_MODE (operands
[0])];
29549 gcc_assert ((mask
& (mask
- 1)) == 0);
29550 for (i
= 0; i
< units
; ++i
)
29552 if ((mask
& 0x01) == 1)
29558 gcc_assert (i
< units
);
29560 switch (GET_MODE (operands
[0]))
29563 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
29566 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
29569 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
29572 gcc_unreachable ();
29575 output_asm_insn (templ
, operands
);
29580 /* Output a Thumb-1 casesi dispatch sequence. */
29582 thumb1_output_casesi (rtx
*operands
)
29584 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[0]));
29586 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29588 switch (GET_MODE(diff_vec
))
29591 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29592 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29594 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29595 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29597 return "bl\t%___gnu_thumb1_case_si";
29599 gcc_unreachable ();
29603 /* Output a Thumb-2 casesi instruction. */
29605 thumb2_output_casesi (rtx
*operands
)
29607 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[2]));
29609 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29611 output_asm_insn ("cmp\t%0, %1", operands
);
29612 output_asm_insn ("bhi\t%l3", operands
);
29613 switch (GET_MODE(diff_vec
))
29616 return "tbb\t[%|pc, %0]";
29618 return "tbh\t[%|pc, %0, lsl #1]";
29622 output_asm_insn ("adr\t%4, %l2", operands
);
29623 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
29624 output_asm_insn ("add\t%4, %4, %5", operands
);
29629 output_asm_insn ("adr\t%4, %l2", operands
);
29630 return "ldr\t%|pc, [%4, %0, lsl #2]";
29633 gcc_unreachable ();
29637 /* Most ARM cores are single issue, but some newer ones can dual issue.
29638 The scheduler descriptions rely on this being correct. */
29640 arm_issue_rate (void)
29667 /* A table and a function to perform ARM-specific name mangling for
29668 NEON vector types in order to conform to the AAPCS (see "Procedure
29669 Call Standard for the ARM Architecture", Appendix A). To qualify
29670 for emission with the mangled names defined in that document, a
29671 vector type must not only be of the correct mode but also be
29672 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29675 enum machine_mode mode
;
29676 const char *element_type_name
;
29677 const char *aapcs_name
;
29678 } arm_mangle_map_entry
;
29680 static arm_mangle_map_entry arm_mangle_map
[] = {
29681 /* 64-bit containerized types. */
29682 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
29683 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29684 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
29685 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29686 { V4HFmode
, "__builtin_neon_hf", "18__simd64_float16_t" },
29687 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
29688 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
29689 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
29690 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29691 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29693 /* 128-bit containerized types. */
29694 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
29695 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29696 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
29697 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29698 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
29699 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
29700 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
29701 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29702 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29703 { VOIDmode
, NULL
, NULL
}
29707 arm_mangle_type (const_tree type
)
29709 arm_mangle_map_entry
*pos
= arm_mangle_map
;
29711 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29712 has to be managled as if it is in the "std" namespace. */
29713 if (TARGET_AAPCS_BASED
29714 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
29715 return "St9__va_list";
29717 /* Half-precision float. */
29718 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
29721 if (TREE_CODE (type
) != VECTOR_TYPE
)
29724 /* Check the mode of the vector type, and the name of the vector
29725 element type, against the table. */
29726 while (pos
->mode
!= VOIDmode
)
29728 tree elt_type
= TREE_TYPE (type
);
29730 if (pos
->mode
== TYPE_MODE (type
)
29731 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
29732 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
29733 pos
->element_type_name
))
29734 return pos
->aapcs_name
;
29739 /* Use the default mangling for unrecognized (possibly user-defined)
29744 /* Order of allocation of core registers for Thumb: this allocation is
29745 written over the corresponding initial entries of the array
29746 initialized with REG_ALLOC_ORDER. We allocate all low registers
29747 first. Saving and restoring a low register is usually cheaper than
29748 using a call-clobbered high register. */
29750 static const int thumb_core_reg_alloc_order
[] =
29752 3, 2, 1, 0, 4, 5, 6, 7,
29753 14, 12, 8, 9, 10, 11
29756 /* Adjust register allocation order when compiling for Thumb. */
29759 arm_order_regs_for_local_alloc (void)
29761 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
29762 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
29764 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
29765 sizeof (thumb_core_reg_alloc_order
));
29768 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29771 arm_frame_pointer_required (void)
29773 return (cfun
->has_nonlocal_label
29774 || SUBTARGET_FRAME_POINTER_REQUIRED
29775 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
29778 /* Only thumb1 can't support conditional execution, so return true if
29779 the target is not thumb1. */
29781 arm_have_conditional_execution (void)
29783 return !TARGET_THUMB1
;
29787 arm_builtin_vectorized_function (tree fndecl
, tree type_out
, tree type_in
)
29789 enum machine_mode in_mode
, out_mode
;
29792 if (TREE_CODE (type_out
) != VECTOR_TYPE
29793 || TREE_CODE (type_in
) != VECTOR_TYPE
)
29796 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
29797 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
29798 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
29799 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
29801 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29802 decl of the vectorized builtin for the appropriate vector mode.
29803 NULL_TREE is returned if no such builtin is available. */
29804 #undef ARM_CHECK_BUILTIN_MODE
29805 #define ARM_CHECK_BUILTIN_MODE(C) \
29806 (TARGET_NEON && TARGET_FPU_ARMV8 \
29807 && flag_unsafe_math_optimizations \
29808 && ARM_CHECK_BUILTIN_MODE_1 (C))
29810 #undef ARM_CHECK_BUILTIN_MODE_1
29811 #define ARM_CHECK_BUILTIN_MODE_1(C) \
29812 (out_mode == SFmode && out_n == C \
29813 && in_mode == SFmode && in_n == C)
29815 #undef ARM_FIND_VRINT_VARIANT
29816 #define ARM_FIND_VRINT_VARIANT(N) \
29817 (ARM_CHECK_BUILTIN_MODE (2) \
29818 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29819 : (ARM_CHECK_BUILTIN_MODE (4) \
29820 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29823 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_NORMAL
)
29825 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
29828 case BUILT_IN_FLOORF
:
29829 return ARM_FIND_VRINT_VARIANT (vrintm
);
29830 case BUILT_IN_CEILF
:
29831 return ARM_FIND_VRINT_VARIANT (vrintp
);
29832 case BUILT_IN_TRUNCF
:
29833 return ARM_FIND_VRINT_VARIANT (vrintz
);
29834 case BUILT_IN_ROUNDF
:
29835 return ARM_FIND_VRINT_VARIANT (vrinta
);
29836 #undef ARM_CHECK_BUILTIN_MODE
29837 #define ARM_CHECK_BUILTIN_MODE(C, N) \
29838 (out_mode == N##Imode && out_n == C \
29839 && in_mode == N##Imode && in_n == C)
29840 case BUILT_IN_BSWAP16
:
29841 if (ARM_CHECK_BUILTIN_MODE (4, H
))
29842 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi
, false);
29843 else if (ARM_CHECK_BUILTIN_MODE (8, H
))
29844 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi
, false);
29847 case BUILT_IN_BSWAP32
:
29848 if (ARM_CHECK_BUILTIN_MODE (2, S
))
29849 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si
, false);
29850 else if (ARM_CHECK_BUILTIN_MODE (4, S
))
29851 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si
, false);
29854 case BUILT_IN_BSWAP64
:
29855 if (ARM_CHECK_BUILTIN_MODE (2, D
))
29856 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di
, false);
29866 #undef ARM_CHECK_BUILTIN_MODE
29867 #undef ARM_FIND_VRINT_VARIANT
29869 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29870 static HOST_WIDE_INT
29871 arm_vector_alignment (const_tree type
)
29873 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
29875 if (TARGET_AAPCS_BASED
)
29876 align
= MIN (align
, 64);
29881 static unsigned int
29882 arm_autovectorize_vector_sizes (void)
29884 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
29888 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
29890 /* Vectors which aren't in packed structures will not be less aligned than
29891 the natural alignment of their element type, so this is safe. */
29892 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
29895 return default_builtin_vector_alignment_reachable (type
, is_packed
);
29899 arm_builtin_support_vector_misalignment (enum machine_mode mode
,
29900 const_tree type
, int misalignment
,
29903 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
29905 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
29910 /* If the misalignment is unknown, we should be able to handle the access
29911 so long as it is not to a member of a packed data structure. */
29912 if (misalignment
== -1)
29915 /* Return true if the misalignment is a multiple of the natural alignment
29916 of the vector's element type. This is probably always going to be
29917 true in practice, since we've already established that this isn't a
29919 return ((misalignment
% align
) == 0);
29922 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
29927 arm_conditional_register_usage (void)
29931 if (TARGET_THUMB1
&& optimize_size
)
29933 /* When optimizing for size on Thumb-1, it's better not
29934 to use the HI regs, because of the overhead of
29936 for (regno
= FIRST_HI_REGNUM
;
29937 regno
<= LAST_HI_REGNUM
; ++regno
)
29938 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
29941 /* The link register can be clobbered by any branch insn,
29942 but we have no way to track that at present, so mark
29943 it as unavailable. */
29945 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
29947 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
29949 /* VFPv3 registers are disabled when earlier VFP
29950 versions are selected due to the definition of
29951 LAST_VFP_REGNUM. */
29952 for (regno
= FIRST_VFP_REGNUM
;
29953 regno
<= LAST_VFP_REGNUM
; ++ regno
)
29955 fixed_regs
[regno
] = 0;
29956 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
29957 || regno
>= FIRST_VFP_REGNUM
+ 32;
29961 if (TARGET_REALLY_IWMMXT
)
29963 regno
= FIRST_IWMMXT_GR_REGNUM
;
29964 /* The 2002/10/09 revision of the XScale ABI has wCG0
29965 and wCG1 as call-preserved registers. The 2002/11/21
29966 revision changed this so that all wCG registers are
29967 scratch registers. */
29968 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
29969 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
29970 fixed_regs
[regno
] = 0;
29971 /* The XScale ABI has wR0 - wR9 as scratch registers,
29972 the rest as call-preserved registers. */
29973 for (regno
= FIRST_IWMMXT_REGNUM
;
29974 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
29976 fixed_regs
[regno
] = 0;
29977 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
29981 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
29983 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
29984 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
29986 else if (TARGET_APCS_STACK
)
29988 fixed_regs
[10] = 1;
29989 call_used_regs
[10] = 1;
29991 /* -mcaller-super-interworking reserves r11 for calls to
29992 _interwork_r11_call_via_rN(). Making the register global
29993 is an easy way of ensuring that it remains valid for all
29995 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
29996 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
29998 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29999 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30000 if (TARGET_CALLER_INTERWORKING
)
30001 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30003 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30007 arm_preferred_rename_class (reg_class_t rclass
)
30009 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30010 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
30011 and code size can be reduced. */
30012 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
30018 /* Compute the atrribute "length" of insn "*push_multi".
30019 So this function MUST be kept in sync with that insn pattern. */
30021 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
30023 int i
, regno
, hi_reg
;
30024 int num_saves
= XVECLEN (parallel_op
, 0);
30034 regno
= REGNO (first_op
);
30035 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30036 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
30038 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
30039 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30047 /* Compute the number of instructions emitted by output_move_double. */
30049 arm_count_output_move_double_insns (rtx
*operands
)
30053 /* output_move_double may modify the operands array, so call it
30054 here on a copy of the array. */
30055 ops
[0] = operands
[0];
30056 ops
[1] = operands
[1];
30057 output_move_double (ops
, false, &count
);
30062 vfp3_const_double_for_fract_bits (rtx operand
)
30064 REAL_VALUE_TYPE r0
;
30066 if (!CONST_DOUBLE_P (operand
))
30069 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
30070 if (exact_real_inverse (DFmode
, &r0
))
30072 if (exact_real_truncate (DFmode
, &r0
))
30074 HOST_WIDE_INT value
= real_to_integer (&r0
);
30075 value
= value
& 0xffffffff;
30076 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
30077 return int_log2 (value
);
30084 vfp3_const_double_for_bits (rtx operand
)
30086 REAL_VALUE_TYPE r0
;
30088 if (!CONST_DOUBLE_P (operand
))
30091 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
30092 if (exact_real_truncate (DFmode
, &r0
))
30094 HOST_WIDE_INT value
= real_to_integer (&r0
);
30095 value
= value
& 0xffffffff;
30096 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
30097 return int_log2 (value
);
30103 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30106 arm_pre_atomic_barrier (enum memmodel model
)
30108 if (need_atomic_barrier_p (model
, true))
30109 emit_insn (gen_memory_barrier ());
30113 arm_post_atomic_barrier (enum memmodel model
)
30115 if (need_atomic_barrier_p (model
, false))
30116 emit_insn (gen_memory_barrier ());
30119 /* Emit the load-exclusive and store-exclusive instructions.
30120 Use acquire and release versions if necessary. */
30123 arm_emit_load_exclusive (enum machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
30125 rtx (*gen
) (rtx
, rtx
);
30131 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
30132 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
30133 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
30134 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
30136 gcc_unreachable ();
30143 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
30144 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
30145 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
30146 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
30148 gcc_unreachable ();
30152 emit_insn (gen (rval
, mem
));
30156 arm_emit_store_exclusive (enum machine_mode mode
, rtx bval
, rtx rval
,
30159 rtx (*gen
) (rtx
, rtx
, rtx
);
30165 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
30166 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
30167 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
30168 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
30170 gcc_unreachable ();
30177 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
30178 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
30179 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
30180 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
30182 gcc_unreachable ();
30186 emit_insn (gen (bval
, rval
, mem
));
30189 /* Mark the previous jump instruction as unlikely. */
30192 emit_unlikely_jump (rtx insn
)
30194 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
30196 insn
= emit_jump_insn (insn
);
30197 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
30200 /* Expand a compare and swap pattern. */
30203 arm_expand_compare_and_swap (rtx operands
[])
30205 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
30206 enum machine_mode mode
;
30207 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
30209 bval
= operands
[0];
30210 rval
= operands
[1];
30212 oldval
= operands
[3];
30213 newval
= operands
[4];
30214 is_weak
= operands
[5];
30215 mod_s
= operands
[6];
30216 mod_f
= operands
[7];
30217 mode
= GET_MODE (mem
);
30219 /* Normally the succ memory model must be stronger than fail, but in the
30220 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30221 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30223 if (TARGET_HAVE_LDACQ
30224 && INTVAL (mod_f
) == MEMMODEL_ACQUIRE
30225 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
30226 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
30232 /* For narrow modes, we're going to perform the comparison in SImode,
30233 so do the zero-extension now. */
30234 rval
= gen_reg_rtx (SImode
);
30235 oldval
= convert_modes (SImode
, mode
, oldval
, true);
30239 /* Force the value into a register if needed. We waited until after
30240 the zero-extension above to do this properly. */
30241 if (!arm_add_operand (oldval
, SImode
))
30242 oldval
= force_reg (SImode
, oldval
);
30246 if (!cmpdi_operand (oldval
, mode
))
30247 oldval
= force_reg (mode
, oldval
);
30251 gcc_unreachable ();
30256 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
30257 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
30258 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
30259 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
30261 gcc_unreachable ();
30264 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
30266 if (mode
== QImode
|| mode
== HImode
)
30267 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
30269 /* In all cases, we arrange for success to be signaled by Z set.
30270 This arrangement allows for the boolean result to be used directly
30271 in a subsequent branch, post optimization. */
30272 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
30273 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
30274 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
30277 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30278 another memory store between the load-exclusive and store-exclusive can
30279 reset the monitor from Exclusive to Open state. This means we must wait
30280 until after reload to split the pattern, lest we get a register spill in
30281 the middle of the atomic sequence. */
30284 arm_split_compare_and_swap (rtx operands
[])
30286 rtx rval
, mem
, oldval
, newval
, scratch
;
30287 enum machine_mode mode
;
30288 enum memmodel mod_s
, mod_f
;
30290 rtx label1
, label2
, x
, cond
;
30292 rval
= operands
[0];
30294 oldval
= operands
[2];
30295 newval
= operands
[3];
30296 is_weak
= (operands
[4] != const0_rtx
);
30297 mod_s
= (enum memmodel
) INTVAL (operands
[5]);
30298 mod_f
= (enum memmodel
) INTVAL (operands
[6]);
30299 scratch
= operands
[7];
30300 mode
= GET_MODE (mem
);
30302 bool use_acquire
= TARGET_HAVE_LDACQ
30303 && !(mod_s
== MEMMODEL_RELAXED
30304 || mod_s
== MEMMODEL_CONSUME
30305 || mod_s
== MEMMODEL_RELEASE
);
30307 bool use_release
= TARGET_HAVE_LDACQ
30308 && !(mod_s
== MEMMODEL_RELAXED
30309 || mod_s
== MEMMODEL_CONSUME
30310 || mod_s
== MEMMODEL_ACQUIRE
);
30312 /* Checks whether a barrier is needed and emits one accordingly. */
30313 if (!(use_acquire
|| use_release
))
30314 arm_pre_atomic_barrier (mod_s
);
30319 label1
= gen_label_rtx ();
30320 emit_label (label1
);
30322 label2
= gen_label_rtx ();
30324 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
30326 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
30327 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30328 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30329 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
30330 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
30332 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
, use_release
);
30334 /* Weak or strong, we want EQ to be true for success, so that we
30335 match the flags that we got from the compare above. */
30336 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
30337 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
30338 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
30342 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30343 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30344 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
30345 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
30348 if (mod_f
!= MEMMODEL_RELAXED
)
30349 emit_label (label2
);
30351 /* Checks whether a barrier is needed and emits one accordingly. */
30352 if (!(use_acquire
|| use_release
))
30353 arm_post_atomic_barrier (mod_s
);
30355 if (mod_f
== MEMMODEL_RELAXED
)
30356 emit_label (label2
);
30360 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
30361 rtx value
, rtx model_rtx
, rtx cond
)
30363 enum memmodel model
= (enum memmodel
) INTVAL (model_rtx
);
30364 enum machine_mode mode
= GET_MODE (mem
);
30365 enum machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
30368 bool use_acquire
= TARGET_HAVE_LDACQ
30369 && !(model
== MEMMODEL_RELAXED
30370 || model
== MEMMODEL_CONSUME
30371 || model
== MEMMODEL_RELEASE
);
30373 bool use_release
= TARGET_HAVE_LDACQ
30374 && !(model
== MEMMODEL_RELAXED
30375 || model
== MEMMODEL_CONSUME
30376 || model
== MEMMODEL_ACQUIRE
);
30378 /* Checks whether a barrier is needed and emits one accordingly. */
30379 if (!(use_acquire
|| use_release
))
30380 arm_pre_atomic_barrier (model
);
30382 label
= gen_label_rtx ();
30383 emit_label (label
);
30386 new_out
= gen_lowpart (wmode
, new_out
);
30388 old_out
= gen_lowpart (wmode
, old_out
);
30391 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
30393 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
30402 x
= gen_rtx_AND (wmode
, old_out
, value
);
30403 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30404 x
= gen_rtx_NOT (wmode
, new_out
);
30405 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30409 if (CONST_INT_P (value
))
30411 value
= GEN_INT (-INTVAL (value
));
30417 if (mode
== DImode
)
30419 /* DImode plus/minus need to clobber flags. */
30420 /* The adddi3 and subdi3 patterns are incorrectly written so that
30421 they require matching operands, even when we could easily support
30422 three operands. Thankfully, this can be fixed up post-splitting,
30423 as the individual add+adc patterns do accept three operands and
30424 post-reload cprop can make these moves go away. */
30425 emit_move_insn (new_out
, old_out
);
30427 x
= gen_adddi3 (new_out
, new_out
, value
);
30429 x
= gen_subdi3 (new_out
, new_out
, value
);
30436 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
30437 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30441 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
30444 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30445 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
30447 /* Checks whether a barrier is needed and emits one accordingly. */
30448 if (!(use_acquire
|| use_release
))
30449 arm_post_atomic_barrier (model
);
30452 #define MAX_VECT_LEN 16
30454 struct expand_vec_perm_d
30456 rtx target
, op0
, op1
;
30457 unsigned char perm
[MAX_VECT_LEN
];
30458 enum machine_mode vmode
;
30459 unsigned char nelt
;
30464 /* Generate a variable permutation. */
30467 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30469 enum machine_mode vmode
= GET_MODE (target
);
30470 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30472 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
30473 gcc_checking_assert (GET_MODE (op0
) == vmode
);
30474 gcc_checking_assert (GET_MODE (op1
) == vmode
);
30475 gcc_checking_assert (GET_MODE (sel
) == vmode
);
30476 gcc_checking_assert (TARGET_NEON
);
30480 if (vmode
== V8QImode
)
30481 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
30483 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
30489 if (vmode
== V8QImode
)
30491 pair
= gen_reg_rtx (V16QImode
);
30492 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
30493 pair
= gen_lowpart (TImode
, pair
);
30494 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
30498 pair
= gen_reg_rtx (OImode
);
30499 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
30500 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
30506 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30508 enum machine_mode vmode
= GET_MODE (target
);
30509 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
30510 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30511 rtx rmask
[MAX_VECT_LEN
], mask
;
30513 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30514 numbering of elements for big-endian, we must reverse the order. */
30515 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
30517 /* The VTBL instruction does not use a modulo index, so we must take care
30518 of that ourselves. */
30519 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30520 for (i
= 0; i
< nelt
; ++i
)
30522 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
30523 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
30525 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
30528 /* Generate or test for an insn that supports a constant permutation. */
30530 /* Recognize patterns for the VUZP insns. */
30533 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
30535 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
30536 rtx out0
, out1
, in0
, in1
, x
;
30537 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30539 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30542 /* Note that these are little-endian tests. Adjust for big-endian later. */
30543 if (d
->perm
[0] == 0)
30545 else if (d
->perm
[0] == 1)
30549 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30551 for (i
= 0; i
< nelt
; i
++)
30553 unsigned elt
= (i
* 2 + odd
) & mask
;
30554 if (d
->perm
[i
] != elt
)
30564 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
30565 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
30566 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
30567 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
30568 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
30569 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
30570 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
30571 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
30573 gcc_unreachable ();
30578 if (BYTES_BIG_ENDIAN
)
30580 x
= in0
, in0
= in1
, in1
= x
;
30585 out1
= gen_reg_rtx (d
->vmode
);
30587 x
= out0
, out0
= out1
, out1
= x
;
30589 emit_insn (gen (out0
, in0
, in1
, out1
));
30593 /* Recognize patterns for the VZIP insns. */
30596 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
30598 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
30599 rtx out0
, out1
, in0
, in1
, x
;
30600 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30602 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30605 /* Note that these are little-endian tests. Adjust for big-endian later. */
30607 if (d
->perm
[0] == high
)
30609 else if (d
->perm
[0] == 0)
30613 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30615 for (i
= 0; i
< nelt
/ 2; i
++)
30617 unsigned elt
= (i
+ high
) & mask
;
30618 if (d
->perm
[i
* 2] != elt
)
30620 elt
= (elt
+ nelt
) & mask
;
30621 if (d
->perm
[i
* 2 + 1] != elt
)
30631 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
30632 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
30633 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
30634 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
30635 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
30636 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
30637 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
30638 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
30640 gcc_unreachable ();
30645 if (BYTES_BIG_ENDIAN
)
30647 x
= in0
, in0
= in1
, in1
= x
;
30652 out1
= gen_reg_rtx (d
->vmode
);
30654 x
= out0
, out0
= out1
, out1
= x
;
30656 emit_insn (gen (out0
, in0
, in1
, out1
));
30660 /* Recognize patterns for the VREV insns. */
30663 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
30665 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
30666 rtx (*gen
)(rtx
, rtx
, rtx
);
30668 if (!d
->one_vector_p
)
30677 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
30678 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
30686 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
30687 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
30688 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
30689 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
30697 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
30698 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
30699 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
30700 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
30701 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
30702 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
30703 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
30704 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
30713 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
30714 for (j
= 0; j
<= diff
; j
+= 1)
30716 /* This is guaranteed to be true as the value of diff
30717 is 7, 3, 1 and we should have enough elements in the
30718 queue to generate this. Getting a vector mask with a
30719 value of diff other than these values implies that
30720 something is wrong by the time we get here. */
30721 gcc_assert (i
+ j
< nelt
);
30722 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
30730 /* ??? The third operand is an artifact of the builtin infrastructure
30731 and is ignored by the actual instruction. */
30732 emit_insn (gen (d
->target
, d
->op0
, const0_rtx
));
30736 /* Recognize patterns for the VTRN insns. */
30739 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
30741 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
30742 rtx out0
, out1
, in0
, in1
, x
;
30743 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30745 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30748 /* Note that these are little-endian tests. Adjust for big-endian later. */
30749 if (d
->perm
[0] == 0)
30751 else if (d
->perm
[0] == 1)
30755 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30757 for (i
= 0; i
< nelt
; i
+= 2)
30759 if (d
->perm
[i
] != i
+ odd
)
30761 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
30771 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
30772 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
30773 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
30774 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
30775 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
30776 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
30777 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
30778 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
30780 gcc_unreachable ();
30785 if (BYTES_BIG_ENDIAN
)
30787 x
= in0
, in0
= in1
, in1
= x
;
30792 out1
= gen_reg_rtx (d
->vmode
);
30794 x
= out0
, out0
= out1
, out1
= x
;
30796 emit_insn (gen (out0
, in0
, in1
, out1
));
30800 /* Recognize patterns for the VEXT insns. */
30803 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
30805 unsigned int i
, nelt
= d
->nelt
;
30806 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
30809 unsigned int location
;
30811 unsigned int next
= d
->perm
[0] + 1;
30813 /* TODO: Handle GCC's numbering of elements for big-endian. */
30814 if (BYTES_BIG_ENDIAN
)
30817 /* Check if the extracted indexes are increasing by one. */
30818 for (i
= 1; i
< nelt
; next
++, i
++)
30820 /* If we hit the most significant element of the 2nd vector in
30821 the previous iteration, no need to test further. */
30822 if (next
== 2 * nelt
)
30825 /* If we are operating on only one vector: it could be a
30826 rotation. If there are only two elements of size < 64, let
30827 arm_evpc_neon_vrev catch it. */
30828 if (d
->one_vector_p
&& (next
== nelt
))
30830 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
30836 if (d
->perm
[i
] != next
)
30840 location
= d
->perm
[0];
30844 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
30845 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
30846 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
30847 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
30848 case V2SImode
: gen
= gen_neon_vextv2si
; break;
30849 case V4SImode
: gen
= gen_neon_vextv4si
; break;
30850 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
30851 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
30852 case V2DImode
: gen
= gen_neon_vextv2di
; break;
30861 offset
= GEN_INT (location
);
30862 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
30866 /* The NEON VTBL instruction is a fully variable permuation that's even
30867 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30868 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30869 can do slightly better by expanding this as a constant where we don't
30870 have to apply a mask. */
30873 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
30875 rtx rperm
[MAX_VECT_LEN
], sel
;
30876 enum machine_mode vmode
= d
->vmode
;
30877 unsigned int i
, nelt
= d
->nelt
;
30879 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30880 numbering of elements for big-endian, we must reverse the order. */
30881 if (BYTES_BIG_ENDIAN
)
30887 /* Generic code will try constant permutation twice. Once with the
30888 original mode and again with the elements lowered to QImode.
30889 So wait and don't do the selector expansion ourselves. */
30890 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
30893 for (i
= 0; i
< nelt
; ++i
)
30894 rperm
[i
] = GEN_INT (d
->perm
[i
]);
30895 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
30896 sel
= force_reg (vmode
, sel
);
30898 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
30903 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
30905 /* Check if the input mask matches vext before reordering the
30908 if (arm_evpc_neon_vext (d
))
30911 /* The pattern matching functions above are written to look for a small
30912 number to begin the sequence (0, 1, N/2). If we begin with an index
30913 from the second operand, we can swap the operands. */
30914 if (d
->perm
[0] >= d
->nelt
)
30916 unsigned i
, nelt
= d
->nelt
;
30919 for (i
= 0; i
< nelt
; ++i
)
30920 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
30929 if (arm_evpc_neon_vuzp (d
))
30931 if (arm_evpc_neon_vzip (d
))
30933 if (arm_evpc_neon_vrev (d
))
30935 if (arm_evpc_neon_vtrn (d
))
30937 return arm_evpc_neon_vtbl (d
);
30942 /* Expand a vec_perm_const pattern. */
30945 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30947 struct expand_vec_perm_d d
;
30948 int i
, nelt
, which
;
30954 d
.vmode
= GET_MODE (target
);
30955 gcc_assert (VECTOR_MODE_P (d
.vmode
));
30956 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
30957 d
.testing_p
= false;
30959 for (i
= which
= 0; i
< nelt
; ++i
)
30961 rtx e
= XVECEXP (sel
, 0, i
);
30962 int ei
= INTVAL (e
) & (2 * nelt
- 1);
30963 which
|= (ei
< nelt
? 1 : 2);
30973 d
.one_vector_p
= false;
30974 if (!rtx_equal_p (op0
, op1
))
30977 /* The elements of PERM do not suggest that only the first operand
30978 is used, but both operands are identical. Allow easier matching
30979 of the permutation by folding the permutation into the single
30983 for (i
= 0; i
< nelt
; ++i
)
30984 d
.perm
[i
] &= nelt
- 1;
30986 d
.one_vector_p
= true;
30991 d
.one_vector_p
= true;
30995 return arm_expand_vec_perm_const_1 (&d
);
30998 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31001 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
31002 const unsigned char *sel
)
31004 struct expand_vec_perm_d d
;
31005 unsigned int i
, nelt
, which
;
31009 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
31010 d
.testing_p
= true;
31011 memcpy (d
.perm
, sel
, nelt
);
31013 /* Categorize the set of elements in the selector. */
31014 for (i
= which
= 0; i
< nelt
; ++i
)
31016 unsigned char e
= d
.perm
[i
];
31017 gcc_assert (e
< 2 * nelt
);
31018 which
|= (e
< nelt
? 1 : 2);
31021 /* For all elements from second vector, fold the elements to first. */
31023 for (i
= 0; i
< nelt
; ++i
)
31026 /* Check whether the mask can be applied to the vector type. */
31027 d
.one_vector_p
= (which
!= 3);
31029 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
31030 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
31031 if (!d
.one_vector_p
)
31032 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
31035 ret
= arm_expand_vec_perm_const_1 (&d
);
31042 arm_autoinc_modes_ok_p (enum machine_mode mode
, enum arm_auto_incmodes code
)
31044 /* If we are soft float and we do not have ldrd
31045 then all auto increment forms are ok. */
31046 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
31051 /* Post increment and Pre Decrement are supported for all
31052 instruction forms except for vector forms. */
31055 if (VECTOR_MODE_P (mode
))
31057 if (code
!= ARM_PRE_DEC
)
31067 /* Without LDRD and mode size greater than
31068 word size, there is no point in auto-incrementing
31069 because ldm and stm will not have these forms. */
31070 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
31073 /* Vector and floating point modes do not support
31074 these auto increment forms. */
31075 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
31088 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31089 on ARM, since we know that shifts by negative amounts are no-ops.
31090 Additionally, the default expansion code is not available or suitable
31091 for post-reload insn splits (this can occur when the register allocator
31092 chooses not to do a shift in NEON).
31094 This function is used in both initial expand and post-reload splits, and
31095 handles all kinds of 64-bit shifts.
31097 Input requirements:
31098 - It is safe for the input and output to be the same register, but
31099 early-clobber rules apply for the shift amount and scratch registers.
31100 - Shift by register requires both scratch registers. In all other cases
31101 the scratch registers may be NULL.
31102 - Ashiftrt by a register also clobbers the CC register. */
31104 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
31105 rtx amount
, rtx scratch1
, rtx scratch2
)
31107 rtx out_high
= gen_highpart (SImode
, out
);
31108 rtx out_low
= gen_lowpart (SImode
, out
);
31109 rtx in_high
= gen_highpart (SImode
, in
);
31110 rtx in_low
= gen_lowpart (SImode
, in
);
31113 in = the register pair containing the input value.
31114 out = the destination register pair.
31115 up = the high- or low-part of each pair.
31116 down = the opposite part to "up".
31117 In a shift, we can consider bits to shift from "up"-stream to
31118 "down"-stream, so in a left-shift "up" is the low-part and "down"
31119 is the high-part of each register pair. */
31121 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
31122 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
31123 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
31124 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
31126 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
31128 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
31129 && GET_MODE (out
) == DImode
);
31131 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
31132 && GET_MODE (in
) == DImode
);
31134 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
31135 && GET_MODE (amount
) == SImode
)
31136 || CONST_INT_P (amount
)));
31137 gcc_assert (scratch1
== NULL
31138 || (GET_CODE (scratch1
) == SCRATCH
)
31139 || (GET_MODE (scratch1
) == SImode
31140 && REG_P (scratch1
)));
31141 gcc_assert (scratch2
== NULL
31142 || (GET_CODE (scratch2
) == SCRATCH
)
31143 || (GET_MODE (scratch2
) == SImode
31144 && REG_P (scratch2
)));
31145 gcc_assert (!REG_P (out
) || !REG_P (amount
)
31146 || !HARD_REGISTER_P (out
)
31147 || (REGNO (out
) != REGNO (amount
)
31148 && REGNO (out
) + 1 != REGNO (amount
)));
31150 /* Macros to make following code more readable. */
31151 #define SUB_32(DEST,SRC) \
31152 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31153 #define RSB_32(DEST,SRC) \
31154 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31155 #define SUB_S_32(DEST,SRC) \
31156 gen_addsi3_compare0 ((DEST), (SRC), \
31158 #define SET(DEST,SRC) \
31159 gen_rtx_SET (SImode, (DEST), (SRC))
31160 #define SHIFT(CODE,SRC,AMOUNT) \
31161 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31162 #define LSHIFT(CODE,SRC,AMOUNT) \
31163 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31164 SImode, (SRC), (AMOUNT))
31165 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31166 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31167 SImode, (SRC), (AMOUNT))
31169 gen_rtx_IOR (SImode, (A), (B))
31170 #define BRANCH(COND,LABEL) \
31171 gen_arm_cond_branch ((LABEL), \
31172 gen_rtx_ ## COND (CCmode, cc_reg, \
31176 /* Shifts by register and shifts by constant are handled separately. */
31177 if (CONST_INT_P (amount
))
31179 /* We have a shift-by-constant. */
31181 /* First, handle out-of-range shift amounts.
31182 In both cases we try to match the result an ARM instruction in a
31183 shift-by-register would give. This helps reduce execution
31184 differences between optimization levels, but it won't stop other
31185 parts of the compiler doing different things. This is "undefined
31186 behaviour, in any case. */
31187 if (INTVAL (amount
) <= 0)
31188 emit_insn (gen_movdi (out
, in
));
31189 else if (INTVAL (amount
) >= 64)
31191 if (code
== ASHIFTRT
)
31193 rtx const31_rtx
= GEN_INT (31);
31194 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
31195 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
31198 emit_insn (gen_movdi (out
, const0_rtx
));
31201 /* Now handle valid shifts. */
31202 else if (INTVAL (amount
) < 32)
31204 /* Shifts by a constant less than 32. */
31205 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
31207 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
31208 emit_insn (SET (out_down
,
31209 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
31211 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
31215 /* Shifts by a constant greater than 31. */
31216 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
31218 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
31219 if (code
== ASHIFTRT
)
31220 emit_insn (gen_ashrsi3 (out_up
, in_up
,
31223 emit_insn (SET (out_up
, const0_rtx
));
31228 /* We have a shift-by-register. */
31229 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
31231 /* This alternative requires the scratch registers. */
31232 gcc_assert (scratch1
&& REG_P (scratch1
));
31233 gcc_assert (scratch2
&& REG_P (scratch2
));
31235 /* We will need the values "amount-32" and "32-amount" later.
31236 Swapping them around now allows the later code to be more general. */
31240 emit_insn (SUB_32 (scratch1
, amount
));
31241 emit_insn (RSB_32 (scratch2
, amount
));
31244 emit_insn (RSB_32 (scratch1
, amount
));
31245 /* Also set CC = amount > 32. */
31246 emit_insn (SUB_S_32 (scratch2
, amount
));
31249 emit_insn (RSB_32 (scratch1
, amount
));
31250 emit_insn (SUB_32 (scratch2
, amount
));
31253 gcc_unreachable ();
31256 /* Emit code like this:
31259 out_down = in_down << amount;
31260 out_down = (in_up << (amount - 32)) | out_down;
31261 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31262 out_up = in_up << amount;
31265 out_down = in_down >> amount;
31266 out_down = (in_up << (32 - amount)) | out_down;
31268 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31269 out_up = in_up << amount;
31272 out_down = in_down >> amount;
31273 out_down = (in_up << (32 - amount)) | out_down;
31275 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31276 out_up = in_up << amount;
31278 The ARM and Thumb2 variants are the same but implemented slightly
31279 differently. If this were only called during expand we could just
31280 use the Thumb2 case and let combine do the right thing, but this
31281 can also be called from post-reload splitters. */
31283 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
31285 if (!TARGET_THUMB2
)
31287 /* Emit code for ARM mode. */
31288 emit_insn (SET (out_down
,
31289 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
31290 if (code
== ASHIFTRT
)
31292 rtx done_label
= gen_label_rtx ();
31293 emit_jump_insn (BRANCH (LT
, done_label
));
31294 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
31296 emit_label (done_label
);
31299 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
31304 /* Emit code for Thumb2 mode.
31305 Thumb2 can't do shift and or in one insn. */
31306 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
31307 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
31309 if (code
== ASHIFTRT
)
31311 rtx done_label
= gen_label_rtx ();
31312 emit_jump_insn (BRANCH (LT
, done_label
));
31313 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
31314 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
31315 emit_label (done_label
);
31319 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
31320 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
31324 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
31339 /* Returns true if a valid comparison operation and makes
31340 the operands in a form that is valid. */
31342 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
31344 enum rtx_code code
= GET_CODE (*comparison
);
31346 enum machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
31347 ? GET_MODE (*op2
) : GET_MODE (*op1
);
31349 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
31351 if (code
== UNEQ
|| code
== LTGT
)
31354 code_int
= (int)code
;
31355 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
31356 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
31361 if (!arm_add_operand (*op1
, mode
))
31362 *op1
= force_reg (mode
, *op1
);
31363 if (!arm_add_operand (*op2
, mode
))
31364 *op2
= force_reg (mode
, *op2
);
31368 if (!cmpdi_operand (*op1
, mode
))
31369 *op1
= force_reg (mode
, *op1
);
31370 if (!cmpdi_operand (*op2
, mode
))
31371 *op2
= force_reg (mode
, *op2
);
31376 if (!arm_float_compare_operand (*op1
, mode
))
31377 *op1
= force_reg (mode
, *op1
);
31378 if (!arm_float_compare_operand (*op2
, mode
))
31379 *op2
= force_reg (mode
, *op2
);
31389 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31391 static unsigned HOST_WIDE_INT
31392 arm_asan_shadow_offset (void)
31394 return (unsigned HOST_WIDE_INT
) 1 << 29;
31398 /* This is a temporary fix for PR60655. Ideally we need
31399 to handle most of these cases in the generic part but
31400 currently we reject minus (..) (sym_ref). We try to
31401 ameliorate the case with minus (sym_ref1) (sym_ref2)
31402 where they are in the same section. */
31405 arm_const_not_ok_for_debug_p (rtx p
)
31407 tree decl_op0
= NULL
;
31408 tree decl_op1
= NULL
;
31410 if (GET_CODE (p
) == MINUS
)
31412 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
31414 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
31416 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
31417 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
31419 if ((TREE_CODE (decl_op1
) == VAR_DECL
31420 || TREE_CODE (decl_op1
) == CONST_DECL
)
31421 && (TREE_CODE (decl_op0
) == VAR_DECL
31422 || TREE_CODE (decl_op0
) == CONST_DECL
))
31423 return (get_variable_section (decl_op1
, false)
31424 != get_variable_section (decl_op0
, false));
31426 if (TREE_CODE (decl_op1
) == LABEL_DECL
31427 && TREE_CODE (decl_op0
) == LABEL_DECL
)
31428 return (DECL_CONTEXT (decl_op1
)
31429 != DECL_CONTEXT (decl_op0
));
31440 arm_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
31442 const unsigned ARM_FE_INVALID
= 1;
31443 const unsigned ARM_FE_DIVBYZERO
= 2;
31444 const unsigned ARM_FE_OVERFLOW
= 4;
31445 const unsigned ARM_FE_UNDERFLOW
= 8;
31446 const unsigned ARM_FE_INEXACT
= 16;
31447 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT
= (ARM_FE_INVALID
31452 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT
= 8;
31453 tree fenv_var
, get_fpscr
, set_fpscr
, mask
, ld_fenv
, masked_fenv
;
31454 tree new_fenv_var
, reload_fenv
, restore_fnenv
;
31455 tree update_call
, atomic_feraiseexcept
, hold_fnclex
;
31460 /* Generate the equivalent of :
31461 unsigned int fenv_var;
31462 fenv_var = __builtin_arm_get_fpscr ();
31464 unsigned int masked_fenv;
31465 masked_fenv = fenv_var & mask;
31467 __builtin_arm_set_fpscr (masked_fenv); */
31469 fenv_var
= create_tmp_var (unsigned_type_node
, NULL
);
31470 get_fpscr
= arm_builtin_decls
[ARM_BUILTIN_GET_FPSCR
];
31471 set_fpscr
= arm_builtin_decls
[ARM_BUILTIN_SET_FPSCR
];
31472 mask
= build_int_cst (unsigned_type_node
,
31473 ~((ARM_FE_ALL_EXCEPT
<< ARM_FE_EXCEPT_SHIFT
)
31474 | ARM_FE_ALL_EXCEPT
));
31475 ld_fenv
= build2 (MODIFY_EXPR
, unsigned_type_node
,
31476 fenv_var
, build_call_expr (get_fpscr
, 0));
31477 masked_fenv
= build2 (BIT_AND_EXPR
, unsigned_type_node
, fenv_var
, mask
);
31478 hold_fnclex
= build_call_expr (set_fpscr
, 1, masked_fenv
);
31479 *hold
= build2 (COMPOUND_EXPR
, void_type_node
,
31480 build2 (COMPOUND_EXPR
, void_type_node
, masked_fenv
, ld_fenv
),
31483 /* Store the value of masked_fenv to clear the exceptions:
31484 __builtin_arm_set_fpscr (masked_fenv); */
31486 *clear
= build_call_expr (set_fpscr
, 1, masked_fenv
);
31488 /* Generate the equivalent of :
31489 unsigned int new_fenv_var;
31490 new_fenv_var = __builtin_arm_get_fpscr ();
31492 __builtin_arm_set_fpscr (fenv_var);
31494 __atomic_feraiseexcept (new_fenv_var); */
31496 new_fenv_var
= create_tmp_var (unsigned_type_node
, NULL
);
31497 reload_fenv
= build2 (MODIFY_EXPR
, unsigned_type_node
, new_fenv_var
,
31498 build_call_expr (get_fpscr
, 0));
31499 restore_fnenv
= build_call_expr (set_fpscr
, 1, fenv_var
);
31500 atomic_feraiseexcept
= builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
31501 update_call
= build_call_expr (atomic_feraiseexcept
, 1,
31502 fold_convert (integer_type_node
, new_fenv_var
));
31503 *update
= build2 (COMPOUND_EXPR
, void_type_node
,
31504 build2 (COMPOUND_EXPR
, void_type_node
,
31505 reload_fenv
, restore_fnenv
), update_call
);
31508 #include "gt-arm.h"