1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2019 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (martin@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
26 #define INCLUDE_STRING
28 #include "coretypes.h"
37 #include "stringpool.h"
44 #include "diagnostic-core.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
51 #include "insn-attr.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
65 #include "target-globals.h"
67 #include "tm-constrs.h"
69 #include "optabs-libfuncs.h"
74 /* This file should be included last. */
75 #include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node Mnode;
typedef struct minipool_fixup Mfix;
/* The last .arch and .fpu assembly strings that we printed.  */
static std::string arm_last_printed_arch_string;
static std::string arm_last_printed_fpu_string;
/* Hook for emitting language-specific object attributes; may be null
   (presumably installed by a language front end -- confirm at callers).  */
void (*arm_lang_output_object_attributes_hook)(void);
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx
);
94 static int arm_needs_doubleword_align (machine_mode
, const_tree
);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets
*arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
100 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap
);
103 static int arm_address_register_rtx_p (rtx
, int);
104 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
105 static bool is_called_in_ARM_mode (tree
);
106 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
107 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
108 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
109 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
110 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
111 inline static int thumb1_index_register_rtx_p (rtx
, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx
, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx
, int);
117 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
118 static bool arm_print_operand_punct_valid_p (unsigned char code
);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
120 static arm_cc
get_arm_condition_code (rtx
);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx
*, const char *, const char *,
124 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
125 static struct machine_function
*arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
128 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
129 static Mnode
*add_minipool_forward_ref (Mfix
*);
130 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
131 static Mnode
*add_minipool_backward_ref (Mfix
*);
132 static void assign_minipool_offsets (Mfix
*);
133 static void arm_print_value (FILE *, rtx
);
134 static void dump_minipool (rtx_insn
*);
135 static int arm_barrier_cost (rtx_insn
*);
136 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
137 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
138 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree
);
145 static unsigned long arm_compute_func_type (void);
146 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
147 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
148 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
152 static tree
arm_handle_cmse_nonsecure_entry (tree
*, tree
, tree
, int, bool *);
153 static tree
arm_handle_cmse_nonsecure_call (tree
*, tree
, tree
, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree
, const_tree
);
157 static void arm_set_default_type_attributes (tree
);
158 static int arm_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code
,
161 unsigned HOST_WIDE_INT val
,
162 struct four_ints
*return_sequence
);
163 static int optimal_immediate_sequence_1 (enum rtx_code code
,
164 unsigned HOST_WIDE_INT val
,
165 struct four_ints
*return_sequence
,
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree
, tree
);
169 static machine_mode
arm_promote_function_mode (const_tree
,
172 static bool arm_return_in_memory (const_tree
, const_tree
);
173 static rtx
arm_function_value (const_tree
, const_tree
, bool);
174 static rtx
arm_libcall_value_1 (machine_mode
);
175 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
182 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
183 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
184 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
185 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
186 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
187 static void emit_constant_insn (rtx cond
, rtx pattern
);
188 static rtx_insn
*emit_set_insn (rtx
, rtx
);
189 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
190 static int arm_arg_partial_bytes (cumulative_args_t
,
191 const function_arg_info
&);
192 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
194 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
196 static pad_direction
arm_function_arg_padding (machine_mode
, const_tree
);
197 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
198 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
200 static rtx
aapcs_libcall_value (machine_mode
);
201 static int aapcs_select_return_coproc (const_tree
, const_tree
);
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
205 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
208 static void arm_encode_section_info (tree
, rtx
, int);
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree
, tree
*);
215 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
217 static bool arm_pass_by_reference (cumulative_args_t
,
218 const function_arg_info
&);
219 static bool arm_promote_prototypes (const_tree
);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree
);
223 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
224 static bool arm_return_in_memory (const_tree
, const_tree
);
226 static void arm_unwind_emit (FILE *, rtx_insn
*);
227 static bool arm_output_ttype (rtx
);
228 static void arm_asm_emit_except_personality (rtx
);
230 static void arm_asm_init_sections (void);
231 static rtx
arm_dwarf_register_span (rtx
);
233 static tree
arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree
arm_get_cookie_size (tree
);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree
);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree
arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree
, rtx
);
245 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
246 static void arm_option_override (void);
247 static void arm_option_save (struct cl_target_option
*, struct gcc_options
*);
248 static void arm_option_restore (struct gcc_options
*,
249 struct cl_target_option
*);
250 static void arm_override_options_after_change (void);
251 static void arm_option_print (FILE *, int, struct cl_target_option
*);
252 static void arm_set_current_function (tree
);
253 static bool arm_can_inline_p (tree
, tree
);
254 static void arm_relayout_function (tree
);
255 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
256 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
257 static bool arm_sched_can_speculate_insn (rtx_insn
*);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn
*);
260 static int arm_issue_rate (void);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
264 static bool arm_output_addr_const_extra (FILE *, rtx
);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree
);
267 static tree
arm_promoted_type (const_tree t
);
268 static bool arm_scalar_mode_supported_p (scalar_mode
);
269 static bool arm_frame_pointer_required (void);
270 static bool arm_can_eliminate (const int, const int);
271 static void arm_asm_trampoline_template (FILE *);
272 static void arm_trampoline_init (rtx
, tree
, rtx
);
273 static rtx
arm_trampoline_adjust_address (rtx
);
274 static rtx_insn
*arm_pic_static_addr (rtx orig
, rtx reg
);
275 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
276 static bool xscale_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
277 static bool fa726te_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
278 static bool arm_array_mode_supported_p (machine_mode
,
279 unsigned HOST_WIDE_INT
);
280 static machine_mode
arm_preferred_simd_mode (scalar_mode
);
281 static bool arm_class_likely_spilled_p (reg_class_t
);
282 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
283 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
284 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
288 static void arm_conditional_register_usage (void);
289 static enum flt_eval_method
arm_excess_precision (enum excess_precision_type
);
290 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
291 static void arm_autovectorize_vector_sizes (vector_sizes
*, bool);
292 static int arm_default_branch_cost (bool, bool);
293 static int arm_cortex_a5_branch_cost (bool, bool);
294 static int arm_cortex_m_branch_cost (bool, bool);
295 static int arm_cortex_m7_branch_cost (bool, bool);
297 static bool arm_vectorize_vec_perm_const (machine_mode
, rtx
, rtx
, rtx
,
298 const vec_perm_indices
&);
300 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
304 int misalign ATTRIBUTE_UNUSED
);
305 static unsigned arm_add_stmt_cost (void *data
, int count
,
306 enum vect_cost_for_stmt kind
,
307 struct _stmt_vec_info
*stmt_info
,
309 enum vect_cost_model_location where
);
311 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
312 bool op0_preserve_value
);
313 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
315 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
318 static section
*arm_function_section (tree
, enum node_frequency
, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
);
320 static unsigned int arm_elf_section_type_flags (tree decl
, const char *name
,
322 static void arm_expand_divmod_libfunc (rtx
, machine_mode
, rtx
, rtx
, rtx
*, rtx
*);
323 static opt_scalar_float_mode
arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode
);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode
);
326 static bool arm_modes_tieable_p (machine_mode
, machine_mode
);
327 static HOST_WIDE_INT
arm_constant_alignment (const_tree
, HOST_WIDE_INT
);
329 /* Table of machine attributes. */
330 static const struct attribute_spec arm_attribute_table
[] =
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
333 affects_type_identity, handler, exclude } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
337 { "long_call", 0, 0, false, true, true, false, NULL
, NULL
},
338 /* Whereas these functions are always known to reside within the 26 bit
340 { "short_call", 0, 0, false, true, true, false, NULL
, NULL
},
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute
,
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute
,
347 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute
,
349 { "naked", 0, 0, true, false, false, false,
350 arm_handle_fndecl_attribute
, NULL
},
352 /* ARM/PE has three new attributes:
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
361 { "dllimport", 0, 0, true, false, false, false, NULL
, NULL
},
362 { "dllexport", 0, 0, true, false, false, false, NULL
, NULL
},
363 { "interfacearm", 0, 0, true, false, false, false,
364 arm_handle_fndecl_attribute
, NULL
},
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute
,
368 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute
,
370 { "notshared", 0, 0, false, true, false, false,
371 arm_handle_notshared_attribute
, NULL
},
373 /* ARMv8-M Security Extensions support. */
374 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
375 arm_handle_cmse_nonsecure_entry
, NULL
},
376 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
377 arm_handle_cmse_nonsecure_call
, NULL
},
378 { NULL
, 0, 0, false, false, false, false, NULL
, NULL
}
381 /* Initialize the GCC target structure. */
382 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
383 #undef TARGET_MERGE_DECL_ATTRIBUTES
384 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
387 #undef TARGET_LEGITIMIZE_ADDRESS
388 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
390 #undef TARGET_ATTRIBUTE_TABLE
391 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
393 #undef TARGET_INSERT_ATTRIBUTES
394 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START arm_file_start
398 #undef TARGET_ASM_FILE_END
399 #define TARGET_ASM_FILE_END arm_file_end
401 #undef TARGET_ASM_ALIGNED_SI_OP
402 #define TARGET_ASM_ALIGNED_SI_OP NULL
403 #undef TARGET_ASM_INTEGER
404 #define TARGET_ASM_INTEGER arm_assemble_integer
406 #undef TARGET_PRINT_OPERAND
407 #define TARGET_PRINT_OPERAND arm_print_operand
408 #undef TARGET_PRINT_OPERAND_ADDRESS
409 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
410 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
411 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
413 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
414 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
416 #undef TARGET_ASM_FUNCTION_PROLOGUE
417 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
419 #undef TARGET_ASM_FUNCTION_EPILOGUE
420 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
422 #undef TARGET_CAN_INLINE_P
423 #define TARGET_CAN_INLINE_P arm_can_inline_p
425 #undef TARGET_RELAYOUT_FUNCTION
426 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
428 #undef TARGET_OPTION_OVERRIDE
429 #define TARGET_OPTION_OVERRIDE arm_option_override
431 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
432 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
434 #undef TARGET_OPTION_SAVE
435 #define TARGET_OPTION_SAVE arm_option_save
437 #undef TARGET_OPTION_RESTORE
438 #define TARGET_OPTION_RESTORE arm_option_restore
440 #undef TARGET_OPTION_PRINT
441 #define TARGET_OPTION_PRINT arm_option_print
443 #undef TARGET_COMP_TYPE_ATTRIBUTES
444 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
446 #undef TARGET_SCHED_CAN_SPECULATE_INSN
447 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
449 #undef TARGET_SCHED_MACRO_FUSION_P
450 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
452 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
453 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
455 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
456 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
458 #undef TARGET_SCHED_ADJUST_COST
459 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
461 #undef TARGET_SET_CURRENT_FUNCTION
462 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
464 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
465 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
467 #undef TARGET_SCHED_REORDER
468 #define TARGET_SCHED_REORDER arm_sched_reorder
470 #undef TARGET_REGISTER_MOVE_COST
471 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
473 #undef TARGET_MEMORY_MOVE_COST
474 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
476 #undef TARGET_ENCODE_SECTION_INFO
478 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
480 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
483 #undef TARGET_STRIP_NAME_ENCODING
484 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
486 #undef TARGET_ASM_INTERNAL_LABEL
487 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
489 #undef TARGET_FLOATN_MODE
490 #define TARGET_FLOATN_MODE arm_floatn_mode
492 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
493 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
495 #undef TARGET_FUNCTION_VALUE
496 #define TARGET_FUNCTION_VALUE arm_function_value
498 #undef TARGET_LIBCALL_VALUE
499 #define TARGET_LIBCALL_VALUE arm_libcall_value
501 #undef TARGET_FUNCTION_VALUE_REGNO_P
502 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
504 #undef TARGET_ASM_OUTPUT_MI_THUNK
505 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
506 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
507 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
509 #undef TARGET_RTX_COSTS
510 #define TARGET_RTX_COSTS arm_rtx_costs
511 #undef TARGET_ADDRESS_COST
512 #define TARGET_ADDRESS_COST arm_address_cost
514 #undef TARGET_SHIFT_TRUNCATION_MASK
515 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
516 #undef TARGET_VECTOR_MODE_SUPPORTED_P
517 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
518 #undef TARGET_ARRAY_MODE_SUPPORTED_P
519 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
520 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
521 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
522 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
523 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
524 arm_autovectorize_vector_sizes
526 #undef TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
529 #undef TARGET_INIT_BUILTINS
530 #define TARGET_INIT_BUILTINS arm_init_builtins
531 #undef TARGET_EXPAND_BUILTIN
532 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
533 #undef TARGET_BUILTIN_DECL
534 #define TARGET_BUILTIN_DECL arm_builtin_decl
536 #undef TARGET_INIT_LIBFUNCS
537 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
541 #undef TARGET_PROMOTE_PROTOTYPES
542 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
543 #undef TARGET_PASS_BY_REFERENCE
544 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
545 #undef TARGET_ARG_PARTIAL_BYTES
546 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
547 #undef TARGET_FUNCTION_ARG
548 #define TARGET_FUNCTION_ARG arm_function_arg
549 #undef TARGET_FUNCTION_ARG_ADVANCE
550 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
551 #undef TARGET_FUNCTION_ARG_PADDING
552 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
553 #undef TARGET_FUNCTION_ARG_BOUNDARY
554 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
556 #undef TARGET_SETUP_INCOMING_VARARGS
557 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
559 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
560 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
562 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
563 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
564 #undef TARGET_TRAMPOLINE_INIT
565 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
566 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
567 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
569 #undef TARGET_WARN_FUNC_RETURN
570 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
572 #undef TARGET_DEFAULT_SHORT_ENUMS
573 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
575 #undef TARGET_ALIGN_ANON_BITFIELD
576 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
578 #undef TARGET_NARROW_VOLATILE_BITFIELD
579 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
581 #undef TARGET_CXX_GUARD_TYPE
582 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
584 #undef TARGET_CXX_GUARD_MASK_BIT
585 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
587 #undef TARGET_CXX_GET_COOKIE_SIZE
588 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
590 #undef TARGET_CXX_COOKIE_HAS_SIZE
591 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
593 #undef TARGET_CXX_CDTOR_RETURNS_THIS
594 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
596 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
597 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
599 #undef TARGET_CXX_USE_AEABI_ATEXIT
600 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
602 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
603 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
604 arm_cxx_determine_class_data_visibility
606 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
607 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
609 #undef TARGET_RETURN_IN_MSB
610 #define TARGET_RETURN_IN_MSB arm_return_in_msb
612 #undef TARGET_RETURN_IN_MEMORY
613 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
615 #undef TARGET_MUST_PASS_IN_STACK
616 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
619 #undef TARGET_ASM_UNWIND_EMIT
620 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
622 /* EABI unwinding tables use a different format for the typeinfo tables. */
623 #undef TARGET_ASM_TTYPE
624 #define TARGET_ASM_TTYPE arm_output_ttype
626 #undef TARGET_ARM_EABI_UNWINDER
627 #define TARGET_ARM_EABI_UNWINDER true
629 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
630 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
632 #endif /* ARM_UNWIND_INFO */
634 #undef TARGET_ASM_INIT_SECTIONS
635 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
637 #undef TARGET_DWARF_REGISTER_SPAN
638 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
640 #undef TARGET_CANNOT_COPY_INSN_P
641 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
644 #undef TARGET_HAVE_TLS
645 #define TARGET_HAVE_TLS true
648 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
649 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
651 #undef TARGET_LEGITIMATE_CONSTANT_P
652 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
654 #undef TARGET_CANNOT_FORCE_CONST_MEM
655 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
657 #undef TARGET_MAX_ANCHOR_OFFSET
658 #define TARGET_MAX_ANCHOR_OFFSET 4095
660 /* The minimum is set such that the total size of the block
661 for a particular anchor is -4088 + 1 + 4095 bytes, which is
662 divisible by eight, ensuring natural spacing of anchors. */
663 #undef TARGET_MIN_ANCHOR_OFFSET
664 #define TARGET_MIN_ANCHOR_OFFSET -4088
666 #undef TARGET_SCHED_ISSUE_RATE
667 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
669 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
670 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
671 arm_first_cycle_multipass_dfa_lookahead
673 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
674 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
675 arm_first_cycle_multipass_dfa_lookahead_guard
677 #undef TARGET_MANGLE_TYPE
678 #define TARGET_MANGLE_TYPE arm_mangle_type
680 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
681 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
683 #undef TARGET_BUILD_BUILTIN_VA_LIST
684 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
685 #undef TARGET_EXPAND_BUILTIN_VA_START
686 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
687 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
688 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
691 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
692 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
695 #undef TARGET_LEGITIMATE_ADDRESS_P
696 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
698 #undef TARGET_PREFERRED_RELOAD_CLASS
699 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
701 #undef TARGET_PROMOTED_TYPE
702 #define TARGET_PROMOTED_TYPE arm_promoted_type
704 #undef TARGET_SCALAR_MODE_SUPPORTED_P
705 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
707 #undef TARGET_COMPUTE_FRAME_LAYOUT
708 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
710 #undef TARGET_FRAME_POINTER_REQUIRED
711 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
713 #undef TARGET_CAN_ELIMINATE
714 #define TARGET_CAN_ELIMINATE arm_can_eliminate
716 #undef TARGET_CONDITIONAL_REGISTER_USAGE
717 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
719 #undef TARGET_CLASS_LIKELY_SPILLED_P
720 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
722 #undef TARGET_VECTORIZE_BUILTINS
723 #define TARGET_VECTORIZE_BUILTINS
725 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
726 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
727 arm_builtin_vectorized_function
729 #undef TARGET_VECTOR_ALIGNMENT
730 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
732 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
733 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
734 arm_vector_alignment_reachable
736 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
737 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
738 arm_builtin_support_vector_misalignment
740 #undef TARGET_PREFERRED_RENAME_CLASS
741 #define TARGET_PREFERRED_RENAME_CLASS \
742 arm_preferred_rename_class
744 #undef TARGET_VECTORIZE_VEC_PERM_CONST
745 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
749 arm_builtin_vectorization_cost
750 #undef TARGET_VECTORIZE_ADD_STMT_COST
751 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
753 #undef TARGET_CANONICALIZE_COMPARISON
754 #define TARGET_CANONICALIZE_COMPARISON \
755 arm_canonicalize_comparison
757 #undef TARGET_ASAN_SHADOW_OFFSET
758 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
760 #undef MAX_INSN_PER_IT_BLOCK
761 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
763 #undef TARGET_CAN_USE_DOLOOP_P
764 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
766 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
767 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
769 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
770 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
772 #undef TARGET_SCHED_FUSION_PRIORITY
773 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
775 #undef TARGET_ASM_FUNCTION_SECTION
776 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
778 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
779 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
781 #undef TARGET_SECTION_TYPE_FLAGS
782 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
784 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
785 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
787 #undef TARGET_C_EXCESS_PRECISION
788 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
790 /* Although the architecture reserves bits 0 and 1, only the former is
791 used for ARM/Thumb ISA selection in v7 and earlier versions. */
792 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
793 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
795 #undef TARGET_FIXED_CONDITION_CODE_REGS
796 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
798 #undef TARGET_HARD_REGNO_NREGS
799 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
800 #undef TARGET_HARD_REGNO_MODE_OK
801 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
803 #undef TARGET_MODES_TIEABLE_P
804 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
806 #undef TARGET_CAN_CHANGE_MODE_CLASS
807 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
809 #undef TARGET_CONSTANT_ALIGNMENT
810 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
812 /* Obstack for minipool constant handling. */
813 static struct obstack minipool_obstack
;
814 static char * minipool_startobj
;
/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
/* The assembly output stream; defined elsewhere in the compiler.  */
extern FILE *asm_out_file;
/* True if we are currently building a constant table.  */
int making_const_table;
825 /* The processor for which instructions should be scheduled. */
826 enum processor_type arm_tune
= TARGET_CPU_arm_none
;
828 /* The current tuning set. */
829 const struct tune_params
*current_tune
;
831 /* Which floating point hardware to schedule for. */
834 /* Used for Thumb call_via trampolines. */
835 rtx thumb_call_via_label
[14];
836 static int thumb_call_reg_needed
;
838 /* The bits in this mask specify which instruction scheduling options should
840 unsigned int tune_flags
= 0;
842 /* The highest ARM architecture version supported by the
844 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
846 /* Active target architecture and tuning. */
848 struct arm_build_target arm_active_target
;
850 /* The following are used in the arm.md file as equivalents to bits
851 in the above two flag variables. */
853 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
856 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
859 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
862 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
865 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
868 /* Nonzero if this chip supports the ARM 6K extensions. */
871 /* Nonzero if this chip supports the ARM 6KZ extensions. */
874 /* Nonzero if instructions present in ARMv6-M can be used. */
877 /* Nonzero if this chip supports the ARM 7 extensions. */
880 /* Nonzero if this chip supports the Large Physical Address Extension. */
881 int arm_arch_lpae
= 0;
883 /* Nonzero if instructions not present in the 'M' profile can be used. */
884 int arm_arch_notm
= 0;
886 /* Nonzero if instructions present in ARMv7E-M can be used. */
889 /* Nonzero if instructions present in ARMv8 can be used. */
892 /* Nonzero if this chip supports the ARMv8.1 extensions. */
895 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
898 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
901 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
904 /* Nonzero if this chip supports the FP16 instructions extension of ARM
906 int arm_fp16_inst
= 0;
908 /* Nonzero if this chip can benefit from load scheduling. */
909 int arm_ld_sched
= 0;
911 /* Nonzero if this chip is a StrongARM. */
912 int arm_tune_strongarm
= 0;
914 /* Nonzero if this chip supports Intel Wireless MMX technology. */
915 int arm_arch_iwmmxt
= 0;
917 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
918 int arm_arch_iwmmxt2
= 0;
920 /* Nonzero if this chip is an XScale. */
921 int arm_arch_xscale
= 0;
923 /* Nonzero if tuning for XScale */
924 int arm_tune_xscale
= 0;
926 /* Nonzero if we want to tune for stores that access the write-buffer.
927 This typically means an ARM6 or ARM7 with MMU or MPU. */
928 int arm_tune_wbuf
= 0;
930 /* Nonzero if tuning for Cortex-A9. */
931 int arm_tune_cortex_a9
= 0;
933 /* Nonzero if we should define __THUMB_INTERWORK__ in the
935 XXX This is a bit of a hack, it's intended to help work around
936 problems in GLD which doesn't understand that armv5t code is
937 interworking clean. */
938 int arm_cpp_interwork
= 0;
940 /* Nonzero if chip supports Thumb 1. */
943 /* Nonzero if chip supports Thumb 2. */
946 /* Nonzero if chip supports integer division instruction. */
947 int arm_arch_arm_hwdiv
;
948 int arm_arch_thumb_hwdiv
;
950 /* Nonzero if chip disallows volatile memory access in IT block. */
951 int arm_arch_no_volatile_ce
;
953 /* Nonzero if we should use Neon to handle 64-bits operations rather
954 than core registers. */
955 int prefer_neon_for_64bits
= 0;
957 /* Nonzero if we shouldn't use literal pools. */
958 bool arm_disable_literal_pool
= false;
960 /* The register number to be used for the PIC offset register. */
961 unsigned arm_pic_register
= INVALID_REGNUM
;
963 enum arm_pcs arm_pcs_default
;
965 /* For an explanation of these variables, see final_prescan_insn below. */
967 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
968 enum arm_cond_code arm_current_cc
;
971 int arm_target_label
;
972 /* The number of conditionally executed insns, including the current insn. */
973 int arm_condexec_count
= 0;
974 /* A bitmask specifying the patterns for the IT block.
975 Zero means do not output an IT block before this insn. */
976 int arm_condexec_mask
= 0;
977 /* The number of bits used in arm_condexec_mask. */
978 int arm_condexec_masklen
= 0;
980 /* Nonzero if chip supports the ARMv8 CRC instructions. */
981 int arm_arch_crc
= 0;
983 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
984 int arm_arch_dotprod
= 0;
986 /* Nonzero if chip supports the ARMv8-M security extensions. */
987 int arm_arch_cmse
= 0;
989 /* Nonzero if the core has a very small, high-latency, multiply unit. */
990 int arm_m_profile_small_mul
= 0;
992 /* The condition codes of the ARM, and the inverse function. */
993 static const char * const arm_condition_codes
[] =
995 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
996 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
999 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1000 int arm_regs_in_sequence
[] =
1002 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1005 #define ARM_LSL_NAME "lsl"
1006 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1008 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1009 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1010 | (1 << PIC_OFFSET_TABLE_REGNUM)))
1012 /* Initialization code. */
1016 enum processor_type scheduler
;
1017 unsigned int tune_flags
;
1018 const struct tune_params
*tune
;
1021 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1022 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
/* Default per-operation cost vector used by the auto-vectorizer's cost
   model; all scalar/vector operations cost 1 except a taken conditional
   branch (3).
   NOTE(review): the closing '};' of this initializer is missing from this
   extraction (original-line numbering jumps past it); code left byte-identical.  */
1029 /* arm generic vectorizer costs. */
1031 struct cpu_vec_costs arm_default_vec_cost
= {
1032 1, /* scalar_stmt_cost. */
1033 1, /* scalar load_cost. */
1034 1, /* scalar_store_cost. */
1035 1, /* vec_stmt_cost. */
1036 1, /* vec_to_scalar_cost. */
1037 1, /* scalar_to_vec_cost. */
1038 1, /* vec_align_load_cost. */
1039 1, /* vec_unalign_load_cost. */
1040 1, /* vec_unalign_store_cost. */
1041 1, /* vec_store_cost. */
1042 3, /* cond_taken_branch_cost. */
1043 1, /* cond_not_taken_branch_cost. */
/* Per-operation RTX cost table for the Cortex-A9 core (ALU, integer
   multiply, load/store, FP and vector sections, in cpu_cost_table field
   order as indicated by the trailing field-name comments).
   NOTE(review): section braces and several field lines are missing from
   this extraction (original-line numbering has gaps); code left
   byte-identical.  */
1046 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1047 #include "aarch-cost-tables.h"
1051 const struct cpu_cost_table cortexa9_extra_costs
=
1058 COSTS_N_INSNS (1), /* shift_reg. */
1059 COSTS_N_INSNS (1), /* arith_shift. */
1060 COSTS_N_INSNS (2), /* arith_shift_reg. */
1062 COSTS_N_INSNS (1), /* log_shift_reg. */
1063 COSTS_N_INSNS (1), /* extend. */
1064 COSTS_N_INSNS (2), /* extend_arith. */
1065 COSTS_N_INSNS (1), /* bfi. */
1066 COSTS_N_INSNS (1), /* bfx. */
1070 true /* non_exec_costs_exec. */
1075 COSTS_N_INSNS (3), /* simple. */
1076 COSTS_N_INSNS (3), /* flag_setting. */
1077 COSTS_N_INSNS (2), /* extend. */
1078 COSTS_N_INSNS (3), /* add. */
1079 COSTS_N_INSNS (2), /* extend_add. */
1080 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1084 0, /* simple (N/A). */
1085 0, /* flag_setting (N/A). */
1086 COSTS_N_INSNS (4), /* extend. */
1088 COSTS_N_INSNS (4), /* extend_add. */
1094 COSTS_N_INSNS (2), /* load. */
1095 COSTS_N_INSNS (2), /* load_sign_extend. */
1096 COSTS_N_INSNS (2), /* ldrd. */
1097 COSTS_N_INSNS (2), /* ldm_1st. */
1098 1, /* ldm_regs_per_insn_1st. */
1099 2, /* ldm_regs_per_insn_subsequent. */
1100 COSTS_N_INSNS (5), /* loadf. */
1101 COSTS_N_INSNS (5), /* loadd. */
1102 COSTS_N_INSNS (1), /* load_unaligned. */
1103 COSTS_N_INSNS (2), /* store. */
1104 COSTS_N_INSNS (2), /* strd. */
1105 COSTS_N_INSNS (2), /* stm_1st. */
1106 1, /* stm_regs_per_insn_1st. */
1107 2, /* stm_regs_per_insn_subsequent. */
1108 COSTS_N_INSNS (1), /* storef. */
1109 COSTS_N_INSNS (1), /* stored. */
1110 COSTS_N_INSNS (1), /* store_unaligned. */
1111 COSTS_N_INSNS (1), /* loadv. */
1112 COSTS_N_INSNS (1) /* storev. */
1117 COSTS_N_INSNS (14), /* div. */
1118 COSTS_N_INSNS (4), /* mult. */
1119 COSTS_N_INSNS (7), /* mult_addsub. */
1120 COSTS_N_INSNS (30), /* fma. */
1121 COSTS_N_INSNS (3), /* addsub. */
1122 COSTS_N_INSNS (1), /* fpconst. */
1123 COSTS_N_INSNS (1), /* neg. */
1124 COSTS_N_INSNS (3), /* compare. */
1125 COSTS_N_INSNS (3), /* widen. */
1126 COSTS_N_INSNS (3), /* narrow. */
1127 COSTS_N_INSNS (3), /* toint. */
1128 COSTS_N_INSNS (3), /* fromint. */
1129 COSTS_N_INSNS (3) /* roundint. */
1133 COSTS_N_INSNS (24), /* div. */
1134 COSTS_N_INSNS (5), /* mult. */
1135 COSTS_N_INSNS (8), /* mult_addsub. */
1136 COSTS_N_INSNS (30), /* fma. */
1137 COSTS_N_INSNS (3), /* addsub. */
1138 COSTS_N_INSNS (1), /* fpconst. */
1139 COSTS_N_INSNS (1), /* neg. */
1140 COSTS_N_INSNS (3), /* compare. */
1141 COSTS_N_INSNS (3), /* widen. */
1142 COSTS_N_INSNS (3), /* narrow. */
1143 COSTS_N_INSNS (3), /* toint. */
1144 COSTS_N_INSNS (3), /* fromint. */
1145 COSTS_N_INSNS (3) /* roundint. */
1150 COSTS_N_INSNS (1) /* alu. */
/* Per-operation RTX cost table for the Cortex-A8 core.  Note the idiv
   entry of 30 insns: A8 has no hardware integer divide.
   NOTE(review): section braces and several field lines are missing from
   this extraction; code left byte-identical.  */
1154 const struct cpu_cost_table cortexa8_extra_costs
=
1160 COSTS_N_INSNS (1), /* shift. */
1162 COSTS_N_INSNS (1), /* arith_shift. */
1163 0, /* arith_shift_reg. */
1164 COSTS_N_INSNS (1), /* log_shift. */
1165 0, /* log_shift_reg. */
1167 0, /* extend_arith. */
1173 true /* non_exec_costs_exec. */
1178 COSTS_N_INSNS (1), /* simple. */
1179 COSTS_N_INSNS (1), /* flag_setting. */
1180 COSTS_N_INSNS (1), /* extend. */
1181 COSTS_N_INSNS (1), /* add. */
1182 COSTS_N_INSNS (1), /* extend_add. */
1183 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1187 0, /* simple (N/A). */
1188 0, /* flag_setting (N/A). */
1189 COSTS_N_INSNS (2), /* extend. */
1191 COSTS_N_INSNS (2), /* extend_add. */
1197 COSTS_N_INSNS (1), /* load. */
1198 COSTS_N_INSNS (1), /* load_sign_extend. */
1199 COSTS_N_INSNS (1), /* ldrd. */
1200 COSTS_N_INSNS (1), /* ldm_1st. */
1201 1, /* ldm_regs_per_insn_1st. */
1202 2, /* ldm_regs_per_insn_subsequent. */
1203 COSTS_N_INSNS (1), /* loadf. */
1204 COSTS_N_INSNS (1), /* loadd. */
1205 COSTS_N_INSNS (1), /* load_unaligned. */
1206 COSTS_N_INSNS (1), /* store. */
1207 COSTS_N_INSNS (1), /* strd. */
1208 COSTS_N_INSNS (1), /* stm_1st. */
1209 1, /* stm_regs_per_insn_1st. */
1210 2, /* stm_regs_per_insn_subsequent. */
1211 COSTS_N_INSNS (1), /* storef. */
1212 COSTS_N_INSNS (1), /* stored. */
1213 COSTS_N_INSNS (1), /* store_unaligned. */
1214 COSTS_N_INSNS (1), /* loadv. */
1215 COSTS_N_INSNS (1) /* storev. */
1220 COSTS_N_INSNS (36), /* div. */
1221 COSTS_N_INSNS (11), /* mult. */
1222 COSTS_N_INSNS (20), /* mult_addsub. */
1223 COSTS_N_INSNS (30), /* fma. */
1224 COSTS_N_INSNS (9), /* addsub. */
1225 COSTS_N_INSNS (3), /* fpconst. */
1226 COSTS_N_INSNS (3), /* neg. */
1227 COSTS_N_INSNS (6), /* compare. */
1228 COSTS_N_INSNS (4), /* widen. */
1229 COSTS_N_INSNS (4), /* narrow. */
1230 COSTS_N_INSNS (8), /* toint. */
1231 COSTS_N_INSNS (8), /* fromint. */
1232 COSTS_N_INSNS (8) /* roundint. */
1236 COSTS_N_INSNS (64), /* div. */
1237 COSTS_N_INSNS (16), /* mult. */
1238 COSTS_N_INSNS (25), /* mult_addsub. */
1239 COSTS_N_INSNS (30), /* fma. */
1240 COSTS_N_INSNS (9), /* addsub. */
1241 COSTS_N_INSNS (3), /* fpconst. */
1242 COSTS_N_INSNS (3), /* neg. */
1243 COSTS_N_INSNS (6), /* compare. */
1244 COSTS_N_INSNS (6), /* widen. */
1245 COSTS_N_INSNS (6), /* narrow. */
1246 COSTS_N_INSNS (8), /* toint. */
1247 COSTS_N_INSNS (8), /* fromint. */
1248 COSTS_N_INSNS (8) /* roundint. */
1253 COSTS_N_INSNS (1) /* alu. */
/* Per-operation RTX cost table for the Cortex-A5 core.
   NOTE(review): section braces and a few field lines are missing from this
   extraction; code left byte-identical.  */
1257 const struct cpu_cost_table cortexa5_extra_costs
=
1263 COSTS_N_INSNS (1), /* shift. */
1264 COSTS_N_INSNS (1), /* shift_reg. */
1265 COSTS_N_INSNS (1), /* arith_shift. */
1266 COSTS_N_INSNS (1), /* arith_shift_reg. */
1267 COSTS_N_INSNS (1), /* log_shift. */
1268 COSTS_N_INSNS (1), /* log_shift_reg. */
1269 COSTS_N_INSNS (1), /* extend. */
1270 COSTS_N_INSNS (1), /* extend_arith. */
1271 COSTS_N_INSNS (1), /* bfi. */
1272 COSTS_N_INSNS (1), /* bfx. */
1273 COSTS_N_INSNS (1), /* clz. */
1274 COSTS_N_INSNS (1), /* rev. */
1276 true /* non_exec_costs_exec. */
1283 COSTS_N_INSNS (1), /* flag_setting. */
1284 COSTS_N_INSNS (1), /* extend. */
1285 COSTS_N_INSNS (1), /* add. */
1286 COSTS_N_INSNS (1), /* extend_add. */
1287 COSTS_N_INSNS (7) /* idiv. */
1291 0, /* simple (N/A). */
1292 0, /* flag_setting (N/A). */
1293 COSTS_N_INSNS (1), /* extend. */
1295 COSTS_N_INSNS (2), /* extend_add. */
1301 COSTS_N_INSNS (1), /* load. */
1302 COSTS_N_INSNS (1), /* load_sign_extend. */
1303 COSTS_N_INSNS (6), /* ldrd. */
1304 COSTS_N_INSNS (1), /* ldm_1st. */
1305 1, /* ldm_regs_per_insn_1st. */
1306 2, /* ldm_regs_per_insn_subsequent. */
1307 COSTS_N_INSNS (2), /* loadf. */
1308 COSTS_N_INSNS (4), /* loadd. */
1309 COSTS_N_INSNS (1), /* load_unaligned. */
1310 COSTS_N_INSNS (1), /* store. */
1311 COSTS_N_INSNS (3), /* strd. */
1312 COSTS_N_INSNS (1), /* stm_1st. */
1313 1, /* stm_regs_per_insn_1st. */
1314 2, /* stm_regs_per_insn_subsequent. */
1315 COSTS_N_INSNS (2), /* storef. */
1316 COSTS_N_INSNS (2), /* stored. */
1317 COSTS_N_INSNS (1), /* store_unaligned. */
1318 COSTS_N_INSNS (1), /* loadv. */
1319 COSTS_N_INSNS (1) /* storev. */
1324 COSTS_N_INSNS (15), /* div. */
1325 COSTS_N_INSNS (3), /* mult. */
1326 COSTS_N_INSNS (7), /* mult_addsub. */
1327 COSTS_N_INSNS (7), /* fma. */
1328 COSTS_N_INSNS (3), /* addsub. */
1329 COSTS_N_INSNS (3), /* fpconst. */
1330 COSTS_N_INSNS (3), /* neg. */
1331 COSTS_N_INSNS (3), /* compare. */
1332 COSTS_N_INSNS (3), /* widen. */
1333 COSTS_N_INSNS (3), /* narrow. */
1334 COSTS_N_INSNS (3), /* toint. */
1335 COSTS_N_INSNS (3), /* fromint. */
1336 COSTS_N_INSNS (3) /* roundint. */
1340 COSTS_N_INSNS (30), /* div. */
1341 COSTS_N_INSNS (6), /* mult. */
1342 COSTS_N_INSNS (10), /* mult_addsub. */
1343 COSTS_N_INSNS (7), /* fma. */
1344 COSTS_N_INSNS (3), /* addsub. */
1345 COSTS_N_INSNS (3), /* fpconst. */
1346 COSTS_N_INSNS (3), /* neg. */
1347 COSTS_N_INSNS (3), /* compare. */
1348 COSTS_N_INSNS (3), /* widen. */
1349 COSTS_N_INSNS (3), /* narrow. */
1350 COSTS_N_INSNS (3), /* toint. */
1351 COSTS_N_INSNS (3), /* fromint. */
1352 COSTS_N_INSNS (3) /* roundint. */
1357 COSTS_N_INSNS (1) /* alu. */
/* Per-operation RTX cost table for the Cortex-A7 core.
   NOTE(review): section braces and a few field lines are missing from this
   extraction; code left byte-identical.  */
1362 const struct cpu_cost_table cortexa7_extra_costs
=
1368 COSTS_N_INSNS (1), /* shift. */
1369 COSTS_N_INSNS (1), /* shift_reg. */
1370 COSTS_N_INSNS (1), /* arith_shift. */
1371 COSTS_N_INSNS (1), /* arith_shift_reg. */
1372 COSTS_N_INSNS (1), /* log_shift. */
1373 COSTS_N_INSNS (1), /* log_shift_reg. */
1374 COSTS_N_INSNS (1), /* extend. */
1375 COSTS_N_INSNS (1), /* extend_arith. */
1376 COSTS_N_INSNS (1), /* bfi. */
1377 COSTS_N_INSNS (1), /* bfx. */
1378 COSTS_N_INSNS (1), /* clz. */
1379 COSTS_N_INSNS (1), /* rev. */
1381 true /* non_exec_costs_exec. */
1388 COSTS_N_INSNS (1), /* flag_setting. */
1389 COSTS_N_INSNS (1), /* extend. */
1390 COSTS_N_INSNS (1), /* add. */
1391 COSTS_N_INSNS (1), /* extend_add. */
1392 COSTS_N_INSNS (7) /* idiv. */
1396 0, /* simple (N/A). */
1397 0, /* flag_setting (N/A). */
1398 COSTS_N_INSNS (1), /* extend. */
1400 COSTS_N_INSNS (2), /* extend_add. */
1406 COSTS_N_INSNS (1), /* load. */
1407 COSTS_N_INSNS (1), /* load_sign_extend. */
1408 COSTS_N_INSNS (3), /* ldrd. */
1409 COSTS_N_INSNS (1), /* ldm_1st. */
1410 1, /* ldm_regs_per_insn_1st. */
1411 2, /* ldm_regs_per_insn_subsequent. */
1412 COSTS_N_INSNS (2), /* loadf. */
1413 COSTS_N_INSNS (2), /* loadd. */
1414 COSTS_N_INSNS (1), /* load_unaligned. */
1415 COSTS_N_INSNS (1), /* store. */
1416 COSTS_N_INSNS (3), /* strd. */
1417 COSTS_N_INSNS (1), /* stm_1st. */
1418 1, /* stm_regs_per_insn_1st. */
1419 2, /* stm_regs_per_insn_subsequent. */
1420 COSTS_N_INSNS (2), /* storef. */
1421 COSTS_N_INSNS (2), /* stored. */
1422 COSTS_N_INSNS (1), /* store_unaligned. */
1423 COSTS_N_INSNS (1), /* loadv. */
1424 COSTS_N_INSNS (1) /* storev. */
1429 COSTS_N_INSNS (15), /* div. */
1430 COSTS_N_INSNS (3), /* mult. */
1431 COSTS_N_INSNS (7), /* mult_addsub. */
1432 COSTS_N_INSNS (7), /* fma. */
1433 COSTS_N_INSNS (3), /* addsub. */
1434 COSTS_N_INSNS (3), /* fpconst. */
1435 COSTS_N_INSNS (3), /* neg. */
1436 COSTS_N_INSNS (3), /* compare. */
1437 COSTS_N_INSNS (3), /* widen. */
1438 COSTS_N_INSNS (3), /* narrow. */
1439 COSTS_N_INSNS (3), /* toint. */
1440 COSTS_N_INSNS (3), /* fromint. */
1441 COSTS_N_INSNS (3) /* roundint. */
1445 COSTS_N_INSNS (30), /* div. */
1446 COSTS_N_INSNS (6), /* mult. */
1447 COSTS_N_INSNS (10), /* mult_addsub. */
1448 COSTS_N_INSNS (7), /* fma. */
1449 COSTS_N_INSNS (3), /* addsub. */
1450 COSTS_N_INSNS (3), /* fpconst. */
1451 COSTS_N_INSNS (3), /* neg. */
1452 COSTS_N_INSNS (3), /* compare. */
1453 COSTS_N_INSNS (3), /* widen. */
1454 COSTS_N_INSNS (3), /* narrow. */
1455 COSTS_N_INSNS (3), /* toint. */
1456 COSTS_N_INSNS (3), /* fromint. */
1457 COSTS_N_INSNS (3) /* roundint. */
1462 COSTS_N_INSNS (1) /* alu. */
/* Per-operation RTX cost table for the Cortex-A12 core.
   NOTE(review): section braces and several field lines (including store,
   strd, stm_1st) are missing from this extraction; code left byte-identical.  */
1466 const struct cpu_cost_table cortexa12_extra_costs
=
1473 COSTS_N_INSNS (1), /* shift_reg. */
1474 COSTS_N_INSNS (1), /* arith_shift. */
1475 COSTS_N_INSNS (1), /* arith_shift_reg. */
1476 COSTS_N_INSNS (1), /* log_shift. */
1477 COSTS_N_INSNS (1), /* log_shift_reg. */
1479 COSTS_N_INSNS (1), /* extend_arith. */
1481 COSTS_N_INSNS (1), /* bfx. */
1482 COSTS_N_INSNS (1), /* clz. */
1483 COSTS_N_INSNS (1), /* rev. */
1485 true /* non_exec_costs_exec. */
1490 COSTS_N_INSNS (2), /* simple. */
1491 COSTS_N_INSNS (3), /* flag_setting. */
1492 COSTS_N_INSNS (2), /* extend. */
1493 COSTS_N_INSNS (3), /* add. */
1494 COSTS_N_INSNS (2), /* extend_add. */
1495 COSTS_N_INSNS (18) /* idiv. */
1499 0, /* simple (N/A). */
1500 0, /* flag_setting (N/A). */
1501 COSTS_N_INSNS (3), /* extend. */
1503 COSTS_N_INSNS (3), /* extend_add. */
1509 COSTS_N_INSNS (3), /* load. */
1510 COSTS_N_INSNS (3), /* load_sign_extend. */
1511 COSTS_N_INSNS (3), /* ldrd. */
1512 COSTS_N_INSNS (3), /* ldm_1st. */
1513 1, /* ldm_regs_per_insn_1st. */
1514 2, /* ldm_regs_per_insn_subsequent. */
1515 COSTS_N_INSNS (3), /* loadf. */
1516 COSTS_N_INSNS (3), /* loadd. */
1517 0, /* load_unaligned. */
1521 1, /* stm_regs_per_insn_1st. */
1522 2, /* stm_regs_per_insn_subsequent. */
1523 COSTS_N_INSNS (2), /* storef. */
1524 COSTS_N_INSNS (2), /* stored. */
1525 0, /* store_unaligned. */
1526 COSTS_N_INSNS (1), /* loadv. */
1527 COSTS_N_INSNS (1) /* storev. */
1532 COSTS_N_INSNS (17), /* div. */
1533 COSTS_N_INSNS (4), /* mult. */
1534 COSTS_N_INSNS (8), /* mult_addsub. */
1535 COSTS_N_INSNS (8), /* fma. */
1536 COSTS_N_INSNS (4), /* addsub. */
1537 COSTS_N_INSNS (2), /* fpconst. */
1538 COSTS_N_INSNS (2), /* neg. */
1539 COSTS_N_INSNS (2), /* compare. */
1540 COSTS_N_INSNS (4), /* widen. */
1541 COSTS_N_INSNS (4), /* narrow. */
1542 COSTS_N_INSNS (4), /* toint. */
1543 COSTS_N_INSNS (4), /* fromint. */
1544 COSTS_N_INSNS (4) /* roundint. */
1548 COSTS_N_INSNS (31), /* div. */
1549 COSTS_N_INSNS (4), /* mult. */
1550 COSTS_N_INSNS (8), /* mult_addsub. */
1551 COSTS_N_INSNS (8), /* fma. */
1552 COSTS_N_INSNS (4), /* addsub. */
1553 COSTS_N_INSNS (2), /* fpconst. */
1554 COSTS_N_INSNS (2), /* neg. */
1555 COSTS_N_INSNS (2), /* compare. */
1556 COSTS_N_INSNS (4), /* widen. */
1557 COSTS_N_INSNS (4), /* narrow. */
1558 COSTS_N_INSNS (4), /* toint. */
1559 COSTS_N_INSNS (4), /* fromint. */
1560 COSTS_N_INSNS (4) /* roundint. */
1565 COSTS_N_INSNS (1) /* alu. */
/* Per-operation RTX cost table for the Cortex-A15 core.
   NOTE(review): section braces and several field lines are missing from
   this extraction; code left byte-identical.  */
1569 const struct cpu_cost_table cortexa15_extra_costs
=
1577 COSTS_N_INSNS (1), /* arith_shift. */
1578 COSTS_N_INSNS (1), /* arith_shift_reg. */
1579 COSTS_N_INSNS (1), /* log_shift. */
1580 COSTS_N_INSNS (1), /* log_shift_reg. */
1582 COSTS_N_INSNS (1), /* extend_arith. */
1583 COSTS_N_INSNS (1), /* bfi. */
1588 true /* non_exec_costs_exec. */
1593 COSTS_N_INSNS (2), /* simple. */
1594 COSTS_N_INSNS (3), /* flag_setting. */
1595 COSTS_N_INSNS (2), /* extend. */
1596 COSTS_N_INSNS (2), /* add. */
1597 COSTS_N_INSNS (2), /* extend_add. */
1598 COSTS_N_INSNS (18) /* idiv. */
1602 0, /* simple (N/A). */
1603 0, /* flag_setting (N/A). */
1604 COSTS_N_INSNS (3), /* extend. */
1606 COSTS_N_INSNS (3), /* extend_add. */
1612 COSTS_N_INSNS (3), /* load. */
1613 COSTS_N_INSNS (3), /* load_sign_extend. */
1614 COSTS_N_INSNS (3), /* ldrd. */
1615 COSTS_N_INSNS (4), /* ldm_1st. */
1616 1, /* ldm_regs_per_insn_1st. */
1617 2, /* ldm_regs_per_insn_subsequent. */
1618 COSTS_N_INSNS (4), /* loadf. */
1619 COSTS_N_INSNS (4), /* loadd. */
1620 0, /* load_unaligned. */
1623 COSTS_N_INSNS (1), /* stm_1st. */
1624 1, /* stm_regs_per_insn_1st. */
1625 2, /* stm_regs_per_insn_subsequent. */
1628 0, /* store_unaligned. */
1629 COSTS_N_INSNS (1), /* loadv. */
1630 COSTS_N_INSNS (1) /* storev. */
1635 COSTS_N_INSNS (17), /* div. */
1636 COSTS_N_INSNS (4), /* mult. */
1637 COSTS_N_INSNS (8), /* mult_addsub. */
1638 COSTS_N_INSNS (8), /* fma. */
1639 COSTS_N_INSNS (4), /* addsub. */
1640 COSTS_N_INSNS (2), /* fpconst. */
1641 COSTS_N_INSNS (2), /* neg. */
1642 COSTS_N_INSNS (5), /* compare. */
1643 COSTS_N_INSNS (4), /* widen. */
1644 COSTS_N_INSNS (4), /* narrow. */
1645 COSTS_N_INSNS (4), /* toint. */
1646 COSTS_N_INSNS (4), /* fromint. */
1647 COSTS_N_INSNS (4) /* roundint. */
1651 COSTS_N_INSNS (31), /* div. */
1652 COSTS_N_INSNS (4), /* mult. */
1653 COSTS_N_INSNS (8), /* mult_addsub. */
1654 COSTS_N_INSNS (8), /* fma. */
1655 COSTS_N_INSNS (4), /* addsub. */
1656 COSTS_N_INSNS (2), /* fpconst. */
1657 COSTS_N_INSNS (2), /* neg. */
1658 COSTS_N_INSNS (2), /* compare. */
1659 COSTS_N_INSNS (4), /* widen. */
1660 COSTS_N_INSNS (4), /* narrow. */
1661 COSTS_N_INSNS (4), /* toint. */
1662 COSTS_N_INSNS (4), /* fromint. */
1663 COSTS_N_INSNS (4) /* roundint. */
1668 COSTS_N_INSNS (1) /* alu. */
/* Per-operation RTX cost table for ARMv7-M class cores.  Unlike the A-class
   tables above, non_exec_costs_exec is false here (visible at the surviving
   'non_exec'/'non_exec_costs_exec' lines).
   NOTE(review): section braces and many field lines (most of both FP
   sections) are missing from this extraction; code left byte-identical.  */
1672 const struct cpu_cost_table v7m_extra_costs
=
1680 0, /* arith_shift. */
1681 COSTS_N_INSNS (1), /* arith_shift_reg. */
1683 COSTS_N_INSNS (1), /* log_shift_reg. */
1685 COSTS_N_INSNS (1), /* extend_arith. */
1690 COSTS_N_INSNS (1), /* non_exec. */
1691 false /* non_exec_costs_exec. */
1696 COSTS_N_INSNS (1), /* simple. */
1697 COSTS_N_INSNS (1), /* flag_setting. */
1698 COSTS_N_INSNS (2), /* extend. */
1699 COSTS_N_INSNS (1), /* add. */
1700 COSTS_N_INSNS (3), /* extend_add. */
1701 COSTS_N_INSNS (8) /* idiv. */
1705 0, /* simple (N/A). */
1706 0, /* flag_setting (N/A). */
1707 COSTS_N_INSNS (2), /* extend. */
1709 COSTS_N_INSNS (3), /* extend_add. */
1715 COSTS_N_INSNS (2), /* load. */
1716 0, /* load_sign_extend. */
1717 COSTS_N_INSNS (3), /* ldrd. */
1718 COSTS_N_INSNS (2), /* ldm_1st. */
1719 1, /* ldm_regs_per_insn_1st. */
1720 1, /* ldm_regs_per_insn_subsequent. */
1721 COSTS_N_INSNS (2), /* loadf. */
1722 COSTS_N_INSNS (3), /* loadd. */
1723 COSTS_N_INSNS (1), /* load_unaligned. */
1724 COSTS_N_INSNS (2), /* store. */
1725 COSTS_N_INSNS (3), /* strd. */
1726 COSTS_N_INSNS (2), /* stm_1st. */
1727 1, /* stm_regs_per_insn_1st. */
1728 1, /* stm_regs_per_insn_subsequent. */
1729 COSTS_N_INSNS (2), /* storef. */
1730 COSTS_N_INSNS (3), /* stored. */
1731 COSTS_N_INSNS (1), /* store_unaligned. */
1732 COSTS_N_INSNS (1), /* loadv. */
1733 COSTS_N_INSNS (1) /* storev. */
1738 COSTS_N_INSNS (7), /* div. */
1739 COSTS_N_INSNS (2), /* mult. */
1740 COSTS_N_INSNS (5), /* mult_addsub. */
1741 COSTS_N_INSNS (3), /* fma. */
1742 COSTS_N_INSNS (1), /* addsub. */
1754 COSTS_N_INSNS (15), /* div. */
1755 COSTS_N_INSNS (5), /* mult. */
1756 COSTS_N_INSNS (7), /* mult_addsub. */
1757 COSTS_N_INSNS (7), /* fma. */
1758 COSTS_N_INSNS (3), /* addsub. */
1771 COSTS_N_INSNS (1) /* alu. */
/* Generic addressing-mode cost table: all three entries per access class
   (AMO_DEFAULT / AMO_NO_WB / AMO_WB) cost zero extra insns.
   NOTE(review): the three sub-struct braces and trailing '};' are missing
   from this extraction; code left byte-identical.  */
1775 const struct addr_mode_cost_table generic_addr_mode_costs
=
1779 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1780 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1781 COSTS_N_INSNS (0) /* AMO_WB. */
1785 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1786 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1787 COSTS_N_INSNS (0) /* AMO_WB. */
1791 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1792 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1793 COSTS_N_INSNS (0) /* AMO_WB. */
/* tune_params tables for the classic cores: slowmul, fastmul, StrongARM,
   XScale, and ARM9E.  All use the generic cost tables; they differ in
   constant limit, max conditional insns, and the XScale sched-adjust hook.
   NOTE(review): the '{'/'};' delimiters of each initializer are missing
   from this extraction; code left byte-identical.  */
1797 const struct tune_params arm_slowmul_tune
=
1799 &generic_extra_costs
, /* Insn extra costs. */
1800 &generic_addr_mode_costs
, /* Addressing mode costs. */
1801 NULL
, /* Sched adj cost. */
1802 arm_default_branch_cost
,
1803 &arm_default_vec_cost
,
1804 3, /* Constant limit. */
1805 5, /* Max cond insns. */
1806 8, /* Memset max inline. */
1807 1, /* Issue rate. */
1808 ARM_PREFETCH_NOT_BENEFICIAL
,
1809 tune_params::PREF_CONST_POOL_TRUE
,
1810 tune_params::PREF_LDRD_FALSE
,
1811 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1812 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1813 tune_params::DISPARAGE_FLAGS_NEITHER
,
1814 tune_params::PREF_NEON_64_FALSE
,
1815 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1816 tune_params::FUSE_NOTHING
,
1817 tune_params::SCHED_AUTOPREF_OFF
1820 const struct tune_params arm_fastmul_tune
=
1822 &generic_extra_costs
, /* Insn extra costs. */
1823 &generic_addr_mode_costs
, /* Addressing mode costs. */
1824 NULL
, /* Sched adj cost. */
1825 arm_default_branch_cost
,
1826 &arm_default_vec_cost
,
1827 1, /* Constant limit. */
1828 5, /* Max cond insns. */
1829 8, /* Memset max inline. */
1830 1, /* Issue rate. */
1831 ARM_PREFETCH_NOT_BENEFICIAL
,
1832 tune_params::PREF_CONST_POOL_TRUE
,
1833 tune_params::PREF_LDRD_FALSE
,
1834 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1835 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1836 tune_params::DISPARAGE_FLAGS_NEITHER
,
1837 tune_params::PREF_NEON_64_FALSE
,
1838 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1839 tune_params::FUSE_NOTHING
,
1840 tune_params::SCHED_AUTOPREF_OFF
1843 /* StrongARM has early execution of branches, so a sequence that is worth
1844 skipping is shorter. Set max_insns_skipped to a lower value. */
1846 const struct tune_params arm_strongarm_tune
=
1848 &generic_extra_costs
, /* Insn extra costs. */
1849 &generic_addr_mode_costs
, /* Addressing mode costs. */
1850 NULL
, /* Sched adj cost. */
1851 arm_default_branch_cost
,
1852 &arm_default_vec_cost
,
1853 1, /* Constant limit. */
1854 3, /* Max cond insns. */
1855 8, /* Memset max inline. */
1856 1, /* Issue rate. */
1857 ARM_PREFETCH_NOT_BENEFICIAL
,
1858 tune_params::PREF_CONST_POOL_TRUE
,
1859 tune_params::PREF_LDRD_FALSE
,
1860 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1861 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1862 tune_params::DISPARAGE_FLAGS_NEITHER
,
1863 tune_params::PREF_NEON_64_FALSE
,
1864 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1865 tune_params::FUSE_NOTHING
,
1866 tune_params::SCHED_AUTOPREF_OFF
1869 const struct tune_params arm_xscale_tune
=
1871 &generic_extra_costs
, /* Insn extra costs. */
1872 &generic_addr_mode_costs
, /* Addressing mode costs. */
1873 xscale_sched_adjust_cost
,
1874 arm_default_branch_cost
,
1875 &arm_default_vec_cost
,
1876 2, /* Constant limit. */
1877 3, /* Max cond insns. */
1878 8, /* Memset max inline. */
1879 1, /* Issue rate. */
1880 ARM_PREFETCH_NOT_BENEFICIAL
,
1881 tune_params::PREF_CONST_POOL_TRUE
,
1882 tune_params::PREF_LDRD_FALSE
,
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1885 tune_params::DISPARAGE_FLAGS_NEITHER
,
1886 tune_params::PREF_NEON_64_FALSE
,
1887 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1888 tune_params::FUSE_NOTHING
,
1889 tune_params::SCHED_AUTOPREF_OFF
1892 const struct tune_params arm_9e_tune
=
1894 &generic_extra_costs
, /* Insn extra costs. */
1895 &generic_addr_mode_costs
, /* Addressing mode costs. */
1896 NULL
, /* Sched adj cost. */
1897 arm_default_branch_cost
,
1898 &arm_default_vec_cost
,
1899 1, /* Constant limit. */
1900 5, /* Max cond insns. */
1901 8, /* Memset max inline. */
1902 1, /* Issue rate. */
1903 ARM_PREFETCH_NOT_BENEFICIAL
,
1904 tune_params::PREF_CONST_POOL_TRUE
,
1905 tune_params::PREF_LDRD_FALSE
,
1906 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1907 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1908 tune_params::DISPARAGE_FLAGS_NEITHER
,
1909 tune_params::PREF_NEON_64_FALSE
,
1910 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1911 tune_params::FUSE_NOTHING
,
1912 tune_params::SCHED_AUTOPREF_OFF
/* tune_params tables for Marvell PJ4, generic v6t2, and the generic Cortex
   fallback.  v6t2 and Cortex switch to PREF_CONST_POOL_FALSE; PJ4 and
   Cortex use issue rate 2.
   NOTE(review): the '{'/'};' delimiters of each initializer are missing
   from this extraction; code left byte-identical.  */
1915 const struct tune_params arm_marvell_pj4_tune
=
1917 &generic_extra_costs
, /* Insn extra costs. */
1918 &generic_addr_mode_costs
, /* Addressing mode costs. */
1919 NULL
, /* Sched adj cost. */
1920 arm_default_branch_cost
,
1921 &arm_default_vec_cost
,
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 2, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL
,
1927 tune_params::PREF_CONST_POOL_TRUE
,
1928 tune_params::PREF_LDRD_FALSE
,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_NEITHER
,
1932 tune_params::PREF_NEON_64_FALSE
,
1933 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1934 tune_params::FUSE_NOTHING
,
1935 tune_params::SCHED_AUTOPREF_OFF
1938 const struct tune_params arm_v6t2_tune
=
1940 &generic_extra_costs
, /* Insn extra costs. */
1941 &generic_addr_mode_costs
, /* Addressing mode costs. */
1942 NULL
, /* Sched adj cost. */
1943 arm_default_branch_cost
,
1944 &arm_default_vec_cost
,
1945 1, /* Constant limit. */
1946 5, /* Max cond insns. */
1947 8, /* Memset max inline. */
1948 1, /* Issue rate. */
1949 ARM_PREFETCH_NOT_BENEFICIAL
,
1950 tune_params::PREF_CONST_POOL_FALSE
,
1951 tune_params::PREF_LDRD_FALSE
,
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1953 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1954 tune_params::DISPARAGE_FLAGS_NEITHER
,
1955 tune_params::PREF_NEON_64_FALSE
,
1956 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1957 tune_params::FUSE_NOTHING
,
1958 tune_params::SCHED_AUTOPREF_OFF
1962 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1963 const struct tune_params arm_cortex_tune
=
1965 &generic_extra_costs
,
1966 &generic_addr_mode_costs
, /* Addressing mode costs. */
1967 NULL
, /* Sched adj cost. */
1968 arm_default_branch_cost
,
1969 &arm_default_vec_cost
,
1970 1, /* Constant limit. */
1971 5, /* Max cond insns. */
1972 8, /* Memset max inline. */
1973 2, /* Issue rate. */
1974 ARM_PREFETCH_NOT_BENEFICIAL
,
1975 tune_params::PREF_CONST_POOL_FALSE
,
1976 tune_params::PREF_LDRD_FALSE
,
1977 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1978 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1979 tune_params::DISPARAGE_FLAGS_NEITHER
,
1980 tune_params::PREF_NEON_64_FALSE
,
1981 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1982 tune_params::FUSE_NOTHING
,
1983 tune_params::SCHED_AUTOPREF_OFF
/* tune_params tables for Cortex-A8, A7, A15 and A35.  Each points at the
   matching per-core cost table above (A35 reuses the A53 costs from
   aarch-cost-tables.h); A15 additionally enables PREF_LDRD, disparages
   flag-setting insns, and turns on full sched autoprefetching; A35 fuses
   MOVW/MOVT pairs.
   NOTE(review): the '{'/'};' delimiters of each initializer are missing
   from this extraction; code left byte-identical.  */
1986 const struct tune_params arm_cortex_a8_tune
=
1988 &cortexa8_extra_costs
,
1989 &generic_addr_mode_costs
, /* Addressing mode costs. */
1990 NULL
, /* Sched adj cost. */
1991 arm_default_branch_cost
,
1992 &arm_default_vec_cost
,
1993 1, /* Constant limit. */
1994 5, /* Max cond insns. */
1995 8, /* Memset max inline. */
1996 2, /* Issue rate. */
1997 ARM_PREFETCH_NOT_BENEFICIAL
,
1998 tune_params::PREF_CONST_POOL_FALSE
,
1999 tune_params::PREF_LDRD_FALSE
,
2000 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2001 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2002 tune_params::DISPARAGE_FLAGS_NEITHER
,
2003 tune_params::PREF_NEON_64_FALSE
,
2004 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2005 tune_params::FUSE_NOTHING
,
2006 tune_params::SCHED_AUTOPREF_OFF
2009 const struct tune_params arm_cortex_a7_tune
=
2011 &cortexa7_extra_costs
,
2012 &generic_addr_mode_costs
, /* Addressing mode costs. */
2013 NULL
, /* Sched adj cost. */
2014 arm_default_branch_cost
,
2015 &arm_default_vec_cost
,
2016 1, /* Constant limit. */
2017 5, /* Max cond insns. */
2018 8, /* Memset max inline. */
2019 2, /* Issue rate. */
2020 ARM_PREFETCH_NOT_BENEFICIAL
,
2021 tune_params::PREF_CONST_POOL_FALSE
,
2022 tune_params::PREF_LDRD_FALSE
,
2023 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2025 tune_params::DISPARAGE_FLAGS_NEITHER
,
2026 tune_params::PREF_NEON_64_FALSE
,
2027 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2028 tune_params::FUSE_NOTHING
,
2029 tune_params::SCHED_AUTOPREF_OFF
2032 const struct tune_params arm_cortex_a15_tune
=
2034 &cortexa15_extra_costs
,
2035 &generic_addr_mode_costs
, /* Addressing mode costs. */
2036 NULL
, /* Sched adj cost. */
2037 arm_default_branch_cost
,
2038 &arm_default_vec_cost
,
2039 1, /* Constant limit. */
2040 2, /* Max cond insns. */
2041 8, /* Memset max inline. */
2042 3, /* Issue rate. */
2043 ARM_PREFETCH_NOT_BENEFICIAL
,
2044 tune_params::PREF_CONST_POOL_FALSE
,
2045 tune_params::PREF_LDRD_TRUE
,
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2047 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2048 tune_params::DISPARAGE_FLAGS_ALL
,
2049 tune_params::PREF_NEON_64_FALSE
,
2050 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2051 tune_params::FUSE_NOTHING
,
2052 tune_params::SCHED_AUTOPREF_FULL
2055 const struct tune_params arm_cortex_a35_tune
=
2057 &cortexa53_extra_costs
,
2058 &generic_addr_mode_costs
, /* Addressing mode costs. */
2059 NULL
, /* Sched adj cost. */
2060 arm_default_branch_cost
,
2061 &arm_default_vec_cost
,
2062 1, /* Constant limit. */
2063 5, /* Max cond insns. */
2064 8, /* Memset max inline. */
2065 1, /* Issue rate. */
2066 ARM_PREFETCH_NOT_BENEFICIAL
,
2067 tune_params::PREF_CONST_POOL_FALSE
,
2068 tune_params::PREF_LDRD_FALSE
,
2069 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2071 tune_params::DISPARAGE_FLAGS_NEITHER
,
2072 tune_params::PREF_NEON_64_FALSE
,
2073 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2074 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2075 tune_params::SCHED_AUTOPREF_OFF
2078 const struct tune_params arm_cortex_a53_tune
=
2080 &cortexa53_extra_costs
,
2081 &generic_addr_mode_costs
, /* Addressing mode costs. */
2082 NULL
, /* Sched adj cost. */
2083 arm_default_branch_cost
,
2084 &arm_default_vec_cost
,
2085 1, /* Constant limit. */
2086 5, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 2, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL
,
2090 tune_params::PREF_CONST_POOL_FALSE
,
2091 tune_params::PREF_LDRD_FALSE
,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_NEITHER
,
2095 tune_params::PREF_NEON_64_FALSE
,
2096 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2097 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2098 tune_params::SCHED_AUTOPREF_OFF
2101 const struct tune_params arm_cortex_a57_tune
=
2103 &cortexa57_extra_costs
,
2104 &generic_addr_mode_costs
, /* addressing mode costs */
2105 NULL
, /* Sched adj cost. */
2106 arm_default_branch_cost
,
2107 &arm_default_vec_cost
,
2108 1, /* Constant limit. */
2109 2, /* Max cond insns. */
2110 8, /* Memset max inline. */
2111 3, /* Issue rate. */
2112 ARM_PREFETCH_NOT_BENEFICIAL
,
2113 tune_params::PREF_CONST_POOL_FALSE
,
2114 tune_params::PREF_LDRD_TRUE
,
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2116 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2117 tune_params::DISPARAGE_FLAGS_ALL
,
2118 tune_params::PREF_NEON_64_FALSE
,
2119 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2120 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2121 tune_params::SCHED_AUTOPREF_FULL
2124 const struct tune_params arm_exynosm1_tune
=
2126 &exynosm1_extra_costs
,
2127 &generic_addr_mode_costs
, /* Addressing mode costs. */
2128 NULL
, /* Sched adj cost. */
2129 arm_default_branch_cost
,
2130 &arm_default_vec_cost
,
2131 1, /* Constant limit. */
2132 2, /* Max cond insns. */
2133 8, /* Memset max inline. */
2134 3, /* Issue rate. */
2135 ARM_PREFETCH_NOT_BENEFICIAL
,
2136 tune_params::PREF_CONST_POOL_FALSE
,
2137 tune_params::PREF_LDRD_TRUE
,
2138 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2139 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2140 tune_params::DISPARAGE_FLAGS_ALL
,
2141 tune_params::PREF_NEON_64_FALSE
,
2142 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2143 tune_params::FUSE_NOTHING
,
2144 tune_params::SCHED_AUTOPREF_OFF
2147 const struct tune_params arm_xgene1_tune
=
2149 &xgene1_extra_costs
,
2150 &generic_addr_mode_costs
, /* Addressing mode costs. */
2151 NULL
, /* Sched adj cost. */
2152 arm_default_branch_cost
,
2153 &arm_default_vec_cost
,
2154 1, /* Constant limit. */
2155 2, /* Max cond insns. */
2156 32, /* Memset max inline. */
2157 4, /* Issue rate. */
2158 ARM_PREFETCH_NOT_BENEFICIAL
,
2159 tune_params::PREF_CONST_POOL_FALSE
,
2160 tune_params::PREF_LDRD_TRUE
,
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2162 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2163 tune_params::DISPARAGE_FLAGS_ALL
,
2164 tune_params::PREF_NEON_64_FALSE
,
2165 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2166 tune_params::FUSE_NOTHING
,
2167 tune_params::SCHED_AUTOPREF_OFF
2170 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2171 less appealing. Set max_insns_skipped to a low value. */
2173 const struct tune_params arm_cortex_a5_tune
=
2175 &cortexa5_extra_costs
,
2176 &generic_addr_mode_costs
, /* Addressing mode costs. */
2177 NULL
, /* Sched adj cost. */
2178 arm_cortex_a5_branch_cost
,
2179 &arm_default_vec_cost
,
2180 1, /* Constant limit. */
2181 1, /* Max cond insns. */
2182 8, /* Memset max inline. */
2183 2, /* Issue rate. */
2184 ARM_PREFETCH_NOT_BENEFICIAL
,
2185 tune_params::PREF_CONST_POOL_FALSE
,
2186 tune_params::PREF_LDRD_FALSE
,
2187 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2188 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2189 tune_params::DISPARAGE_FLAGS_NEITHER
,
2190 tune_params::PREF_NEON_64_FALSE
,
2191 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2192 tune_params::FUSE_NOTHING
,
2193 tune_params::SCHED_AUTOPREF_OFF
2196 const struct tune_params arm_cortex_a9_tune
=
2198 &cortexa9_extra_costs
,
2199 &generic_addr_mode_costs
, /* Addressing mode costs. */
2200 cortex_a9_sched_adjust_cost
,
2201 arm_default_branch_cost
,
2202 &arm_default_vec_cost
,
2203 1, /* Constant limit. */
2204 5, /* Max cond insns. */
2205 8, /* Memset max inline. */
2206 2, /* Issue rate. */
2207 ARM_PREFETCH_BENEFICIAL(4,32,32),
2208 tune_params::PREF_CONST_POOL_FALSE
,
2209 tune_params::PREF_LDRD_FALSE
,
2210 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2211 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2212 tune_params::DISPARAGE_FLAGS_NEITHER
,
2213 tune_params::PREF_NEON_64_FALSE
,
2214 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2215 tune_params::FUSE_NOTHING
,
2216 tune_params::SCHED_AUTOPREF_OFF
2219 const struct tune_params arm_cortex_a12_tune
=
2221 &cortexa12_extra_costs
,
2222 &generic_addr_mode_costs
, /* Addressing mode costs. */
2223 NULL
, /* Sched adj cost. */
2224 arm_default_branch_cost
,
2225 &arm_default_vec_cost
, /* Vectorizer costs. */
2226 1, /* Constant limit. */
2227 2, /* Max cond insns. */
2228 8, /* Memset max inline. */
2229 2, /* Issue rate. */
2230 ARM_PREFETCH_NOT_BENEFICIAL
,
2231 tune_params::PREF_CONST_POOL_FALSE
,
2232 tune_params::PREF_LDRD_TRUE
,
2233 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2234 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2235 tune_params::DISPARAGE_FLAGS_ALL
,
2236 tune_params::PREF_NEON_64_FALSE
,
2237 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2238 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2239 tune_params::SCHED_AUTOPREF_OFF
2242 const struct tune_params arm_cortex_a73_tune
=
2244 &cortexa57_extra_costs
,
2245 &generic_addr_mode_costs
, /* Addressing mode costs. */
2246 NULL
, /* Sched adj cost. */
2247 arm_default_branch_cost
,
2248 &arm_default_vec_cost
, /* Vectorizer costs. */
2249 1, /* Constant limit. */
2250 2, /* Max cond insns. */
2251 8, /* Memset max inline. */
2252 2, /* Issue rate. */
2253 ARM_PREFETCH_NOT_BENEFICIAL
,
2254 tune_params::PREF_CONST_POOL_FALSE
,
2255 tune_params::PREF_LDRD_TRUE
,
2256 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2257 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2258 tune_params::DISPARAGE_FLAGS_ALL
,
2259 tune_params::PREF_NEON_64_FALSE
,
2260 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2261 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2262 tune_params::SCHED_AUTOPREF_FULL
2265 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2266 cycle to execute each. An LDR from the constant pool also takes two cycles
2267 to execute, but mildly increases pipelining opportunity (consecutive
2268 loads/stores can be pipelined together, saving one cycle), and may also
2269 improve icache utilisation. Hence we prefer the constant pool for such
2272 const struct tune_params arm_v7m_tune
=
2275 &generic_addr_mode_costs
, /* Addressing mode costs. */
2276 NULL
, /* Sched adj cost. */
2277 arm_cortex_m_branch_cost
,
2278 &arm_default_vec_cost
,
2279 1, /* Constant limit. */
2280 2, /* Max cond insns. */
2281 8, /* Memset max inline. */
2282 1, /* Issue rate. */
2283 ARM_PREFETCH_NOT_BENEFICIAL
,
2284 tune_params::PREF_CONST_POOL_TRUE
,
2285 tune_params::PREF_LDRD_FALSE
,
2286 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2287 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2288 tune_params::DISPARAGE_FLAGS_NEITHER
,
2289 tune_params::PREF_NEON_64_FALSE
,
2290 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2291 tune_params::FUSE_NOTHING
,
2292 tune_params::SCHED_AUTOPREF_OFF
2295 /* Cortex-M7 tuning. */
2297 const struct tune_params arm_cortex_m7_tune
=
2300 &generic_addr_mode_costs
, /* Addressing mode costs. */
2301 NULL
, /* Sched adj cost. */
2302 arm_cortex_m7_branch_cost
,
2303 &arm_default_vec_cost
,
2304 0, /* Constant limit. */
2305 1, /* Max cond insns. */
2306 8, /* Memset max inline. */
2307 2, /* Issue rate. */
2308 ARM_PREFETCH_NOT_BENEFICIAL
,
2309 tune_params::PREF_CONST_POOL_TRUE
,
2310 tune_params::PREF_LDRD_FALSE
,
2311 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2312 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2313 tune_params::DISPARAGE_FLAGS_NEITHER
,
2314 tune_params::PREF_NEON_64_FALSE
,
2315 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2316 tune_params::FUSE_NOTHING
,
2317 tune_params::SCHED_AUTOPREF_OFF
2320 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2321 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2323 const struct tune_params arm_v6m_tune
=
2325 &generic_extra_costs
, /* Insn extra costs. */
2326 &generic_addr_mode_costs
, /* Addressing mode costs. */
2327 NULL
, /* Sched adj cost. */
2328 arm_default_branch_cost
,
2329 &arm_default_vec_cost
, /* Vectorizer costs. */
2330 1, /* Constant limit. */
2331 5, /* Max cond insns. */
2332 8, /* Memset max inline. */
2333 1, /* Issue rate. */
2334 ARM_PREFETCH_NOT_BENEFICIAL
,
2335 tune_params::PREF_CONST_POOL_FALSE
,
2336 tune_params::PREF_LDRD_FALSE
,
2337 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2338 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2339 tune_params::DISPARAGE_FLAGS_NEITHER
,
2340 tune_params::PREF_NEON_64_FALSE
,
2341 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2342 tune_params::FUSE_NOTHING
,
2343 tune_params::SCHED_AUTOPREF_OFF
2346 const struct tune_params arm_fa726te_tune
=
2348 &generic_extra_costs
, /* Insn extra costs. */
2349 &generic_addr_mode_costs
, /* Addressing mode costs. */
2350 fa726te_sched_adjust_cost
,
2351 arm_default_branch_cost
,
2352 &arm_default_vec_cost
,
2353 1, /* Constant limit. */
2354 5, /* Max cond insns. */
2355 8, /* Memset max inline. */
2356 2, /* Issue rate. */
2357 ARM_PREFETCH_NOT_BENEFICIAL
,
2358 tune_params::PREF_CONST_POOL_TRUE
,
2359 tune_params::PREF_LDRD_FALSE
,
2360 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2361 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2362 tune_params::DISPARAGE_FLAGS_NEITHER
,
2363 tune_params::PREF_NEON_64_FALSE
,
2364 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2365 tune_params::FUSE_NOTHING
,
2366 tune_params::SCHED_AUTOPREF_OFF
2369 /* Auto-generated CPU, FPU and architecture tables. */
2370 #include "arm-cpu-data.h"
/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */
char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2378 /* Supported TLS relocations. */
2386 TLS_DESCSEQ
/* GNU scheme */
2389 /* The maximum number of insns to be used when loading a constant. */
2391 arm_constant_limit (bool size_p
)
2393 return size_p
? 1 : current_tune
->constant_limit
;
2396 /* Emit an insn that's a simple single-set. Both the operands must be known
2398 inline static rtx_insn
*
2399 emit_set_insn (rtx x
, rtx y
)
2401 return emit_insn (gen_rtx_SET (x
, y
));
/* Return the number of bits set in VALUE.  Uses Kernighan's trick of
   repeatedly clearing the lowest set bit, so it loops once per set bit.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
2419 /* Return the number of bits set in BMAP. */
2421 bitmap_popcount (const sbitmap bmap
)
2423 unsigned int count
= 0;
2425 sbitmap_iterator sbi
;
2427 EXECUTE_IF_SET_IN_BITMAP (bmap
, 0, n
, sbi
)
2436 } arm_fixed_mode_set
;
2438 /* A small helper for setting fixed-point library libfuncs. */
2441 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2442 const char *funcname
, const char *modename
,
2447 if (num_suffix
== 0)
2448 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2450 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2452 set_optab_libfunc (optable
, mode
, buffer
);
2456 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2457 machine_mode from
, const char *funcname
,
2458 const char *toname
, const char *fromname
)
2461 const char *maybe_suffix_2
= "";
2463 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2464 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2465 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2466 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2467 maybe_suffix_2
= "2";
2469 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2472 set_conv_libfunc (optable
, to
, from
, buffer
);
2475 static GTY(()) rtx speculation_barrier_libfunc
;
2477 /* Set up library functions unique to ARM. */
2479 arm_init_libfuncs (void)
2481 /* For Linux, we have access to kernel support for atomic operations. */
2482 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2483 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2485 /* There are no special library functions unless we are using the
2490 /* The functions below are described in Section 4 of the "Run-Time
2491 ABI for the ARM architecture", Version 1.0. */
2493 /* Double-precision floating-point arithmetic. Table 2. */
2494 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2495 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2496 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2497 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2498 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2500 /* Double-precision comparisons. Table 3. */
2501 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2502 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2503 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2504 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2505 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2506 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2507 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2509 /* Single-precision floating-point arithmetic. Table 4. */
2510 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2511 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2512 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2513 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2514 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2516 /* Single-precision comparisons. Table 5. */
2517 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2518 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2519 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2520 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2521 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2522 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2523 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2525 /* Floating-point to integer conversions. Table 6. */
2526 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2527 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2528 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2529 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2530 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2531 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2532 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2533 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2535 /* Conversions between floating types. Table 7. */
2536 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2537 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2539 /* Integer to floating-point conversions. Table 8. */
2540 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2541 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2542 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2543 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2544 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2545 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2546 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2547 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2549 /* Long long. Table 9. */
2550 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2551 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2552 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2553 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2554 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2555 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2556 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2557 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2559 /* Integer (32/32->32) division. \S 4.3.1. */
2560 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2561 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2563 /* The divmod functions are designed so that they can be used for
2564 plain division, even though they return both the quotient and the
2565 remainder. The quotient is returned in the usual location (i.e.,
2566 r0 for SImode, {r0, r1} for DImode), just as would be expected
2567 for an ordinary division routine. Because the AAPCS calling
2568 conventions specify that all of { r0, r1, r2, r3 } are
2569 callee-saved registers, there is no need to tell the compiler
2570 explicitly that those registers are clobbered by these
2572 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2573 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2575 /* For SImode division the ABI provides div-without-mod routines,
2576 which are faster. */
2577 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2578 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2580 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2581 divmod libcalls instead. */
2582 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2583 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2584 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2585 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2587 /* Half-precision float operations. The compiler handles all operations
2588 with NULL libfuncs by converting the SFmode. */
2589 switch (arm_fp16_format
)
2591 case ARM_FP16_FORMAT_IEEE
:
2592 case ARM_FP16_FORMAT_ALTERNATIVE
:
2595 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2596 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2598 : "__gnu_f2h_alternative"));
2599 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2600 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2602 : "__gnu_h2f_alternative"));
2604 set_conv_libfunc (trunc_optab
, HFmode
, DFmode
,
2605 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2607 : "__gnu_d2h_alternative"));
2610 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2611 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2612 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2613 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2614 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2617 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2618 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2619 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2620 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2621 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2622 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2623 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2630 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2632 const arm_fixed_mode_set fixed_arith_modes
[] =
2635 { E_UQQmode
, "uqq" },
2637 { E_UHQmode
, "uhq" },
2639 { E_USQmode
, "usq" },
2641 { E_UDQmode
, "udq" },
2643 { E_UTQmode
, "utq" },
2645 { E_UHAmode
, "uha" },
2647 { E_USAmode
, "usa" },
2649 { E_UDAmode
, "uda" },
2651 { E_UTAmode
, "uta" }
2653 const arm_fixed_mode_set fixed_conv_modes
[] =
2656 { E_UQQmode
, "uqq" },
2658 { E_UHQmode
, "uhq" },
2660 { E_USQmode
, "usq" },
2662 { E_UDQmode
, "udq" },
2664 { E_UTQmode
, "utq" },
2666 { E_UHAmode
, "uha" },
2668 { E_USAmode
, "usa" },
2670 { E_UDAmode
, "uda" },
2672 { E_UTAmode
, "uta" },
2683 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2685 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2686 "add", fixed_arith_modes
[i
].name
, 3);
2687 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2688 "ssadd", fixed_arith_modes
[i
].name
, 3);
2689 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2690 "usadd", fixed_arith_modes
[i
].name
, 3);
2691 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2692 "sub", fixed_arith_modes
[i
].name
, 3);
2693 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2694 "sssub", fixed_arith_modes
[i
].name
, 3);
2695 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2696 "ussub", fixed_arith_modes
[i
].name
, 3);
2697 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2698 "mul", fixed_arith_modes
[i
].name
, 3);
2699 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2700 "ssmul", fixed_arith_modes
[i
].name
, 3);
2701 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2702 "usmul", fixed_arith_modes
[i
].name
, 3);
2703 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2704 "div", fixed_arith_modes
[i
].name
, 3);
2705 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2706 "udiv", fixed_arith_modes
[i
].name
, 3);
2707 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2708 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2709 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2710 "usdiv", fixed_arith_modes
[i
].name
, 3);
2711 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2712 "neg", fixed_arith_modes
[i
].name
, 2);
2713 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2714 "ssneg", fixed_arith_modes
[i
].name
, 2);
2715 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2716 "usneg", fixed_arith_modes
[i
].name
, 2);
2717 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2718 "ashl", fixed_arith_modes
[i
].name
, 3);
2719 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2720 "ashr", fixed_arith_modes
[i
].name
, 3);
2721 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2722 "lshr", fixed_arith_modes
[i
].name
, 3);
2723 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2724 "ssashl", fixed_arith_modes
[i
].name
, 3);
2725 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2726 "usashl", fixed_arith_modes
[i
].name
, 3);
2727 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2728 "cmp", fixed_arith_modes
[i
].name
, 2);
2731 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2732 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2735 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2736 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2739 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2740 fixed_conv_modes
[j
].mode
, "fract",
2741 fixed_conv_modes
[i
].name
,
2742 fixed_conv_modes
[j
].name
);
2743 arm_set_fixed_conv_libfunc (satfract_optab
,
2744 fixed_conv_modes
[i
].mode
,
2745 fixed_conv_modes
[j
].mode
, "satfract",
2746 fixed_conv_modes
[i
].name
,
2747 fixed_conv_modes
[j
].name
);
2748 arm_set_fixed_conv_libfunc (fractuns_optab
,
2749 fixed_conv_modes
[i
].mode
,
2750 fixed_conv_modes
[j
].mode
, "fractuns",
2751 fixed_conv_modes
[i
].name
,
2752 fixed_conv_modes
[j
].name
);
2753 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2754 fixed_conv_modes
[i
].mode
,
2755 fixed_conv_modes
[j
].mode
, "satfractuns",
2756 fixed_conv_modes
[i
].name
,
2757 fixed_conv_modes
[j
].name
);
2761 if (TARGET_AAPCS_BASED
)
2762 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2764 speculation_barrier_libfunc
= init_one_libfunc ("__speculation_barrier");
2767 /* On AAPCS systems, this is the "struct __va_list". */
2768 static GTY(()) tree va_list_type
;
2770 /* Return the type to use as __builtin_va_list. */
2772 arm_build_builtin_va_list (void)
2777 if (!TARGET_AAPCS_BASED
)
2778 return std_build_builtin_va_list ();
2780 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2788 The C Library ABI further reinforces this definition in \S
2791 We must follow this definition exactly. The structure tag
2792 name is visible in C++ mangled names, and thus forms a part
2793 of the ABI. The field name may be used by people who
2794 #include <stdarg.h>. */
2795 /* Create the type. */
2796 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2797 /* Give it the required name. */
2798 va_list_name
= build_decl (BUILTINS_LOCATION
,
2800 get_identifier ("__va_list"),
2802 DECL_ARTIFICIAL (va_list_name
) = 1;
2803 TYPE_NAME (va_list_type
) = va_list_name
;
2804 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2805 /* Create the __ap field. */
2806 ap_field
= build_decl (BUILTINS_LOCATION
,
2808 get_identifier ("__ap"),
2810 DECL_ARTIFICIAL (ap_field
) = 1;
2811 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2812 TYPE_FIELDS (va_list_type
) = ap_field
;
2813 /* Compute its layout. */
2814 layout_type (va_list_type
);
2816 return va_list_type
;
2819 /* Return an expression of type "void *" pointing to the next
2820 available argument in a variable-argument list. VALIST is the
2821 user-level va_list object, of type __builtin_va_list. */
2823 arm_extract_valist_ptr (tree valist
)
2825 if (TREE_TYPE (valist
) == error_mark_node
)
2826 return error_mark_node
;
2828 /* On an AAPCS target, the pointer is stored within "struct
2830 if (TARGET_AAPCS_BASED
)
2832 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2833 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2834 valist
, ap_field
, NULL_TREE
);
2840 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2842 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2844 valist
= arm_extract_valist_ptr (valist
);
2845 std_expand_builtin_va_start (valist
, nextarg
);
2848 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2850 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2853 valist
= arm_extract_valist_ptr (valist
);
2854 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2857 /* Check any incompatible options that the user has specified. */
2859 arm_option_check_internal (struct gcc_options
*opts
)
2861 int flags
= opts
->x_target_flags
;
2863 /* iWMMXt and NEON are incompatible. */
2865 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_neon
))
2866 error ("iWMMXt and NEON are incompatible");
2868 /* Make sure that the processor choice does not conflict with any of the
2869 other command line choices. */
2870 if (TARGET_ARM_P (flags
)
2871 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
))
2872 error ("target CPU does not support ARM mode");
2874 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2875 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2876 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2878 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2879 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2881 /* If this target is normally configured to use APCS frames, warn if they
2882 are turned off and debugging is turned on. */
2883 if (TARGET_ARM_P (flags
)
2884 && write_symbols
!= NO_DEBUG
2885 && !TARGET_APCS_FRAME
2886 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2887 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2890 /* iWMMXt unsupported under Thumb mode. */
2891 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2892 error ("iWMMXt unsupported under Thumb mode");
2894 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2895 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2897 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2899 error ("RTP PIC is incompatible with Thumb");
2903 if (target_pure_code
|| target_slow_flash_data
)
2905 const char *flag
= (target_pure_code
? "-mpure-code" :
2906 "-mslow-flash-data");
2908 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2910 if (!TARGET_HAVE_MOVT
|| arm_arch_notm
|| flag_pic
|| TARGET_NEON
)
2911 error ("%s only supports non-pic code on M-profile targets with the "
2912 "MOVT instruction", flag
);
2914 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2915 -mword-relocations forbids relocation of MOVT/MOVW. */
2916 if (target_word_relocations
)
2917 error ("%s incompatible with %<-mword-relocations%>", flag
);
2921 /* Recompute the global settings depending on target attribute options. */
2924 arm_option_params_internal (void)
2926 /* If we are not using the default (ARM mode) section anchor offset
2927 ranges, then set the correct ranges now. */
2930 /* Thumb-1 LDR instructions cannot have negative offsets.
2931 Permissible positive offset ranges are 5-bit (for byte loads),
2932 6-bit (for halfword loads), or 7-bit (for word loads).
2933 Empirical results suggest a 7-bit anchor range gives the best
2934 overall code size. */
2935 targetm
.min_anchor_offset
= 0;
2936 targetm
.max_anchor_offset
= 127;
2938 else if (TARGET_THUMB2
)
2940 /* The minimum is set such that the total size of the block
2941 for a particular anchor is 248 + 1 + 4095 bytes, which is
2942 divisible by eight, ensuring natural spacing of anchors. */
2943 targetm
.min_anchor_offset
= -248;
2944 targetm
.max_anchor_offset
= 4095;
2948 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
2949 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
2952 /* Increase the number of conditional instructions with -Os. */
2953 max_insns_skipped
= optimize_size
? 4 : current_tune
->max_insns_skipped
;
2955 /* For THUMB2, we limit the conditional sequence to one IT block. */
2957 max_insns_skipped
= MIN (max_insns_skipped
, MAX_INSN_PER_IT_BLOCK
);
2960 /* True if -mflip-thumb should next add an attribute for the default
2961 mode, false if it should next add an attribute for the opposite mode. */
2962 static GTY(()) bool thumb_flipper
;
2964 /* Options after initial target override. */
/* Tree node saved by arm_option_override; read back (via
   TREE_OPTIMIZATION) in arm_option_override_internal.  Both statics are
   GTY(())-marked so the garbage collector keeps them alive.  */
2965 static GTY(()) tree init_optimize
;
/* Worker shared by the option-override paths: when the user gave
   -falign-functions with no argument, supply a default alignment
   string — "2" for size-optimized Thumb code, otherwise "4".
   NOTE(review): the function's return-type line and braces are elided
   in this excerpt.  */
2968 arm_override_options_after_change_1 (struct gcc_options
*opts
)
2970 /* -falign-functions without argument: supply one. */
2971 if (opts
->x_flag_align_functions
&& !opts
->x_str_align_functions
)
2972 opts
->x_str_align_functions
= TARGET_THUMB_P (opts
->x_target_flags
)
2973 && opts
->x_optimize_size
? "2" : "4";
2976 /* Implement targetm.override_options_after_change. */
/* Rebuild the active target from the default target-option node
   (the final FALSE suppresses the -mcpu/-march conflict warning),
   then re-apply the alignment default via
   arm_override_options_after_change_1.  */
2979 arm_override_options_after_change (void)
2981 arm_configure_build_target (&arm_active_target
,
2982 TREE_TARGET_OPTION (target_option_default_node
),
2983 &global_options_set
, false);
2985 arm_override_options_after_change_1 (&global_options
);
2988 /* Implement TARGET_OPTION_SAVE. */
/* Copy the -march/-mcpu/-mtune option strings from the live options
   OPTS into the target-option save area PTR, so they can later be
   restored by arm_option_restore.  */
2990 arm_option_save (struct cl_target_option
*ptr
, struct gcc_options
*opts
)
2992 ptr
->x_arm_arch_string
= opts
->x_arm_arch_string
;
2993 ptr
->x_arm_cpu_string
= opts
->x_arm_cpu_string
;
2994 ptr
->x_arm_tune_string
= opts
->x_arm_tune_string
;
2997 /* Implement TARGET_OPTION_RESTORE. */
/* Inverse of arm_option_save: copy the saved -march/-mcpu/-mtune
   strings back into OPTS, then rebuild the active target from PTR.
   NOTE(review): the final argument of the arm_configure_build_target
   call (original line 3005) is not visible in this excerpt.  */
2999 arm_option_restore (struct gcc_options
*opts
, struct cl_target_option
*ptr
)
3001 opts
->x_arm_arch_string
= ptr
->x_arm_arch_string
;
3002 opts
->x_arm_cpu_string
= ptr
->x_arm_cpu_string
;
3003 opts
->x_arm_tune_string
= ptr
->x_arm_tune_string
;
3004 arm_configure_build_target (&arm_active_target
, ptr
, &global_options_set
,
3008 /* Reset options between modes that the user has specified. */
/* Reconcile mode-dependent flags (interworking, APCS frame, restrict-IT,
   unaligned access, scheduling, shrink-wrap, fipa-ra, unified asm) after
   the target/mode may have changed, e.g. for -mflip-thumb or a target
   attribute.  NOTE(review): several original lines are elided here —
   braces and the `else` arms that restore flags from the remembered
   initial optimization node `to` are only partially visible.  */
3010 arm_option_override_internal (struct gcc_options
*opts
,
3011 struct gcc_options
*opts_set
)
3013 arm_override_options_after_change_1 (opts
);
/* Drop interworking silently when the target has no Thumb support.  */
3015 if (TARGET_INTERWORK
&& !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3017 /* The default is to enable interworking, so this warning message would
3018 be confusing to users who have just compiled with
3019 eg, -march=armv4. */
3020 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3021 opts
->x_target_flags
&= ~MASK_INTERWORK
;
3024 if (TARGET_THUMB_P (opts
->x_target_flags
)
3025 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3027 warning (0, "target CPU does not support THUMB instructions");
3028 opts
->x_target_flags
&= ~MASK_THUMB
;
3031 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
3033 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3034 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
3037 /* Callee super interworking implies thumb interworking. Adding
3038 this to the flags here simplifies the logic elsewhere. */
3039 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
3040 opts
->x_target_flags
|= MASK_INTERWORK
;
3042 /* need to remember initial values so combinaisons of options like
3043 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3044 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
/* Default -mrestrict-it from ARMv8 unless the user chose explicitly.  */
3046 if (! opts_set
->x_arm_restrict_it
)
3047 opts
->x_arm_restrict_it
= arm_arch8
;
3049 /* ARM execution state and M profile don't have [restrict] IT. */
3050 if (!TARGET_THUMB2_P (opts
->x_target_flags
) || !arm_arch_notm
)
3051 opts
->x_arm_restrict_it
= 0;
3053 /* Enable -munaligned-access by default for
3054 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3055 i.e. Thumb2 and ARM state only.
3056 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3057 - ARMv8 architecture-base processors.
3059 Disable -munaligned-access by default for
3060 - all pre-ARMv6 architecture-based processors
3061 - ARMv6-M architecture-based processors
3062 - ARMv8-M Baseline processors. */
3064 if (! opts_set
->x_unaligned_access
)
3066 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
3067 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
3069 else if (opts
->x_unaligned_access
== 1
3070 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
3072 warning (0, "target CPU does not support unaligned accesses");
3073 opts
->x_unaligned_access
= 0;
3076 /* Don't warn since it's on by default in -O2. */
3077 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3078 opts
->x_flag_schedule_insns
= 0;
/* NOTE(review): the `else` restoring the user's original setting from
   `to` (original line 3079) is elided here.  */
3080 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
3082 /* Disable shrink-wrap when optimizing function for size, since it tends to
3083 generate additional returns. */
3084 if (optimize_function_for_size_p (cfun
)
3085 && TARGET_THUMB2_P (opts
->x_target_flags
))
3086 opts
->x_flag_shrink_wrap
= false;
3088 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
3090 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3091 - epilogue_insns - does not accurately model the corresponding insns
3092 emitted in the asm file. In particular, see the comment in thumb_exit
3093 'Find out how many of the (return) argument registers we can corrupt'.
3094 As a consequence, the epilogue may clobber registers without fipa-ra
3095 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3096 TODO: Accurately model clobbers for epilogue_insns and reenable
3098 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3099 opts
->x_flag_ipa_ra
= 0;
3101 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
3103 /* Thumb2 inline assembly code should always use unified syntax.
3104 This will apply to ARM and Thumb1 eventually. */
3105 if (TARGET_THUMB2_P (opts
->x_target_flags
))
3106 opts
->x_inline_asm_unified
= true;
/* Allow sub-targets (e.g. VxWorks) a final override hook.  */
3108 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3109 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
/* Cached ISA bit sets, allocated and filled in arm_option_override:
   isa_all_fpubits = every FPU-related ISA bit; isa_quirkbits = every
   quirk bit.  Used by arm_configure_build_target to mask those bits
   out of the -mcpu vs -march comparison.  */
3113 static sbitmap isa_all_fpubits
;
3114 static sbitmap isa_quirkbits
;
3116 /* Configure a build target TARGET from the user-specified options OPTS and
3117 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3118 architecture have been specified, but the two are not identical. */
/* NOTE(review): the original statements here are split across multiple
   display lines and some lines (braces, `else`, trailing call
   arguments) are elided; the embedded numbers are original file line
   numbers.  The flow is: parse -march/-mcpu/-mtune; if both CPU and
   arch were given, compare their ISA sets (ignoring quirk and FPU
   bits) and let -march win for codegen with -mcpu winning for tuning;
   if neither was given, select a CPU compatible with TARGET_CPU_DEFAULT
   plus any abilities implied by -mthumb/-minterwork; finally fold in
   -mfpu bits and fill in the tuning fields of TARGET.  */
3120 arm_configure_build_target (struct arm_build_target
*target
,
3121 struct cl_target_option
*opts
,
3122 struct gcc_options
*opts_set
,
3123 bool warn_compatible
)
3125 const cpu_option
*arm_selected_tune
= NULL
;
3126 const arch_option
*arm_selected_arch
= NULL
;
3127 const cpu_option
*arm_selected_cpu
= NULL
;
3128 const arm_fpu_desc
*arm_selected_fpu
= NULL
;
3129 const char *tune_opts
= NULL
;
3130 const char *arch_opts
= NULL
;
3131 const char *cpu_opts
= NULL
;
/* Start from a clean target description.  */
3133 bitmap_clear (target
->isa
);
3134 target
->core_name
= NULL
;
3135 target
->arch_name
= NULL
;
3137 if (opts_set
->x_arm_arch_string
)
3139 arm_selected_arch
= arm_parse_arch_option_name (all_architectures
,
3141 opts
->x_arm_arch_string
);
/* Feature extensions follow a '+' in the option string.  */
3142 arch_opts
= strchr (opts
->x_arm_arch_string
, '+');
3145 if (opts_set
->x_arm_cpu_string
)
3147 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "-mcpu",
3148 opts
->x_arm_cpu_string
);
3149 cpu_opts
= strchr (opts
->x_arm_cpu_string
, '+');
3150 arm_selected_tune
= arm_selected_cpu
;
3151 /* If taking the tuning from -mcpu, we don't need to rescan the
3152 options for tuning. */
3155 if (opts_set
->x_arm_tune_string
)
3157 arm_selected_tune
= arm_parse_cpu_option_name (all_cores
, "-mtune",
3158 opts
->x_arm_tune_string
);
3159 tune_opts
= strchr (opts
->x_arm_tune_string
, '+');
3162 if (arm_selected_arch
)
3164 arm_initialize_isa (target
->isa
, arm_selected_arch
->common
.isa_bits
);
3165 arm_parse_option_features (target
->isa
, &arm_selected_arch
->common
,
/* Both -march and -mcpu given: check they agree.  */
3168 if (arm_selected_cpu
)
3170 auto_sbitmap
cpu_isa (isa_num_bits
);
3171 auto_sbitmap
isa_delta (isa_num_bits
);
3173 arm_initialize_isa (cpu_isa
, arm_selected_cpu
->common
.isa_bits
);
3174 arm_parse_option_features (cpu_isa
, &arm_selected_cpu
->common
,
3176 bitmap_xor (isa_delta
, cpu_isa
, target
->isa
);
3177 /* Ignore any bits that are quirk bits. */
3178 bitmap_and_compl (isa_delta
, isa_delta
, isa_quirkbits
);
3179 /* Ignore (for now) any bits that might be set by -mfpu. */
3180 bitmap_and_compl (isa_delta
, isa_delta
, isa_all_fpubits
);
3182 if (!bitmap_empty_p (isa_delta
))
3184 if (warn_compatible
)
3185 warning (0, "switch %<-mcpu=%s%> conflicts "
3186 "with %<-march=%s%> switch",
3187 arm_selected_cpu
->common
.name
,
3188 arm_selected_arch
->common
.name
);
3189 /* -march wins for code generation.
3190 -mcpu wins for default tuning. */
3191 if (!arm_selected_tune
)
3192 arm_selected_tune
= arm_selected_cpu
;
3194 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3195 target
->arch_name
= arm_selected_arch
->common
.name
;
3199 /* Architecture and CPU are essentially the same.
3200 Prefer the CPU setting. */
3201 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3202 target
->core_name
= arm_selected_cpu
->common
.name
;
3203 /* Copy the CPU's capabilities, so that we inherit the
3204 appropriate extensions and quirks. */
3205 bitmap_copy (target
->isa
, cpu_isa
);
3210 /* Pick a CPU based on the architecture. */
3211 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3212 target
->arch_name
= arm_selected_arch
->common
.name
;
3213 /* Note: target->core_name is left unset in this path. */
3216 else if (arm_selected_cpu
)
3218 target
->core_name
= arm_selected_cpu
->common
.name
;
3219 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3220 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3222 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3224 /* If the user did not specify a processor or architecture, choose
3228 const cpu_option
*sel
;
3229 auto_sbitmap
sought_isa (isa_num_bits
);
3230 bitmap_clear (sought_isa
);
3231 auto_sbitmap
default_isa (isa_num_bits
);
3233 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "default CPU",
3234 TARGET_CPU_DEFAULT
);
3235 cpu_opts
= strchr (TARGET_CPU_DEFAULT
, '+');
3236 gcc_assert (arm_selected_cpu
->common
.name
);
3238 /* RWE: All of the selection logic below (to the end of this
3239 'if' clause) looks somewhat suspect. It appears to be mostly
3240 there to support forcing thumb support when the default CPU
3241 does not have thumb (somewhat dubious in terms of what the
3242 user might be expecting). I think it should be removed once
3243 support for the pre-thumb era cores is removed. */
3244 sel
= arm_selected_cpu
;
3245 arm_initialize_isa (default_isa
, sel
->common
.isa_bits
);
3246 arm_parse_option_features (default_isa
, &arm_selected_cpu
->common
,
3249 /* Now check to see if the user has specified any command line
3250 switches that require certain abilities from the cpu. */
3252 if (TARGET_INTERWORK
|| TARGET_THUMB
)
3253 bitmap_set_bit (sought_isa
, isa_bit_thumb
);
3255 /* If there are such requirements and the default CPU does not
3256 satisfy them, we need to run over the complete list of
3257 cores looking for one that is satisfactory. */
3258 if (!bitmap_empty_p (sought_isa
)
3259 && !bitmap_subset_p (sought_isa
, default_isa
))
3261 auto_sbitmap
candidate_isa (isa_num_bits
);
3262 /* We're only interested in a CPU with at least the
3263 capabilities of the default CPU and the required
3264 additional features. */
3265 bitmap_ior (default_isa
, default_isa
, sought_isa
);
3267 /* Try to locate a CPU type that supports all of the abilities
3268 of the default CPU, plus the extra abilities requested by
3270 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3272 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3273 /* An exact match? */
3274 if (bitmap_equal_p (default_isa
, candidate_isa
))
/* No exact match found: fall back to a best-fit scan.  */
3278 if (sel
->common
.name
== NULL
)
3280 unsigned current_bit_count
= isa_num_bits
;
3281 const cpu_option
*best_fit
= NULL
;
3283 /* Ideally we would like to issue an error message here
3284 saying that it was not possible to find a CPU compatible
3285 with the default CPU, but which also supports the command
3286 line options specified by the programmer, and so they
3287 ought to use the -mcpu=<name> command line option to
3288 override the default CPU type.
3290 If we cannot find a CPU that has exactly the
3291 characteristics of the default CPU and the given
3292 command line options we scan the array again looking
3293 for a best match. The best match must have at least
3294 the capabilities of the perfect match. */
3295 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3297 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3299 if (bitmap_subset_p (default_isa
, candidate_isa
))
/* Best fit = fewest extra capability bits beyond what we need.  */
3303 bitmap_and_compl (candidate_isa
, candidate_isa
,
3305 count
= bitmap_popcount (candidate_isa
);
3307 if (count
< current_bit_count
)
3310 current_bit_count
= count
;
3314 gcc_assert (best_fit
);
3318 arm_selected_cpu
= sel
;
3321 /* Now we know the CPU, we can finally initialize the target
3323 target
->core_name
= arm_selected_cpu
->common
.name
;
3324 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3325 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3327 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3330 gcc_assert (arm_selected_cpu
);
3331 gcc_assert (arm_selected_arch
);
/* Fold in an explicit -mfpu, replacing any FPU bits implied so far.  */
3333 if (opts
->x_arm_fpu_index
!= TARGET_FPU_auto
)
3335 arm_selected_fpu
= &all_fpus
[opts
->x_arm_fpu_index
];
3336 auto_sbitmap
fpu_bits (isa_num_bits
);
3338 arm_initialize_isa (fpu_bits
, arm_selected_fpu
->isa_bits
);
3339 bitmap_and_compl (target
->isa
, target
->isa
, isa_all_fpubits
);
3340 bitmap_ior (target
->isa
, target
->isa
, fpu_bits
);
3343 if (!arm_selected_tune
)
3344 arm_selected_tune
= arm_selected_cpu
;
3345 else /* Validate the features passed to -mtune. */
3346 arm_parse_option_features (NULL
, &arm_selected_tune
->common
, tune_opts
);
3348 const cpu_tune
*tune_data
= &all_tunes
[arm_selected_tune
- all_cores
];
3350 /* Finish initializing the target structure. */
3351 target
->arch_pp_name
= arm_selected_arch
->arch
;
3352 target
->base_arch
= arm_selected_arch
->base_arch
;
3353 target
->profile
= arm_selected_arch
->profile
;
3355 target
->tune_flags
= tune_data
->tune_flags
;
3356 target
->tune
= tune_data
->tune
;
3357 target
->tune_core
= tune_data
->scheduler
;
3358 arm_option_reconfigure_globals ();
3361 /* Fix up any incompatible options that the user has specified. */
/* Implements TARGET_OPTION_OVERRIDE: the one-time option setup entry
   point.  Allocates the quirk/FPU bitmaps, builds the active target,
   then resolves option conflicts and seeds tuning parameters.
   NOTE(review): several original lines are elided in this excerpt
   (braces, some call arguments, `break` statements in the switch).  */
3363 arm_option_override (void)
3365 static const enum isa_feature fpu_bitlist
[]
3366 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
3367 static const enum isa_feature quirk_bitlist
[] = { ISA_ALL_QUIRKS
, isa_nobit
};
3368 cl_target_option opts
;
3370 isa_quirkbits
= sbitmap_alloc (isa_num_bits
);
3371 arm_initialize_isa (isa_quirkbits
, quirk_bitlist
);
3373 isa_all_fpubits
= sbitmap_alloc (isa_num_bits
);
3374 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
3376 arm_active_target
.isa
= sbitmap_alloc (isa_num_bits
);
/* No explicit -mfpu: resolve the configured default FPU name.  */
3378 if (!global_options_set
.x_arm_fpu_index
)
3383 ok
= opt_enum_arg_to_value (OPT_mfpu_
, FPUTYPE_AUTO
, &fpu_index
,
3386 arm_fpu_index
= (enum fpu_type
) fpu_index
;
3389 cl_target_option_save (&opts
, &global_options
);
3390 arm_configure_build_target (&arm_active_target
, &opts
, &global_options_set
,
3393 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3394 SUBTARGET_OVERRIDE_OPTIONS
;
3397 /* Initialize boolean versions of the architectural flags, for use
3398 in the arm.md file and for enabling feature flags. */
3399 arm_option_reconfigure_globals ();
3401 arm_tune
= arm_active_target
.tune_core
;
3402 tune_flags
= arm_active_target
.tune_flags
;
3403 current_tune
= arm_active_target
.tune
;
3405 /* TBD: Dwarf info for apcs frame is not handled yet. */
3406 if (TARGET_APCS_FRAME
)
3407 flag_shrink_wrap
= false;
3409 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3411 warning (0, "%<-mapcs-stack-check%> incompatible with "
3412 "%<-mno-apcs-frame%>");
3413 target_flags
|= MASK_APCS_FRAME
;
3416 if (TARGET_POKE_FUNCTION_NAME
)
3417 target_flags
|= MASK_APCS_FRAME
;
3419 if (TARGET_APCS_REENT
&& flag_pic
)
3420 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3422 if (TARGET_APCS_REENT
)
3423 warning (0, "APCS reentrant code not supported. Ignored");
3425 /* Set up some tuning parameters. */
3426 arm_ld_sched
= (tune_flags
& TF_LDSCHED
) != 0;
3427 arm_tune_strongarm
= (tune_flags
& TF_STRONG
) != 0;
3428 arm_tune_wbuf
= (tune_flags
& TF_WBUF
) != 0;
3429 arm_tune_xscale
= (tune_flags
& TF_XSCALE
) != 0;
3430 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3431 arm_m_profile_small_mul
= (tune_flags
& TF_SMALLMUL
) != 0;
3433 /* For arm2/3 there is no need to do any scheduling if we are doing
3434 software floating-point. */
3435 if (TARGET_SOFT_FLOAT
&& (tune_flags
& TF_NO_MODE32
))
3436 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3438 /* Override the default structure alignment for AAPCS ABI. */
3439 if (!global_options_set
.x_arm_structure_size_boundary
)
3441 if (TARGET_AAPCS_BASED
)
3442 arm_structure_size_boundary
= 8;
/* User set -mstructure-size-boundary explicitly: validate it.  */
3446 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3448 if (arm_structure_size_boundary
!= 8
3449 && arm_structure_size_boundary
!= 32
3450 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3452 if (ARM_DOUBLEWORD_ALIGN
)
3454 "structure size boundary can only be set to 8, 32 or 64");
3456 warning (0, "structure size boundary can only be set to 8 or 32");
3457 arm_structure_size_boundary
3458 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3462 if (TARGET_VXWORKS_RTP
)
3464 if (!global_options_set
.x_arm_pic_data_is_text_relative
)
3465 arm_pic_data_is_text_relative
= 0;
/* NOTE(review): the first conjunct of this condition (original lines
   3466-3467) is elided in this excerpt.  */
3468 && !arm_pic_data_is_text_relative
3469 && !(global_options_set
.x_target_flags
& MASK_SINGLE_PIC_BASE
))
3470 /* When text & data segments don't have a fixed displacement, the
3471 intended use is with a single, read only, pic base register.
3472 Unless the user explicitly requested not to do that, set
3474 target_flags
|= MASK_SINGLE_PIC_BASE
;
3476 /* If stack checking is disabled, we can use r10 as the PIC register,
3477 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3478 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3480 if (TARGET_VXWORKS_RTP
)
3481 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3482 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3485 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3486 arm_pic_register
= 9;
3488 if (arm_pic_register_string
!= NULL
)
3490 int pic_register
= decode_reg_name (arm_pic_register_string
);
3493 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3495 /* Prevent the user from choosing an obviously stupid PIC register. */
3496 else if (pic_register
< 0 || call_used_regs
[pic_register
]
3497 || pic_register
== HARD_FRAME_POINTER_REGNUM
3498 || pic_register
== STACK_POINTER_REGNUM
3499 || pic_register
>= PC_REGNUM
3500 || (TARGET_VXWORKS_RTP
3501 && (unsigned int) pic_register
!= arm_pic_register
))
3502 error ("unable to use %qs for PIC register", arm_pic_register_string
);
3504 arm_pic_register
= pic_register
;
/* NOTE(review): the guard for this assignment (original lines
   3506-3507) is elided in this excerpt.  */
3508 target_word_relocations
= 1;
3510 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3511 if (fix_cm3_ldrd
== 2)
3513 if (bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_cm3_ldrd
))
3519 /* Hot/Cold partitioning is not currently supported, since we can't
3520 handle literal pool placement in that case. */
3521 if (flag_reorder_blocks_and_partition
)
3523 inform (input_location
,
3524 "%<-freorder-blocks-and-partition%> not supported "
3525 "on this architecture");
3526 flag_reorder_blocks_and_partition
= 0;
3527 flag_reorder_blocks
= 1;
3531 /* Hoisting PIC address calculations more aggressively provides a small,
3532 but measurable, size reduction for PIC code. Therefore, we decrease
3533 the bar for unrestricted expression hoisting to the cost of PIC address
3534 calculation, which is 2 instructions. */
3535 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3536 global_options
.x_param_values
,
3537 global_options_set
.x_param_values
);
3539 /* ARM EABI defaults to strict volatile bitfields. */
3540 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3541 && abi_version_at_least(2))
3542 flag_strict_volatile_bitfields
= 1;
3544 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3545 have deemed it beneficial (signified by setting
3546 prefetch.num_slots to 1 or more). */
3547 if (flag_prefetch_loop_arrays
< 0
3550 && current_tune
->prefetch
.num_slots
> 0)
3551 flag_prefetch_loop_arrays
= 1;
3553 /* Set up parameters to be used in prefetching algorithm. Do not
3554 override the defaults unless we are tuning for a core we have
3555 researched values for. */
3556 if (current_tune
->prefetch
.num_slots
> 0)
3557 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3558 current_tune
->prefetch
.num_slots
,
3559 global_options
.x_param_values
,
3560 global_options_set
.x_param_values
);
3561 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3562 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3563 current_tune
->prefetch
.l1_cache_line_size
,
3564 global_options
.x_param_values
,
3565 global_options_set
.x_param_values
);
3566 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3567 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3568 current_tune
->prefetch
.l1_cache_size
,
3569 global_options
.x_param_values
,
3570 global_options_set
.x_param_values
);
3572 /* Use Neon to perform 64-bits operations rather than core
3574 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3575 if (use_neon_for_64bits
== 1)
3576 prefer_neon_for_64bits
= true;
3578 /* Use the alternative scheduling-pressure algorithm by default. */
3579 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3580 global_options
.x_param_values
,
3581 global_options_set
.x_param_values
);
3583 /* Look through ready list and all of queue for instructions
3584 relevant for L2 auto-prefetcher. */
3585 int param_sched_autopref_queue_depth
;
3587 switch (current_tune
->sched_autopref
)
3589 case tune_params::SCHED_AUTOPREF_OFF
:
3590 param_sched_autopref_queue_depth
= -1;
3593 case tune_params::SCHED_AUTOPREF_RANK
:
3594 param_sched_autopref_queue_depth
= 0;
3597 case tune_params::SCHED_AUTOPREF_FULL
:
3598 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3605 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3606 param_sched_autopref_queue_depth
,
3607 global_options
.x_param_values
,
3608 global_options_set
.x_param_values
);
3610 /* Currently, for slow flash data, we just disable literal pools. We also
3611 disable it for pure-code. */
3612 if (target_slow_flash_data
|| target_pure_code
)
3613 arm_disable_literal_pool
= true;
3615 /* Disable scheduling fusion by default if it's not armv7 processor
3616 or doesn't prefer ldrd/strd. */
3617 if (flag_schedule_fusion
== 2
3618 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3619 flag_schedule_fusion
= 0;
3621 /* Need to remember initial options before they are overriden. */
3622 init_optimize
= build_optimization_node (&global_options
);
3624 arm_options_perform_arch_sanity_checks ();
3625 arm_option_override_internal (&global_options
, &global_options_set
);
3626 arm_option_check_internal (&global_options
);
3627 arm_option_params_internal ();
3629 /* Create the default target_options structure. */
3630 target_option_default_node
= target_option_current_node
3631 = build_target_option_node (&global_options
);
3633 /* Register global variables with the garbage collector. */
3634 arm_add_gc_roots ();
3636 /* Init initial mode for testing. */
3637 thumb_flipper
= TARGET_THUMB
;
3641 /* Reconfigure global status flags from the active_target.isa. */
/* Derives the per-architecture boolean globals (arm_arch4 ... arm_arch8_4,
   Thumb/XScale/iWMMXt/hwdiv/CRC/CMSE/fp16/LPAE flags), the fp16 format,
   the quirk flags, and the thread-pointer access method, all from
   arm_active_target.  Called whenever the active target changes.  */
3643 arm_option_reconfigure_globals (void)
3645 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_active_target
.arch_pp_name
);
3646 arm_base_arch
= arm_active_target
.base_arch
;
3648 /* Initialize boolean versions of the architectural flags, for use
3649 in the arm.md file. */
3650 arm_arch4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv4
);
3651 arm_arch4t
= arm_arch4
&& bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3652 arm_arch5t
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5t
);
3653 arm_arch5te
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5te
);
3654 arm_arch6
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6
);
3655 arm_arch6k
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6k
);
3656 arm_arch_notm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
);
3657 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3658 arm_arch7
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7
);
3659 arm_arch7em
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7em
);
3660 arm_arch8
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8
);
3661 arm_arch8_1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_1
);
3662 arm_arch8_2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_2
);
3663 arm_arch8_3
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_3
);
3664 arm_arch8_4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_4
);
3665 arm_arch_thumb1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3666 arm_arch_thumb2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb2
);
3667 arm_arch_xscale
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_xscale
);
3668 arm_arch_iwmmxt
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt
);
3669 arm_arch_iwmmxt2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt2
);
3670 arm_arch_thumb_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_tdiv
);
3671 arm_arch_arm_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_adiv
);
3672 arm_arch_crc
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_crc32
);
3673 arm_arch_cmse
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_cmse
);
3674 arm_fp16_inst
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_fp16
);
3675 arm_arch_lpae
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_lpae
);
/* NOTE(review): the guard around this fp16-format fixup (original
   lines 3676-3677) is elided in this excerpt.  */
3678 if (arm_fp16_format
== ARM_FP16_FORMAT_ALTERNATIVE
)
3679 error ("selected fp16 options are incompatible");
3680 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3683 /* And finally, set up some quirks. */
3684 arm_arch_no_volatile_ce
3685 = bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_volatile_ce
);
3686 arm_arch6kz
= arm_arch6k
&& bitmap_bit_p (arm_active_target
.isa
,
3687 isa_bit_quirk_armv6kz
);
3689 /* Use the cp15 method if it is available. */
3690 if (target_thread_pointer
== TP_AUTO
)
3692 if (arm_arch6k
&& !TARGET_THUMB1
)
3693 target_thread_pointer
= TP_CP15
;
3695 target_thread_pointer
= TP_SOFT
;
3699 /* Perform some validation between the desired architecture and the rest of the
/* Validates the selected architecture against the other options:
   interworking, iWMMXt/ABI pairing, FPU attribute, AAPCS-specific
   restrictions, __fp16, CMSE, and finally the default procedure
   calling standard (arm_pcs_default).  NOTE(review): several `else`
   keywords and guard lines are elided in this excerpt.  */
3702 arm_options_perform_arch_sanity_checks (void)
3704 /* V5T code we generate is completely interworking capable, so we turn off
3705 TARGET_INTERWORK here to avoid many tests later on. */
3707 /* XXX However, we must pass the right pre-processor defines to CPP
3708 or GLD can get confused. This is a hack. */
3709 if (TARGET_INTERWORK
)
3710 arm_cpp_interwork
= 1;
3713 target_flags
&= ~MASK_INTERWORK
;
3715 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3716 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3718 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3719 error ("iwmmxt abi requires an iwmmxt capable cpu");
3721 /* BPABI targets use linker tricks to allow interworking on cores
3722 without thumb support. */
3723 if (TARGET_INTERWORK
3725 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3727 warning (0, "target CPU does not support interworking" );
3728 target_flags
&= ~MASK_INTERWORK
;
3731 /* If soft-float is specified then don't use FPU. */
3732 if (TARGET_SOFT_FLOAT
)
3733 arm_fpu_attr
= FPU_NONE
;
3735 arm_fpu_attr
= FPU_VFP
;
3737 if (TARGET_AAPCS_BASED
)
3739 if (TARGET_CALLER_INTERWORKING
)
3740 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3742 if (TARGET_CALLEE_INTERWORKING
)
3743 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3746 /* __fp16 support currently assumes the core has ldrh. */
3747 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3748 sorry ("__fp16 and no ldrh");
3750 if (use_cmse
&& !arm_arch_cmse
)
3751 error ("target CPU does not support ARMv8-M Security Extensions");
3753 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3754 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3755 if (use_cmse
&& LAST_VFP_REGNUM
> LAST_LO_VFP_REGNUM
)
3756 error ("ARMv8-M Security Extensions incompatible with selected FPU");
/* Choose the default procedure calling standard.  */
3759 if (TARGET_AAPCS_BASED
)
3761 if (arm_abi
== ARM_ABI_IWMMXT
)
3762 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3763 else if (TARGET_HARD_FLOAT_ABI
)
3765 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3766 if (!bitmap_bit_p (arm_active_target
.isa
, isa_bit_vfpv2
))
3767 error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
3770 arm_pcs_default
= ARM_PCS_AAPCS
;
/* Non-AAPCS (legacy APCS/ATPCS) path.  */
3774 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
)
3775 sorry ("%<-mfloat-abi=hard%> and VFP");
3777 if (arm_abi
== ARM_ABI_APCS
)
3778 arm_pcs_default
= ARM_PCS_APCS
;
3780 arm_pcs_default
= ARM_PCS_ATPCS
;
/* One-time setup called from arm_option_override: initialise the
   minipool (constant pool) obstack and record its base object.  */
3785 arm_add_gc_roots (void)
3787 gcc_obstack_init(&minipool_obstack
);
3788 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3791 /* A table of known ARM exception types.
3792 For use with the interrupt function attribute. */
/* Each entry maps an attribute-argument string to an ARM_FT_* function
   type flag; the table is terminated by a NULL arg.  Scanned linearly
   by arm_isr_value.  NOTE(review): the "ABORT"/"abort" pairs appear
   twice; the duplicates are harmless (first match wins) but redundant.  */
3796 const char *const arg
;
3797 const unsigned long return_value
;
3801 static const isr_attribute_arg isr_attribute_args
[] =
3803 { "IRQ", ARM_FT_ISR
},
3804 { "irq", ARM_FT_ISR
},
3805 { "FIQ", ARM_FT_FIQ
},
3806 { "fiq", ARM_FT_FIQ
},
3807 { "ABORT", ARM_FT_ISR
},
3808 { "abort", ARM_FT_ISR
},
3809 { "ABORT", ARM_FT_ISR
},
3810 { "abort", ARM_FT_ISR
},
3811 { "UNDEF", ARM_FT_EXCEPTION
},
3812 { "undef", ARM_FT_EXCEPTION
},
3813 { "SWI", ARM_FT_EXCEPTION
},
3814 { "swi", ARM_FT_EXCEPTION
},
3815 { NULL
, ARM_FT_NORMAL
}
3818 /* Returns the (interrupt) function type of the current
3819 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
/* ARGUMENT is the TREE_VALUE list of the "isr"/"interrupt" attribute;
   its first value, if present, must be a string constant naming one of
   the entries in isr_attribute_args.  NOTE(review): the guards around
   the first two returns (original lines 3826-3827 and 3832-3833) are
   elided in this excerpt.  */
3821 static unsigned long
3822 arm_isr_value (tree argument
)
3824 const isr_attribute_arg
* ptr
;
3828 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3830 /* No argument - default to IRQ. */
3831 if (argument
== NULL_TREE
)
3834 /* Get the value of the argument. */
3835 if (TREE_VALUE (argument
) == NULL_TREE
3836 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3837 return ARM_FT_UNKNOWN
;
3839 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3841 /* Check it against the list of known arguments. */
3842 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3843 if (streq (arg
, ptr
->arg
))
3844 return ptr
->return_value
;
3846 /* An unrecognized interrupt type. */
3847 return ARM_FT_UNKNOWN
;
3850 /* Computes the type of the current function. */
/* Builds an ARM_FT_* bitmask for current_function_decl from its
   volatility, static chain, and the "naked"/"isr"/"interrupt"/
   "cmse_nonsecure_entry" attributes.  NOTE(review): several guard and
   `else` lines (and the final return) are elided in this excerpt.  */
3852 static unsigned long
3853 arm_compute_func_type (void)
3855 unsigned long type
= ARM_FT_UNKNOWN
;
3859 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
)
;
3861 /* Decide if the current function is volatile. Such functions
3862 never return, and many memory cycles can be saved by not storing
3863 register values that will never be needed again. This optimization
3864 was added to speed up context switching in a kernel application. */
3866 && (TREE_NOTHROW (current_function_decl
)
3867 || !(flag_unwind_tables
3869 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3870 && TREE_THIS_VOLATILE (current_function_decl
))
3871 type
|= ARM_FT_VOLATILE
;
3873 if (cfun
->static_chain_decl
!= NULL
)
3874 type
|= ARM_FT_NESTED
;
3876 attr
= DECL_ATTRIBUTES (current_function_decl
);
3878 a
= lookup_attribute ("naked", attr
);
3880 type
|= ARM_FT_NAKED
;
3882 a
= lookup_attribute ("isr", attr
);
/* "interrupt" is accepted as a synonym for "isr".  */
3884 a
= lookup_attribute ("interrupt", attr
);
3887 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3889 type
|= arm_isr_value (TREE_VALUE (a
));
3891 if (lookup_attribute ("cmse_nonsecure_entry", attr
))
3892 type
|= ARM_FT_CMSE_ENTRY
;
3897 /* Returns the type of the current function. */
/* Lazily caches the result of arm_compute_func_type in
   cfun->machine->func_type and returns it.  */
3900 arm_current_func_type (void)
3902 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3903 cfun
->machine
->func_type
= arm_compute_func_type ();
3905 return cfun
->machine
->func_type
;
/* Implement TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS: true unless the
   current function is naked.  */
3909 arm_allocate_stack_slots_for_args (void)
3911 /* Naked functions should not allocate stack slots for arguments. */
3912 return !IS_NAKED (arm_current_func_type ());
3916 arm_warn_func_return (tree decl
)
3918 /* Naked functions are implemented entirely in assembly, including the
3919 return sequence, so suppress warnings about this. */
3920 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3924 /* Output assembler code for a block containing the constant parts
3925 of a trampoline, leaving space for the variable parts.
3927 On the ARM, (if r8 is the static chain regnum, and remembering that
3928 referencing pc adds an offset of 8) the trampoline looks like:
3931 .word static chain value
3932 .word function's address
3933 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3936 arm_asm_trampoline_template (FILE *f
)
3938 fprintf (f
, "\t.syntax unified\n");
3942 fprintf (f
, "\t.arm\n");
3943 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3944 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3946 else if (TARGET_THUMB2
)
3948 fprintf (f
, "\t.thumb\n");
3949 /* The Thumb-2 trampoline is similar to the arm implementation.
3950 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3951 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3952 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3953 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
3957 ASM_OUTPUT_ALIGN (f
, 2);
3958 fprintf (f
, "\t.code\t16\n");
3959 fprintf (f
, ".Ltrampoline_start:\n");
3960 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3961 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3962 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3963 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3964 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3965 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
3967 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3968 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3971 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3974 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3976 rtx fnaddr
, mem
, a_tramp
;
3978 emit_block_move (m_tramp
, assemble_trampoline_template (),
3979 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3981 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3982 emit_move_insn (mem
, chain_value
);
3984 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3985 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3986 emit_move_insn (mem
, fnaddr
);
3988 a_tramp
= XEXP (m_tramp
, 0);
3989 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3990 LCT_NORMAL
, VOIDmode
, a_tramp
, Pmode
,
3991 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3994 /* Thumb trampolines should be entered in thumb mode, so set
3995 the bottom bit of the address. */
3998 arm_trampoline_adjust_address (rtx addr
)
4001 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
4002 NULL
, 0, OPTAB_LIB_WIDEN
);
4006 /* Return 1 if it is possible to return using a single instruction.
4007 If SIBLING is non-null, this is a test for a return before a sibling
4008 call. SIBLING is the call insn, so we can examine its register usage. */
4011 use_return_insn (int iscond
, rtx sibling
)
4014 unsigned int func_type
;
4015 unsigned long saved_int_regs
;
4016 unsigned HOST_WIDE_INT stack_adjust
;
4017 arm_stack_offsets
*offsets
;
4019 /* Never use a return instruction before reload has run. */
4020 if (!reload_completed
)
4023 func_type
= arm_current_func_type ();
4025 /* Naked, volatile and stack alignment functions need special
4027 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
4030 /* So do interrupt functions that use the frame pointer and Thumb
4031 interrupt functions. */
4032 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
4035 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
4036 && !optimize_function_for_size_p (cfun
))
4039 offsets
= arm_get_frame_offsets ();
4040 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
4042 /* As do variadic functions. */
4043 if (crtl
->args
.pretend_args_size
4044 || cfun
->machine
->uses_anonymous_args
4045 /* Or if the function calls __builtin_eh_return () */
4046 || crtl
->calls_eh_return
4047 /* Or if the function calls alloca */
4048 || cfun
->calls_alloca
4049 /* Or if there is a stack adjustment. However, if the stack pointer
4050 is saved on the stack, we can use a pre-incrementing stack load. */
4051 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
4052 && stack_adjust
== 4))
4053 /* Or if the static chain register was saved above the frame, under the
4054 assumption that the stack pointer isn't saved on the stack. */
4055 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
4056 && arm_compute_static_chain_stack_bytes() != 0))
4059 saved_int_regs
= offsets
->saved_regs_mask
;
4061 /* Unfortunately, the insn
4063 ldmib sp, {..., sp, ...}
4065 triggers a bug on most SA-110 based devices, such that the stack
4066 pointer won't be correctly restored if the instruction takes a
4067 page fault. We work around this problem by popping r3 along with
4068 the other registers, since that is never slower than executing
4069 another instruction.
4071 We test for !arm_arch5t here, because code for any architecture
4072 less than this could potentially be run on one of the buggy
4074 if (stack_adjust
== 4 && !arm_arch5t
&& TARGET_ARM
)
4076 /* Validate that r3 is a call-clobbered register (always true in
4077 the default abi) ... */
4078 if (!call_used_regs
[3])
4081 /* ... that it isn't being used for a return value ... */
4082 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
4085 /* ... or for a tail-call argument ... */
4088 gcc_assert (CALL_P (sibling
));
4090 if (find_regno_fusage (sibling
, USE
, 3))
4094 /* ... and that there are no call-saved registers in r0-r2
4095 (always true in the default ABI). */
4096 if (saved_int_regs
& 0x7)
4100 /* Can't be done if interworking with Thumb, and any registers have been
4102 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
4105 /* On StrongARM, conditional returns are expensive if they aren't
4106 taken and multiple registers have been stacked. */
4107 if (iscond
&& arm_tune_strongarm
)
4109 /* Conditional return when just the LR is stored is a simple
4110 conditional-load instruction, that's not expensive. */
4111 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
4115 && arm_pic_register
!= INVALID_REGNUM
4116 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
4120 /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
4121 several instructions if anything needs to be popped. */
4122 if (saved_int_regs
&& IS_CMSE_ENTRY (func_type
))
4125 /* If there are saved registers but the LR isn't saved, then we need
4126 two instructions for the return. */
4127 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
4130 /* Can't be done if any of the VFP regs are pushed,
4131 since this also requires an insn. */
4132 if (TARGET_HARD_FLOAT
)
4133 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
4134 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
4137 if (TARGET_REALLY_IWMMXT
)
4138 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
4139 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
4145 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4146 shrink-wrapping if possible. This is the case if we need to emit a
4147 prologue, which we can test by looking at the offsets. */
4149 use_simple_return_p (void)
4151 arm_stack_offsets
*offsets
;
4153 /* Note this function can be called before or after reload. */
4154 if (!reload_completed
)
4155 arm_compute_frame_layout ();
4157 offsets
= arm_get_frame_offsets ();
4158 return offsets
->outgoing_args
!= 0;
4161 /* Return TRUE if int I is a valid immediate ARM constant. */
4164 const_ok_for_arm (HOST_WIDE_INT i
)
4168 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4169 be all zero, or all one. */
4170 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
4171 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
4172 != ((~(unsigned HOST_WIDE_INT
) 0)
4173 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
4176 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
4178 /* Fast return for 0 and small values. We must do this for zero, since
4179 the code below can't handle that one case. */
4180 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
4183 /* Get the number of trailing zeros. */
4184 lowbit
= ffs((int) i
) - 1;
4186 /* Only even shifts are allowed in ARM mode so round down to the
4187 nearest even number. */
4191 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
4196 /* Allow rotated constants in ARM mode. */
4198 && ((i
& ~0xc000003f) == 0
4199 || (i
& ~0xf000000f) == 0
4200 || (i
& ~0xfc000003) == 0))
4203 else if (TARGET_THUMB2
)
4207 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4210 if (i
== v
|| i
== (v
| (v
<< 8)))
4213 /* Allow repeated pattern 0xXY00XY00. */
4219 else if (TARGET_HAVE_MOVT
)
4221 /* Thumb-1 Targets with MOVT. */
4231 /* Return true if I is a valid constant for the operation CODE. */
4233 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
4235 if (const_ok_for_arm (i
))
4241 /* See if we can use movw. */
4242 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
4245 /* Otherwise, try mvn. */
4246 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4249 /* See if we can use addw or subw. */
4251 && ((i
& 0xfffff000) == 0
4252 || ((-i
) & 0xfffff000) == 0))
4273 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
4275 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
4281 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4285 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4292 /* Return true if I is a valid di mode constant for the operation CODE. */
4294 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
4296 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4297 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4298 rtx hi
= GEN_INT (hi_val
);
4299 rtx lo
= GEN_INT (lo_val
);
4309 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
4310 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
4312 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4319 /* Emit a sequence of insns to handle a large constant.
4320 CODE is the code of the operation required, it can be any of SET, PLUS,
4321 IOR, AND, XOR, MINUS;
4322 MODE is the mode in which the operation is being performed;
4323 VAL is the integer to operate on;
4324 SOURCE is the other operand (a register, or a null-pointer for SET);
4325 SUBTARGETS means it is safe to create scratch registers if that will
4326 either produce a simpler sequence, or we will want to cse the values.
4327 Return value is the number of insns emitted. */
4329 /* ??? Tweak this for thumb2. */
4331 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4332 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
4336 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4337 cond
= COND_EXEC_TEST (PATTERN (insn
));
4341 if (subtargets
|| code
== SET
4342 || (REG_P (target
) && REG_P (source
)
4343 && REGNO (target
) != REGNO (source
)))
4345 /* After arm_reorg has been called, we can't fix up expensive
4346 constants by pushing them into memory so we must synthesize
4347 them in-line, regardless of the cost. This is only likely to
4348 be more costly on chips that have load delay slots and we are
4349 compiling without running the scheduler (so no splitting
4350 occurred before the final instruction emission).
4352 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4354 if (!cfun
->machine
->after_arm_reorg
4356 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4358 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4363 /* Currently SET is the only monadic value for CODE, all
4364 the rest are diadic. */
4365 if (TARGET_USE_MOVT
)
4366 arm_emit_movpair (target
, GEN_INT (val
));
4368 emit_set_insn (target
, GEN_INT (val
));
4374 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4376 if (TARGET_USE_MOVT
)
4377 arm_emit_movpair (temp
, GEN_INT (val
));
4379 emit_set_insn (temp
, GEN_INT (val
));
4381 /* For MINUS, the value is subtracted from, since we never
4382 have subtraction of a constant. */
4384 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4386 emit_set_insn (target
,
4387 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4393 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4397 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4398 ARM/THUMB2 immediates, and add up to VAL.
4399 Thr function return value gives the number of insns required. */
4401 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4402 struct four_ints
*return_sequence
)
4404 int best_consecutive_zeros
= 0;
4408 struct four_ints tmp_sequence
;
4410 /* If we aren't targeting ARM, the best place to start is always at
4411 the bottom, otherwise look more closely. */
4414 for (i
= 0; i
< 32; i
+= 2)
4416 int consecutive_zeros
= 0;
4418 if (!(val
& (3 << i
)))
4420 while ((i
< 32) && !(val
& (3 << i
)))
4422 consecutive_zeros
+= 2;
4425 if (consecutive_zeros
> best_consecutive_zeros
)
4427 best_consecutive_zeros
= consecutive_zeros
;
4428 best_start
= i
- consecutive_zeros
;
4435 /* So long as it won't require any more insns to do so, it's
4436 desirable to emit a small constant (in bits 0...9) in the last
4437 insn. This way there is more chance that it can be combined with
4438 a later addressing insn to form a pre-indexed load or store
4439 operation. Consider:
4441 *((volatile int *)0xe0000100) = 1;
4442 *((volatile int *)0xe0000110) = 2;
4444 We want this to wind up as:
4448 str rB, [rA, #0x100]
4450 str rB, [rA, #0x110]
4452 rather than having to synthesize both large constants from scratch.
4454 Therefore, we calculate how many insns would be required to emit
4455 the constant starting from `best_start', and also starting from
4456 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4457 yield a shorter sequence, we may as well use zero. */
4458 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4460 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4462 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4463 if (insns2
<= insns1
)
4465 *return_sequence
= tmp_sequence
;
4473 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4475 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4476 struct four_ints
*return_sequence
, int i
)
4478 int remainder
= val
& 0xffffffff;
4481 /* Try and find a way of doing the job in either two or three
4484 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4485 location. We start at position I. This may be the MSB, or
4486 optimial_immediate_sequence may have positioned it at the largest block
4487 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4488 wrapping around to the top of the word when we drop off the bottom.
4489 In the worst case this code should produce no more than four insns.
4491 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4492 constants, shifted to any arbitrary location. We should always start
4497 unsigned int b1
, b2
, b3
, b4
;
4498 unsigned HOST_WIDE_INT result
;
4501 gcc_assert (insns
< 4);
4506 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4507 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4510 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4511 /* We can use addw/subw for the last 12 bits. */
4515 /* Use an 8-bit shifted/rotated immediate. */
4519 result
= remainder
& ((0x0ff << end
)
4520 | ((i
< end
) ? (0xff >> (32 - end
))
4527 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4528 arbitrary shifts. */
4529 i
-= TARGET_ARM
? 2 : 1;
4533 /* Next, see if we can do a better job with a thumb2 replicated
4536 We do it this way around to catch the cases like 0x01F001E0 where
4537 two 8-bit immediates would work, but a replicated constant would
4540 TODO: 16-bit constants that don't clear all the bits, but still win.
4541 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4544 b1
= (remainder
& 0xff000000) >> 24;
4545 b2
= (remainder
& 0x00ff0000) >> 16;
4546 b3
= (remainder
& 0x0000ff00) >> 8;
4547 b4
= remainder
& 0xff;
4551 /* The 8-bit immediate already found clears b1 (and maybe b2),
4552 but must leave b3 and b4 alone. */
4554 /* First try to find a 32-bit replicated constant that clears
4555 almost everything. We can assume that we can't do it in one,
4556 or else we wouldn't be here. */
4557 unsigned int tmp
= b1
& b2
& b3
& b4
;
4558 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4560 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4561 + (tmp
== b3
) + (tmp
== b4
);
4563 && (matching_bytes
>= 3
4564 || (matching_bytes
== 2
4565 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4567 /* At least 3 of the bytes match, and the fourth has at
4568 least as many bits set, or two of the bytes match
4569 and it will only require one more insn to finish. */
4577 /* Second, try to find a 16-bit replicated constant that can
4578 leave three of the bytes clear. If b2 or b4 is already
4579 zero, then we can. If the 8-bit from above would not
4580 clear b2 anyway, then we still win. */
4581 else if (b1
== b3
&& (!b2
|| !b4
4582 || (remainder
& 0x00ff0000 & ~result
)))
4584 result
= remainder
& 0xff00ff00;
4590 /* The 8-bit immediate already found clears b2 (and maybe b3)
4591 and we don't get here unless b1 is alredy clear, but it will
4592 leave b4 unchanged. */
4594 /* If we can clear b2 and b4 at once, then we win, since the
4595 8-bits couldn't possibly reach that far. */
4598 result
= remainder
& 0x00ff00ff;
4604 return_sequence
->i
[insns
++] = result
;
4605 remainder
&= ~result
;
4607 if (code
== SET
|| code
== MINUS
)
4615 /* Emit an instruction with the indicated PATTERN. If COND is
4616 non-NULL, conditionalize the execution of the instruction on COND
4620 emit_constant_insn (rtx cond
, rtx pattern
)
4623 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4624 emit_insn (pattern
);
4627 /* As above, but extra parameter GENERATE which, if clear, suppresses
4631 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4632 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4633 int subtargets
, int generate
)
4637 int final_invert
= 0;
4639 int set_sign_bit_copies
= 0;
4640 int clear_sign_bit_copies
= 0;
4641 int clear_zero_bit_copies
= 0;
4642 int set_zero_bit_copies
= 0;
4643 int insns
= 0, neg_insns
, inv_insns
;
4644 unsigned HOST_WIDE_INT temp1
, temp2
;
4645 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4646 struct four_ints
*immediates
;
4647 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4649 /* Find out which operations are safe for a given CODE. Also do a quick
4650 check for degenerate cases; these can occur when DImode operations
4663 if (remainder
== 0xffffffff)
4666 emit_constant_insn (cond
,
4667 gen_rtx_SET (target
,
4668 GEN_INT (ARM_SIGN_EXTEND (val
))));
4674 if (reload_completed
&& rtx_equal_p (target
, source
))
4678 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4687 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4690 if (remainder
== 0xffffffff)
4692 if (reload_completed
&& rtx_equal_p (target
, source
))
4695 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4704 if (reload_completed
&& rtx_equal_p (target
, source
))
4707 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4711 if (remainder
== 0xffffffff)
4714 emit_constant_insn (cond
,
4715 gen_rtx_SET (target
,
4716 gen_rtx_NOT (mode
, source
)));
4723 /* We treat MINUS as (val - source), since (source - val) is always
4724 passed as (source + (-val)). */
4728 emit_constant_insn (cond
,
4729 gen_rtx_SET (target
,
4730 gen_rtx_NEG (mode
, source
)));
4733 if (const_ok_for_arm (val
))
4736 emit_constant_insn (cond
,
4737 gen_rtx_SET (target
,
4738 gen_rtx_MINUS (mode
, GEN_INT (val
),
4749 /* If we can do it in one insn get out quickly. */
4750 if (const_ok_for_op (val
, code
))
4753 emit_constant_insn (cond
,
4754 gen_rtx_SET (target
,
4756 ? gen_rtx_fmt_ee (code
, mode
, source
,
4762 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4764 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4765 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4769 if (mode
== SImode
&& i
== 16)
4770 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4772 emit_constant_insn (cond
,
4773 gen_zero_extendhisi2
4774 (target
, gen_lowpart (HImode
, source
)));
4776 /* Extz only supports SImode, but we can coerce the operands
4778 emit_constant_insn (cond
,
4779 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4780 gen_lowpart (SImode
, source
),
4781 GEN_INT (i
), const0_rtx
));
4787 /* Calculate a few attributes that may be useful for specific
4789 /* Count number of leading zeros. */
4790 for (i
= 31; i
>= 0; i
--)
4792 if ((remainder
& (1 << i
)) == 0)
4793 clear_sign_bit_copies
++;
4798 /* Count number of leading 1's. */
4799 for (i
= 31; i
>= 0; i
--)
4801 if ((remainder
& (1 << i
)) != 0)
4802 set_sign_bit_copies
++;
4807 /* Count number of trailing zero's. */
4808 for (i
= 0; i
<= 31; i
++)
4810 if ((remainder
& (1 << i
)) == 0)
4811 clear_zero_bit_copies
++;
4816 /* Count number of trailing 1's. */
4817 for (i
= 0; i
<= 31; i
++)
4819 if ((remainder
& (1 << i
)) != 0)
4820 set_zero_bit_copies
++;
4828 /* See if we can do this by sign_extending a constant that is known
4829 to be negative. This is a good, way of doing it, since the shift
4830 may well merge into a subsequent insn. */
4831 if (set_sign_bit_copies
> 1)
4833 if (const_ok_for_arm
4834 (temp1
= ARM_SIGN_EXTEND (remainder
4835 << (set_sign_bit_copies
- 1))))
4839 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4840 emit_constant_insn (cond
,
4841 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4842 emit_constant_insn (cond
,
4843 gen_ashrsi3 (target
, new_src
,
4844 GEN_INT (set_sign_bit_copies
- 1)));
4848 /* For an inverted constant, we will need to set the low bits,
4849 these will be shifted out of harm's way. */
4850 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4851 if (const_ok_for_arm (~temp1
))
4855 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4856 emit_constant_insn (cond
,
4857 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4858 emit_constant_insn (cond
,
4859 gen_ashrsi3 (target
, new_src
,
4860 GEN_INT (set_sign_bit_copies
- 1)));
4866 /* See if we can calculate the value as the difference between two
4867 valid immediates. */
4868 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4870 int topshift
= clear_sign_bit_copies
& ~1;
4872 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4873 & (0xff000000 >> topshift
));
4875 /* If temp1 is zero, then that means the 9 most significant
4876 bits of remainder were 1 and we've caused it to overflow.
4877 When topshift is 0 we don't need to do anything since we
4878 can borrow from 'bit 32'. */
4879 if (temp1
== 0 && topshift
!= 0)
4880 temp1
= 0x80000000 >> (topshift
- 1);
4882 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4884 if (const_ok_for_arm (temp2
))
4888 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4889 emit_constant_insn (cond
,
4890 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4891 emit_constant_insn (cond
,
4892 gen_addsi3 (target
, new_src
,
4900 /* See if we can generate this by setting the bottom (or the top)
4901 16 bits, and then shifting these into the other half of the
4902 word. We only look for the simplest cases, to do more would cost
4903 too much. Be careful, however, not to generate this when the
4904 alternative would take fewer insns. */
4905 if (val
& 0xffff0000)
4907 temp1
= remainder
& 0xffff0000;
4908 temp2
= remainder
& 0x0000ffff;
4910 /* Overlaps outside this range are best done using other methods. */
4911 for (i
= 9; i
< 24; i
++)
4913 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4914 && !const_ok_for_arm (temp2
))
4916 rtx new_src
= (subtargets
4917 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4919 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4920 source
, subtargets
, generate
);
4928 gen_rtx_ASHIFT (mode
, source
,
4935 /* Don't duplicate cases already considered. */
4936 for (i
= 17; i
< 24; i
++)
4938 if (((temp1
| (temp1
>> i
)) == remainder
)
4939 && !const_ok_for_arm (temp1
))
4941 rtx new_src
= (subtargets
4942 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4944 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4945 source
, subtargets
, generate
);
4950 gen_rtx_SET (target
,
4953 gen_rtx_LSHIFTRT (mode
, source
,
4964 /* If we have IOR or XOR, and the constant can be loaded in a
4965 single instruction, and we can find a temporary to put it in,
4966 then this can be done in two instructions instead of 3-4. */
4968 /* TARGET can't be NULL if SUBTARGETS is 0 */
4969 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4971 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4975 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4977 emit_constant_insn (cond
,
4978 gen_rtx_SET (sub
, GEN_INT (val
)));
4979 emit_constant_insn (cond
,
4980 gen_rtx_SET (target
,
4981 gen_rtx_fmt_ee (code
, mode
,
4992 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4993 and the remainder 0s for e.g. 0xfff00000)
4994 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4996 This can be done in 2 instructions by using shifts with mov or mvn.
5001 mvn r0, r0, lsr #12 */
5002 if (set_sign_bit_copies
> 8
5003 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
5007 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5008 rtx shift
= GEN_INT (set_sign_bit_copies
);
5014 gen_rtx_ASHIFT (mode
,
5019 gen_rtx_SET (target
,
5021 gen_rtx_LSHIFTRT (mode
, sub
,
5028 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5030 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5032 For eg. r0 = r0 | 0xfff
5037 if (set_zero_bit_copies
> 8
5038 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
5042 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5043 rtx shift
= GEN_INT (set_zero_bit_copies
);
5049 gen_rtx_LSHIFTRT (mode
,
5054 gen_rtx_SET (target
,
5056 gen_rtx_ASHIFT (mode
, sub
,
5062 /* This will never be reached for Thumb2 because orn is a valid
5063 instruction. This is for Thumb1 and the ARM 32 bit cases.
5065 x = y | constant (such that ~constant is a valid constant)
5067 x = ~(~y & ~constant).
5069 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
5073 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5074 emit_constant_insn (cond
,
5076 gen_rtx_NOT (mode
, source
)));
5079 sub
= gen_reg_rtx (mode
);
5080 emit_constant_insn (cond
,
5082 gen_rtx_AND (mode
, source
,
5084 emit_constant_insn (cond
,
5085 gen_rtx_SET (target
,
5086 gen_rtx_NOT (mode
, sub
)));
5093 /* See if two shifts will do 2 or more insn's worth of work. */
5094 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
5096 HOST_WIDE_INT shift_mask
= ((0xffffffff
5097 << (32 - clear_sign_bit_copies
))
5100 if ((remainder
| shift_mask
) != 0xffffffff)
5102 HOST_WIDE_INT new_val
5103 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5107 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5108 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
5109 new_src
, source
, subtargets
, 1);
5114 rtx targ
= subtargets
? NULL_RTX
: target
;
5115 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5116 targ
, source
, subtargets
, 0);
5122 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5123 rtx shift
= GEN_INT (clear_sign_bit_copies
);
5125 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
5126 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
5132 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
5134 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
5136 if ((remainder
| shift_mask
) != 0xffffffff)
5138 HOST_WIDE_INT new_val
5139 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5142 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5144 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5145 new_src
, source
, subtargets
, 1);
5150 rtx targ
= subtargets
? NULL_RTX
: target
;
5152 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5153 targ
, source
, subtargets
, 0);
5159 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5160 rtx shift
= GEN_INT (clear_zero_bit_copies
);
5162 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
5163 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
5175 /* Calculate what the instruction sequences would be if we generated it
5176 normally, negated, or inverted. */
5178 /* AND cannot be split into multiple insns, so invert and use BIC. */
5181 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
5184 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
5189 if (can_invert
|| final_invert
)
5190 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
5195 immediates
= &pos_immediates
;
5197 /* Is the negated immediate sequence more efficient? */
5198 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
5201 immediates
= &neg_immediates
;
5206 /* Is the inverted immediate sequence more efficient?
5207 We must allow for an extra NOT instruction for XOR operations, although
5208 there is some chance that the final 'mvn' will get optimized later. */
5209 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
5212 immediates
= &inv_immediates
;
5220 /* Now output the chosen sequence as instructions. */
5223 for (i
= 0; i
< insns
; i
++)
5225 rtx new_src
, temp1_rtx
;
5227 temp1
= immediates
->i
[i
];
5229 if (code
== SET
|| code
== MINUS
)
5230 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
5231 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
5232 new_src
= gen_reg_rtx (mode
);
5238 else if (can_negate
)
5241 temp1
= trunc_int_for_mode (temp1
, mode
);
5242 temp1_rtx
= GEN_INT (temp1
);
5246 else if (code
== MINUS
)
5247 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
5249 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
5251 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
5256 can_negate
= can_invert
;
5260 else if (code
== MINUS
)
5268 emit_constant_insn (cond
, gen_rtx_SET (target
,
5269 gen_rtx_NOT (mode
, source
)));
5276 /* Canonicalize a comparison so that we are more likely to recognize it.
5277 This can be done for a few constant compares, where we can make the
5278 immediate value easier to load. */
5281 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
5282 bool op0_preserve_value
)
5285 unsigned HOST_WIDE_INT i
, maxval
;
5287 mode
= GET_MODE (*op0
);
5288 if (mode
== VOIDmode
)
5289 mode
= GET_MODE (*op1
);
5291 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5293 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5294 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5295 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5296 for GTU/LEU in Thumb mode. */
5300 if (*code
== GT
|| *code
== LE
5301 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
5303 /* Missing comparison. First try to use an available
5305 if (CONST_INT_P (*op1
))
5313 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5315 *op1
= GEN_INT (i
+ 1);
5316 *code
= *code
== GT
? GE
: LT
;
5322 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5323 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5325 *op1
= GEN_INT (i
+ 1);
5326 *code
= *code
== GTU
? GEU
: LTU
;
5335 /* If that did not work, reverse the condition. */
5336 if (!op0_preserve_value
)
5338 std::swap (*op0
, *op1
);
5339 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5345 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5346 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5347 to facilitate possible combining with a cmp into 'ands'. */
5349 && GET_CODE (*op0
) == ZERO_EXTEND
5350 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5351 && GET_MODE (XEXP (*op0
, 0)) == QImode
5352 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5353 && subreg_lowpart_p (XEXP (*op0
, 0))
5354 && *op1
== const0_rtx
)
5355 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5358 /* Comparisons smaller than DImode. Only adjust comparisons against
5359 an out-of-range constant. */
5360 if (!CONST_INT_P (*op1
)
5361 || const_ok_for_arm (INTVAL (*op1
))
5362 || const_ok_for_arm (- INTVAL (*op1
)))
5376 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5378 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5379 *code
= *code
== GT
? GE
: LT
;
5387 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5389 *op1
= GEN_INT (i
- 1);
5390 *code
= *code
== GE
? GT
: LE
;
5397 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5398 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5400 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5401 *code
= *code
== GTU
? GEU
: LTU
;
5409 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5411 *op1
= GEN_INT (i
- 1);
5412 *code
= *code
== GEU
? GTU
: LEU
;
5423 /* Define how to find the value returned by a function. */
5426 arm_function_value(const_tree type
, const_tree func
,
5427 bool outgoing ATTRIBUTE_UNUSED
)
5430 int unsignedp ATTRIBUTE_UNUSED
;
5431 rtx r ATTRIBUTE_UNUSED
;
5433 mode
= TYPE_MODE (type
);
5435 if (TARGET_AAPCS_BASED
)
5436 return aapcs_allocate_return_reg (mode
, type
, func
);
5438 /* Promote integer types. */
5439 if (INTEGRAL_TYPE_P (type
))
5440 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5442 /* Promotes small structs returned in a register to full-word size
5443 for big-endian AAPCS. */
5444 if (arm_return_in_msb (type
))
5446 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5447 if (size
% UNITS_PER_WORD
!= 0)
5449 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5450 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
5454 return arm_libcall_value_1 (mode
);
5457 /* libcall hashtable helpers. */
5459 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5461 static inline hashval_t
hash (const rtx_def
*);
5462 static inline bool equal (const rtx_def
*, const rtx_def
*);
5463 static inline void remove (rtx_def
*);
5467 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5469 return rtx_equal_p (p1
, p2
);
5473 libcall_hasher::hash (const rtx_def
*p1
)
5475 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5478 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5481 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5483 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5487 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5489 static bool init_done
= false;
5490 static libcall_table_type
*libcall_htab
= NULL
;
5496 libcall_htab
= new libcall_table_type (31);
5497 add_libcall (libcall_htab
,
5498 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5499 add_libcall (libcall_htab
,
5500 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5501 add_libcall (libcall_htab
,
5502 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5503 add_libcall (libcall_htab
,
5504 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5506 add_libcall (libcall_htab
,
5507 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5508 add_libcall (libcall_htab
,
5509 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5510 add_libcall (libcall_htab
,
5511 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5512 add_libcall (libcall_htab
,
5513 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5515 add_libcall (libcall_htab
,
5516 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5517 add_libcall (libcall_htab
,
5518 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5519 add_libcall (libcall_htab
,
5520 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5521 add_libcall (libcall_htab
,
5522 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5523 add_libcall (libcall_htab
,
5524 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5525 add_libcall (libcall_htab
,
5526 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5527 add_libcall (libcall_htab
,
5528 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5529 add_libcall (libcall_htab
,
5530 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5532 /* Values from double-precision helper functions are returned in core
5533 registers if the selected core only supports single-precision
5534 arithmetic, even if we are using the hard-float ABI. The same is
5535 true for single-precision helpers, but we will never be using the
5536 hard-float ABI on a CPU which doesn't support single-precision
5537 operations in hardware. */
5538 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5539 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5540 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5541 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5542 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5543 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5544 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5545 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5546 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5547 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5548 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5549 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5551 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5553 add_libcall (libcall_htab
,
5554 convert_optab_libfunc (trunc_optab
, HFmode
, DFmode
));
5557 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5561 arm_libcall_value_1 (machine_mode mode
)
5563 if (TARGET_AAPCS_BASED
)
5564 return aapcs_libcall_value (mode
);
5565 else if (TARGET_IWMMXT_ABI
5566 && arm_vector_mode_supported_p (mode
))
5567 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5569 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5572 /* Define how to find the value returned by a library function
5573 assuming the value has mode MODE. */
5576 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5578 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5579 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5581 /* The following libcalls return their result in integer registers,
5582 even though they return a floating point value. */
5583 if (arm_libcall_uses_aapcs_base (libcall
))
5584 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5588 return arm_libcall_value_1 (mode
);
5591 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5594 arm_function_value_regno_p (const unsigned int regno
)
5596 if (regno
== ARG_REGISTER (1)
5598 && TARGET_AAPCS_BASED
5599 && TARGET_HARD_FLOAT
5600 && regno
== FIRST_VFP_REGNUM
)
5601 || (TARGET_IWMMXT_ABI
5602 && regno
== FIRST_IWMMXT_REGNUM
))
5608 /* Determine the amount of memory needed to store the possible return
5609 registers of an untyped call. */
5611 arm_apply_result_size (void)
5617 if (TARGET_HARD_FLOAT_ABI
)
5619 if (TARGET_IWMMXT_ABI
)
5626 /* Decide whether TYPE should be returned in memory (true)
5627 or in a register (false). FNTYPE is the type of the function making
5630 arm_return_in_memory (const_tree type
, const_tree fntype
)
5634 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5636 if (TARGET_AAPCS_BASED
)
5638 /* Simple, non-aggregate types (ie not including vectors and
5639 complex) are always returned in a register (or registers).
5640 We don't care about which register here, so we can short-cut
5641 some of the detail. */
5642 if (!AGGREGATE_TYPE_P (type
)
5643 && TREE_CODE (type
) != VECTOR_TYPE
5644 && TREE_CODE (type
) != COMPLEX_TYPE
)
5647 /* Any return value that is no larger than one word can be
5649 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5652 /* Check any available co-processors to see if they accept the
5653 type as a register candidate (VFP, for example, can return
5654 some aggregates in consecutive registers). These aren't
5655 available if the call is variadic. */
5656 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5659 /* Vector values should be returned using ARM registers, not
5660 memory (unless they're over 16 bytes, which will break since
5661 we only have four call-clobbered registers to play with). */
5662 if (TREE_CODE (type
) == VECTOR_TYPE
)
5663 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5665 /* The rest go in memory. */
5669 if (TREE_CODE (type
) == VECTOR_TYPE
)
5670 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5672 if (!AGGREGATE_TYPE_P (type
) &&
5673 (TREE_CODE (type
) != VECTOR_TYPE
))
5674 /* All simple types are returned in registers. */
5677 if (arm_abi
!= ARM_ABI_APCS
)
5679 /* ATPCS and later return aggregate types in memory only if they are
5680 larger than a word (or are variable size). */
5681 return (size
< 0 || size
> UNITS_PER_WORD
);
5684 /* For the arm-wince targets we choose to be compatible with Microsoft's
5685 ARM and Thumb compilers, which always return aggregates in memory. */
5687 /* All structures/unions bigger than one word are returned in memory.
5688 Also catch the case where int_size_in_bytes returns -1. In this case
5689 the aggregate is either huge or of variable size, and in either case
5690 we will want to return it via memory and not in a register. */
5691 if (size
< 0 || size
> UNITS_PER_WORD
)
5694 if (TREE_CODE (type
) == RECORD_TYPE
)
5698 /* For a struct the APCS says that we only return in a register
5699 if the type is 'integer like' and every addressable element
5700 has an offset of zero. For practical purposes this means
5701 that the structure can have at most one non bit-field element
5702 and that this element must be the first one in the structure. */
5704 /* Find the first field, ignoring non FIELD_DECL things which will
5705 have been created by C++. */
5706 for (field
= TYPE_FIELDS (type
);
5707 field
&& TREE_CODE (field
) != FIELD_DECL
;
5708 field
= DECL_CHAIN (field
))
5712 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5714 /* Check that the first field is valid for returning in a register. */
5716 /* ... Floats are not allowed */
5717 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5720 /* ... Aggregates that are not themselves valid for returning in
5721 a register are not allowed. */
5722 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5725 /* Now check the remaining fields, if any. Only bitfields are allowed,
5726 since they are not addressable. */
5727 for (field
= DECL_CHAIN (field
);
5729 field
= DECL_CHAIN (field
))
5731 if (TREE_CODE (field
) != FIELD_DECL
)
5734 if (!DECL_BIT_FIELD_TYPE (field
))
5741 if (TREE_CODE (type
) == UNION_TYPE
)
5745 /* Unions can be returned in registers if every element is
5746 integral, or can be returned in an integer register. */
5747 for (field
= TYPE_FIELDS (type
);
5749 field
= DECL_CHAIN (field
))
5751 if (TREE_CODE (field
) != FIELD_DECL
)
5754 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5757 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5763 #endif /* not ARM_WINCE */
5765 /* Return all other types in memory. */
5769 const struct pcs_attribute_arg
5773 } pcs_attribute_args
[] =
5775 {"aapcs", ARM_PCS_AAPCS
},
5776 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5778 /* We could recognize these, but changes would be needed elsewhere
5779 * to implement them. */
5780 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5781 {"atpcs", ARM_PCS_ATPCS
},
5782 {"apcs", ARM_PCS_APCS
},
5784 {NULL
, ARM_PCS_UNKNOWN
}
5788 arm_pcs_from_attribute (tree attr
)
5790 const struct pcs_attribute_arg
*ptr
;
5793 /* Get the value of the argument. */
5794 if (TREE_VALUE (attr
) == NULL_TREE
5795 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5796 return ARM_PCS_UNKNOWN
;
5798 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5800 /* Check it against the list of known arguments. */
5801 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5802 if (streq (arg
, ptr
->arg
))
5805 /* An unrecognized interrupt type. */
5806 return ARM_PCS_UNKNOWN
;
5809 /* Get the PCS variant to use for this call. TYPE is the function's type
5810 specification, DECL is the specific declartion. DECL may be null if
5811 the call could be indirect or if this is a library call. */
5813 arm_get_pcs_model (const_tree type
, const_tree decl
)
5815 bool user_convention
= false;
5816 enum arm_pcs user_pcs
= arm_pcs_default
;
5821 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5824 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5825 user_convention
= true;
5828 if (TARGET_AAPCS_BASED
)
5830 /* Detect varargs functions. These always use the base rules
5831 (no argument is ever a candidate for a co-processor
5833 bool base_rules
= stdarg_p (type
);
5835 if (user_convention
)
5837 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5838 sorry ("non-AAPCS derived PCS variant");
5839 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5840 error ("variadic functions must use the base AAPCS variant");
5844 return ARM_PCS_AAPCS
;
5845 else if (user_convention
)
5847 else if (decl
&& flag_unit_at_a_time
)
5849 /* Local functions never leak outside this compilation unit,
5850 so we are free to use whatever conventions are
5852 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5853 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5855 return ARM_PCS_AAPCS_LOCAL
;
5858 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5859 sorry ("PCS variant");
5861 /* For everything else we use the target's default. */
5862 return arm_pcs_default
;
5867 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5868 const_tree fntype ATTRIBUTE_UNUSED
,
5869 rtx libcall ATTRIBUTE_UNUSED
,
5870 const_tree fndecl ATTRIBUTE_UNUSED
)
5872 /* Record the unallocated VFP registers. */
5873 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5874 pcum
->aapcs_vfp_reg_alloc
= 0;
5877 /* Walk down the type tree of TYPE counting consecutive base elements.
5878 If *MODEP is VOIDmode, then set it to the first valid floating point
5879 type. If a non-floating point type is found, or if a floating point
5880 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5881 otherwise return the count in the sub-tree. */
5883 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5888 switch (TREE_CODE (type
))
5891 mode
= TYPE_MODE (type
);
5892 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
)
5895 if (*modep
== VOIDmode
)
5904 mode
= TYPE_MODE (TREE_TYPE (type
));
5905 if (mode
!= DFmode
&& mode
!= SFmode
)
5908 if (*modep
== VOIDmode
)
5917 /* Use V2SImode and V4SImode as representatives of all 64-bit
5918 and 128-bit vector types, whether or not those modes are
5919 supported with the present options. */
5920 size
= int_size_in_bytes (type
);
5933 if (*modep
== VOIDmode
)
5936 /* Vector modes are considered to be opaque: two vectors are
5937 equivalent for the purposes of being homogeneous aggregates
5938 if they are the same size. */
5947 tree index
= TYPE_DOMAIN (type
);
5949 /* Can't handle incomplete types nor sizes that are not
5951 if (!COMPLETE_TYPE_P (type
)
5952 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5955 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5958 || !TYPE_MAX_VALUE (index
)
5959 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5960 || !TYPE_MIN_VALUE (index
)
5961 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5965 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5966 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5968 /* There must be no padding. */
5969 if (wi::to_wide (TYPE_SIZE (type
))
5970 != count
* GET_MODE_BITSIZE (*modep
))
5982 /* Can't handle incomplete types nor sizes that are not
5984 if (!COMPLETE_TYPE_P (type
)
5985 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5988 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5990 if (TREE_CODE (field
) != FIELD_DECL
)
5993 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5999 /* There must be no padding. */
6000 if (wi::to_wide (TYPE_SIZE (type
))
6001 != count
* GET_MODE_BITSIZE (*modep
))
6008 case QUAL_UNION_TYPE
:
6010 /* These aren't very interesting except in a degenerate case. */
6015 /* Can't handle incomplete types nor sizes that are not
6017 if (!COMPLETE_TYPE_P (type
)
6018 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6021 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6023 if (TREE_CODE (field
) != FIELD_DECL
)
6026 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
6029 count
= count
> sub_count
? count
: sub_count
;
6032 /* There must be no padding. */
6033 if (wi::to_wide (TYPE_SIZE (type
))
6034 != count
* GET_MODE_BITSIZE (*modep
))
6047 /* Return true if PCS_VARIANT should use VFP registers. */
6049 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
6051 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
6053 static bool seen_thumb1_vfp
= false;
6055 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
6057 sorry ("Thumb-1 hard-float VFP ABI");
6058 /* sorry() is not immediately fatal, so only display this once. */
6059 seen_thumb1_vfp
= true;
6065 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
6068 return (TARGET_32BIT
&& TARGET_HARD_FLOAT
&&
6069 (TARGET_VFP_DOUBLE
|| !is_double
));
6072 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6073 suitable for passing or returning in VFP registers for the PCS
6074 variant selected. If it is, then *BASE_MODE is updated to contain
6075 a machine mode describing each element of the argument's type and
6076 *COUNT to hold the number of such elements. */
6078 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
6079 machine_mode mode
, const_tree type
,
6080 machine_mode
*base_mode
, int *count
)
6082 machine_mode new_mode
= VOIDmode
;
6084 /* If we have the type information, prefer that to working things
6085 out from the mode. */
6088 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
6090 if (ag_count
> 0 && ag_count
<= 4)
6095 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
6096 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
6097 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
6102 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
6105 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
6111 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
6114 *base_mode
= new_mode
;
6116 if (TARGET_GENERAL_REGS_ONLY
)
6117 error ("argument of type %qT not permitted with -mgeneral-regs-only",
6124 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
6125 machine_mode mode
, const_tree type
)
6127 int count ATTRIBUTE_UNUSED
;
6128 machine_mode ag_mode ATTRIBUTE_UNUSED
;
6130 if (!use_vfp_abi (pcs_variant
, false))
6132 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6137 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6140 if (!use_vfp_abi (pcum
->pcs_variant
, false))
6143 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
6144 &pcum
->aapcs_vfp_rmode
,
6145 &pcum
->aapcs_vfp_rcount
);
6148 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6149 for the behaviour of this function. */
6152 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6153 const_tree type ATTRIBUTE_UNUSED
)
6156 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
6157 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
6158 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
6161 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
6162 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
6164 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
6166 || (mode
== TImode
&& ! TARGET_NEON
)
6167 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
6170 int rcount
= pcum
->aapcs_vfp_rcount
;
6172 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
6176 /* Avoid using unsupported vector modes. */
6177 if (rmode
== V2SImode
)
6179 else if (rmode
== V4SImode
)
6186 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
6187 for (i
= 0; i
< rcount
; i
++)
6189 rtx tmp
= gen_rtx_REG (rmode
,
6190 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
6191 tmp
= gen_rtx_EXPR_LIST
6193 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
6194 XVECEXP (par
, 0, i
) = tmp
;
6197 pcum
->aapcs_reg
= par
;
6200 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
6206 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6207 comment there for the behaviour of this function. */
6210 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
6212 const_tree type ATTRIBUTE_UNUSED
)
6214 if (!use_vfp_abi (pcs_variant
, false))
6218 || (GET_MODE_CLASS (mode
) == MODE_INT
6219 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
6223 machine_mode ag_mode
;
6228 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6233 if (ag_mode
== V2SImode
)
6235 else if (ag_mode
== V4SImode
)
6241 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
6242 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
6243 for (i
= 0; i
< count
; i
++)
6245 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
6246 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
6247 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
6248 XVECEXP (par
, 0, i
) = tmp
;
6254 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
6258 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
6259 machine_mode mode ATTRIBUTE_UNUSED
,
6260 const_tree type ATTRIBUTE_UNUSED
)
6262 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
6263 pcum
->aapcs_vfp_reg_alloc
= 0;
6267 #define AAPCS_CP(X) \
6269 aapcs_ ## X ## _cum_init, \
6270 aapcs_ ## X ## _is_call_candidate, \
6271 aapcs_ ## X ## _allocate, \
6272 aapcs_ ## X ## _is_return_candidate, \
6273 aapcs_ ## X ## _allocate_return_reg, \
6274 aapcs_ ## X ## _advance \
6277 /* Table of co-processors that can be used to pass arguments in
6278 registers. Idealy no arugment should be a candidate for more than
6279 one co-processor table entry, but the table is processed in order
6280 and stops after the first match. If that entry then fails to put
6281 the argument into a co-processor register, the argument will go on
6285 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6286 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
6288 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6289 BLKmode) is a candidate for this co-processor's registers; this
6290 function should ignore any position-dependent state in
6291 CUMULATIVE_ARGS and only use call-type dependent information. */
6292 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6294 /* Return true if the argument does get a co-processor register; it
6295 should set aapcs_reg to an RTX of the register allocated as is
6296 required for a return from FUNCTION_ARG. */
6297 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6299 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6300 be returned in this co-processor's registers. */
6301 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6303 /* Allocate and return an RTX element to hold the return type of a call. This
6304 routine must not fail and will only be called if is_return_candidate
6305 returned true with the same parameters. */
6306 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6308 /* Finish processing this argument and prepare to start processing
6310 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6311 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6319 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6324 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6325 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6332 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6334 /* We aren't passed a decl, so we can't check that a call is local.
6335 However, it isn't clear that that would be a win anyway, since it
6336 might limit some tail-calling opportunities. */
6337 enum arm_pcs pcs_variant
;
6341 const_tree fndecl
= NULL_TREE
;
6343 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6346 fntype
= TREE_TYPE (fntype
);
6349 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6352 pcs_variant
= arm_pcs_default
;
6354 if (pcs_variant
!= ARM_PCS_AAPCS
)
6358 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6359 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6368 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6371 /* We aren't passed a decl, so we can't check that a call is local.
6372 However, it isn't clear that that would be a win anyway, since it
6373 might limit some tail-calling opportunities. */
6374 enum arm_pcs pcs_variant
;
6375 int unsignedp ATTRIBUTE_UNUSED
;
6379 const_tree fndecl
= NULL_TREE
;
6381 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6384 fntype
= TREE_TYPE (fntype
);
6387 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6390 pcs_variant
= arm_pcs_default
;
6392 /* Promote integer types. */
6393 if (type
&& INTEGRAL_TYPE_P (type
))
6394 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6396 if (pcs_variant
!= ARM_PCS_AAPCS
)
6400 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6401 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6403 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6407 /* Promotes small structs returned in a register to full-word size
6408 for big-endian AAPCS. */
6409 if (type
&& arm_return_in_msb (type
))
6411 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6412 if (size
% UNITS_PER_WORD
!= 0)
6414 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6415 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
6419 return gen_rtx_REG (mode
, R0_REGNUM
);
6423 aapcs_libcall_value (machine_mode mode
)
6425 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6426 && GET_MODE_SIZE (mode
) <= 4)
6429 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6432 /* Lay out a function argument using the AAPCS rules. The rule
6433 numbers referred to here are those in the AAPCS. */
6435 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6436 const_tree type
, bool named
)
6441 /* We only need to do this once per argument. */
6442 if (pcum
->aapcs_arg_processed
)
6445 pcum
->aapcs_arg_processed
= true;
6447 /* Special case: if named is false then we are handling an incoming
6448 anonymous argument which is on the stack. */
6452 /* Is this a potential co-processor register candidate? */
6453 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6455 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6456 pcum
->aapcs_cprc_slot
= slot
;
6458 /* We don't have to apply any of the rules from part B of the
6459 preparation phase, these are handled elsewhere in the
6464 /* A Co-processor register candidate goes either in its own
6465 class of registers or on the stack. */
6466 if (!pcum
->aapcs_cprc_failed
[slot
])
6468 /* C1.cp - Try to allocate the argument to co-processor
6470 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6473 /* C2.cp - Put the argument on the stack and note that we
6474 can't assign any more candidates in this slot. We also
6475 need to note that we have allocated stack space, so that
6476 we won't later try to split a non-cprc candidate between
6477 core registers and the stack. */
6478 pcum
->aapcs_cprc_failed
[slot
] = true;
6479 pcum
->can_split
= false;
6482 /* We didn't get a register, so this argument goes on the
6484 gcc_assert (pcum
->can_split
== false);
6489 /* C3 - For double-word aligned arguments, round the NCRN up to the
6490 next even number. */
6491 ncrn
= pcum
->aapcs_ncrn
;
6494 int res
= arm_needs_doubleword_align (mode
, type
);
6495 /* Only warn during RTL expansion of call stmts, otherwise we would
6496 warn e.g. during gimplification even on functions that will be
6497 always inlined, and we'd warn multiple times. Don't warn when
6498 called in expand_function_start either, as we warn instead in
6499 arm_function_arg_boundary in that case. */
6500 if (res
< 0 && warn_psabi
&& currently_expanding_gimple_stmt
)
6501 inform (input_location
, "parameter passing for argument of type "
6502 "%qT changed in GCC 7.1", type
);
6507 nregs
= ARM_NUM_REGS2(mode
, type
);
6509 /* Sigh, this test should really assert that nregs > 0, but a GCC
6510 extension allows empty structs and then gives them empty size; it
6511 then allows such a structure to be passed by value. For some of
6512 the code below we have to pretend that such an argument has
6513 non-zero size so that we 'locate' it correctly either in
6514 registers or on the stack. */
6515 gcc_assert (nregs
>= 0);
6517 nregs2
= nregs
? nregs
: 1;
6519 /* C4 - Argument fits entirely in core registers. */
6520 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6522 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6523 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6527 /* C5 - Some core registers left and there are no arguments already
6528 on the stack: split this argument between the remaining core
6529 registers and the stack. */
6530 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6532 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6533 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6534 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6538 /* C6 - NCRN is set to 4. */
6539 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6541 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6545 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6546 for a call to a function whose data type is FNTYPE.
6547 For a library call, FNTYPE is NULL. */
6549 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6551 tree fndecl ATTRIBUTE_UNUSED
)
6553 /* Long call handling. */
6555 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6557 pcum
->pcs_variant
= arm_pcs_default
;
6559 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6561 if (arm_libcall_uses_aapcs_base (libname
))
6562 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6564 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6565 pcum
->aapcs_reg
= NULL_RTX
;
6566 pcum
->aapcs_partial
= 0;
6567 pcum
->aapcs_arg_processed
= false;
6568 pcum
->aapcs_cprc_slot
= -1;
6569 pcum
->can_split
= true;
6571 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6575 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6577 pcum
->aapcs_cprc_failed
[i
] = false;
6578 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6586 /* On the ARM, the offset starts at 0. */
6588 pcum
->iwmmxt_nregs
= 0;
6589 pcum
->can_split
= true;
6591 /* Varargs vectors are treated the same as long long.
6592 named_count avoids having to change the way arm handles 'named' */
6593 pcum
->named_count
= 0;
6596 if (TARGET_REALLY_IWMMXT
&& fntype
)
6600 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6602 fn_arg
= TREE_CHAIN (fn_arg
))
6603 pcum
->named_count
+= 1;
6605 if (! pcum
->named_count
)
6606 pcum
->named_count
= INT_MAX
;
6610 /* Return 2 if double word alignment is required for argument passing,
6611 but wasn't required before the fix for PR88469.
6612 Return 1 if double word alignment is required for argument passing.
6613 Return -1 if double word alignment used to be required for argument
6614 passing before PR77728 ABI fix, but is not required anymore.
6615 Return 0 if double word alignment is not required and wasn't requried
6618 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6621 return GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
;
6623 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6624 if (!AGGREGATE_TYPE_P (type
))
6625 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6627 /* Array types: Use member alignment of element type. */
6628 if (TREE_CODE (type
) == ARRAY_TYPE
)
6629 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6633 /* Record/aggregate types: Use greatest member alignment of any member. */
6634 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6635 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6637 if (TREE_CODE (field
) == FIELD_DECL
)
6640 /* Before PR77728 fix, we were incorrectly considering also
6641 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6642 Make sure we can warn about that with -Wpsabi. */
6645 else if (TREE_CODE (field
) == FIELD_DECL
6646 && DECL_BIT_FIELD_TYPE (field
)
6647 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field
)) > PARM_BOUNDARY
)
6657 /* Determine where to put an argument to a function.
6658 Value is zero to push the argument on the stack,
6659 or a hard register in which to store the argument.
6661 MODE is the argument's machine mode.
6662 TYPE is the data type of the argument (as a tree).
6663 This is null for libcalls where that information may
6665 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6666 the preceding args and about the function being called.
6667 NAMED is nonzero if this argument is a named parameter
6668 (otherwise it is an extra parameter matching an ellipsis).
6670 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6671 other arguments are passed on the stack. If (NAMED == 0) (which happens
6672 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6673 defined), say it is passed in the stack (function_prologue will
6674 indeed make it pass in the stack if necessary). */
6677 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
6678 const_tree type
, bool named
)
6680 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6683 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6684 a call insn (op3 of a call_value insn). */
6685 if (mode
== VOIDmode
)
6688 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6690 aapcs_layout_arg (pcum
, mode
, type
, named
);
6691 return pcum
->aapcs_reg
;
6694 /* Varargs vectors are treated the same as long long.
6695 named_count avoids having to change the way arm handles 'named' */
6696 if (TARGET_IWMMXT_ABI
6697 && arm_vector_mode_supported_p (mode
)
6698 && pcum
->named_count
> pcum
->nargs
+ 1)
6700 if (pcum
->iwmmxt_nregs
<= 9)
6701 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6704 pcum
->can_split
= false;
6709 /* Put doubleword aligned quantities in even register pairs. */
6710 if ((pcum
->nregs
& 1) && ARM_DOUBLEWORD_ALIGN
)
6712 int res
= arm_needs_doubleword_align (mode
, type
);
6713 if (res
< 0 && warn_psabi
)
6714 inform (input_location
, "parameter passing for argument of type "
6715 "%qT changed in GCC 7.1", type
);
6719 if (res
> 1 && warn_psabi
)
6720 inform (input_location
, "parameter passing for argument of type "
6721 "%qT changed in GCC 9.1", type
);
6725 /* Only allow splitting an arg between regs and memory if all preceding
6726 args were allocated to regs. For args passed by reference we only count
6727 the reference pointer. */
6728 if (pcum
->can_split
)
6731 nregs
= ARM_NUM_REGS2 (mode
, type
);
6733 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6736 return gen_rtx_REG (mode
, pcum
->nregs
);
6740 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6742 if (!ARM_DOUBLEWORD_ALIGN
)
6743 return PARM_BOUNDARY
;
6745 int res
= arm_needs_doubleword_align (mode
, type
);
6746 if (res
< 0 && warn_psabi
)
6747 inform (input_location
, "parameter passing for argument of type %qT "
6748 "changed in GCC 7.1", type
);
6749 if (res
> 1 && warn_psabi
)
6750 inform (input_location
, "parameter passing for argument of type "
6751 "%qT changed in GCC 9.1", type
);
6753 return res
> 0 ? DOUBLEWORD_ALIGNMENT
: PARM_BOUNDARY
;
6757 arm_arg_partial_bytes (cumulative_args_t pcum_v
, const function_arg_info
&arg
)
6759 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6760 int nregs
= pcum
->nregs
;
6762 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6764 aapcs_layout_arg (pcum
, arg
.mode
, arg
.type
, arg
.named
);
6765 return pcum
->aapcs_partial
;
6768 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (arg
.mode
))
6771 if (NUM_ARG_REGS
> nregs
6772 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (arg
.mode
, arg
.type
))
6774 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6779 /* Update the data in PCUM to advance over an argument
6780 of mode MODE and data type TYPE.
6781 (TYPE is null for libcalls where that information may not be available.) */
6784 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6785 const_tree type
, bool named
)
6787 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6789 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6791 aapcs_layout_arg (pcum
, mode
, type
, named
);
6793 if (pcum
->aapcs_cprc_slot
>= 0)
6795 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6797 pcum
->aapcs_cprc_slot
= -1;
6800 /* Generic stuff. */
6801 pcum
->aapcs_arg_processed
= false;
6802 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6803 pcum
->aapcs_reg
= NULL_RTX
;
6804 pcum
->aapcs_partial
= 0;
6809 if (arm_vector_mode_supported_p (mode
)
6810 && pcum
->named_count
> pcum
->nargs
6811 && TARGET_IWMMXT_ABI
)
6812 pcum
->iwmmxt_nregs
+= 1;
6814 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6818 /* Variable sized types are passed by reference. This is a GCC
6819 extension to the ARM ABI. */
6822 arm_pass_by_reference (cumulative_args_t
, const function_arg_info
&arg
)
6824 return arg
.type
&& TREE_CODE (TYPE_SIZE (arg
.type
)) != INTEGER_CST
;
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;
6838 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6840 arm_pragma_long_calls
= LONG
;
6844 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6846 arm_pragma_long_calls
= SHORT
;
6850 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6852 arm_pragma_long_calls
= OFF
;
6855 /* Handle an attribute requiring a FUNCTION_DECL;
6856 arguments as in struct attribute_spec.handler. */
6858 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6859 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6861 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6863 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6865 *no_add_attrs
= true;
6871 /* Handle an "interrupt" or "isr" attribute;
6872 arguments as in struct attribute_spec.handler. */
6874 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6879 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6881 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6883 *no_add_attrs
= true;
6885 /* FIXME: the argument if any is checked for type attributes;
6886 should it be checked for decl ones? */
6890 if (TREE_CODE (*node
) == FUNCTION_TYPE
6891 || TREE_CODE (*node
) == METHOD_TYPE
)
6893 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6895 warning (OPT_Wattributes
, "%qE attribute ignored",
6897 *no_add_attrs
= true;
6900 else if (TREE_CODE (*node
) == POINTER_TYPE
6901 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6902 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6903 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6905 *node
= build_variant_type_copy (*node
);
6906 TREE_TYPE (*node
) = build_type_attribute_variant
6908 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6909 *no_add_attrs
= true;
6913 /* Possibly pass this attribute on from the type to a decl. */
6914 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6915 | (int) ATTR_FLAG_FUNCTION_NEXT
6916 | (int) ATTR_FLAG_ARRAY_NEXT
))
6918 *no_add_attrs
= true;
6919 return tree_cons (name
, args
, NULL_TREE
);
6923 warning (OPT_Wattributes
, "%qE attribute ignored",
6932 /* Handle a "pcs" attribute; arguments as in struct
6933 attribute_spec.handler. */
6935 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6936 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6938 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6940 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6941 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      /* Visibility was applied directly; no attribute needs to remain.  */
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
6971 /* This function returns true if a function with declaration FNDECL and type
6972 FNTYPE uses the stack to pass arguments or return variables and false
6973 otherwise. This is used for functions with the attributes
6974 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6975 diagnostic messages if the stack is used. NAME is the name of the attribute
6979 cmse_func_args_or_return_in_stack (tree fndecl
, tree name
, tree fntype
)
6981 function_args_iterator args_iter
;
6982 CUMULATIVE_ARGS args_so_far_v
;
6983 cumulative_args_t args_so_far
;
6984 bool first_param
= true;
6985 tree arg_type
, prev_arg_type
= NULL_TREE
, ret_type
;
6987 /* Error out if any argument is passed on the stack. */
6988 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
6989 args_so_far
= pack_cumulative_args (&args_so_far_v
);
6990 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
6993 machine_mode arg_mode
= TYPE_MODE (arg_type
);
6995 prev_arg_type
= arg_type
;
6996 if (VOID_TYPE_P (arg_type
))
6999 function_arg_info
arg (arg_type
, /*named=*/true);
7001 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
, true);
7002 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
, true);
7003 if (!arg_rtx
|| arm_arg_partial_bytes (args_so_far
, arg
))
7005 error ("%qE attribute not available to functions with arguments "
7006 "passed on the stack", name
);
7009 first_param
= false;
7012 /* Error out for variadic functions since we cannot control how many
7013 arguments will be passed and thus stack could be used. stdarg_p () is not
7014 used for the checking to avoid browsing arguments twice. */
7015 if (prev_arg_type
!= NULL_TREE
&& !VOID_TYPE_P (prev_arg_type
))
7017 error ("%qE attribute not available to functions with variable number "
7018 "of arguments", name
);
7022 /* Error out if return value is passed on the stack. */
7023 ret_type
= TREE_TYPE (fntype
);
7024 if (arm_return_in_memory (ret_type
, fntype
))
7026 error ("%qE attribute not available to functions that return value on "
7033 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7034 function will check whether the attribute is allowed here and will add the
7035 attribute to the function declaration tree or otherwise issue a warning. */
7038 arm_handle_cmse_nonsecure_entry (tree
*node
, tree name
,
7047 *no_add_attrs
= true;
7048 warning (OPT_Wattributes
, "%qE attribute ignored without %<-mcmse%> "
7053 /* Ignore attribute for function types. */
7054 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7056 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
7058 *no_add_attrs
= true;
7064 /* Warn for static linkage functions. */
7065 if (!TREE_PUBLIC (fndecl
))
7067 warning (OPT_Wattributes
, "%qE attribute has no effect on functions "
7068 "with static linkage", name
);
7069 *no_add_attrs
= true;
7073 *no_add_attrs
|= cmse_func_args_or_return_in_stack (fndecl
, name
,
7074 TREE_TYPE (fndecl
));
7079 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7080 function will check whether the attribute is allowed here and will add the
7081 attribute to the function type tree or otherwise issue a diagnostic. The
7082 reason we check this at declaration time is to only allow the use of the
7083 attribute with declarations of function pointers and not function
7084 declarations. This function checks NODE is of the expected type and issues
7085 diagnostics otherwise using NAME. If it is not of the expected type
7086 *NO_ADD_ATTRS will be set to true. */
7089 arm_handle_cmse_nonsecure_call (tree
*node
, tree name
,
7094 tree decl
= NULL_TREE
, fntype
= NULL_TREE
;
7099 *no_add_attrs
= true;
7100 warning (OPT_Wattributes
, "%qE attribute ignored without %<-mcmse%> "
7105 if (TREE_CODE (*node
) == VAR_DECL
|| TREE_CODE (*node
) == TYPE_DECL
)
7108 fntype
= TREE_TYPE (decl
);
7111 while (fntype
!= NULL_TREE
&& TREE_CODE (fntype
) == POINTER_TYPE
)
7112 fntype
= TREE_TYPE (fntype
);
7114 if (!decl
|| TREE_CODE (fntype
) != FUNCTION_TYPE
)
7116 warning (OPT_Wattributes
, "%qE attribute only applies to base type of a "
7117 "function pointer", name
);
7118 *no_add_attrs
= true;
7122 *no_add_attrs
|= cmse_func_args_or_return_in_stack (NULL
, name
, fntype
);
7127 /* Prevent trees being shared among function types with and without
7128 cmse_nonsecure_call attribute. */
7129 type
= TREE_TYPE (decl
);
7131 type
= build_distinct_type_copy (type
);
7132 TREE_TYPE (decl
) = type
;
7135 while (TREE_CODE (fntype
) != FUNCTION_TYPE
)
7138 fntype
= TREE_TYPE (fntype
);
7139 fntype
= build_distinct_type_copy (fntype
);
7140 TREE_TYPE (type
) = fntype
;
7143 /* Construct a type attribute and add it to the function type. */
7144 tree attrs
= tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE
,
7145 TYPE_ATTRIBUTES (fntype
));
7146 TYPE_ATTRIBUTES (fntype
) = attrs
;
7150 /* Return 0 if the attributes for two types are incompatible, 1 if they
7151 are compatible, and 2 if they are nearly compatible (which causes a
7152 warning to be generated). */
7154 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
7158 /* Check for mismatch of non-default calling convention. */
7159 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
7162 /* Check for mismatched call attributes. */
7163 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7164 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7165 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7166 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7168 /* Only bother to check if an attribute is defined. */
7169 if (l1
| l2
| s1
| s2
)
7171 /* If one type has an attribute, the other must have the same attribute. */
7172 if ((l1
!= l2
) || (s1
!= s2
))
7175 /* Disallow mixed attributes. */
7176 if ((l1
& s2
) || (l2
& s1
))
7180 /* Check for mismatched ISR attribute. */
7181 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
7183 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
7184 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
7186 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
7190 l1
= lookup_attribute ("cmse_nonsecure_call",
7191 TYPE_ATTRIBUTES (type1
)) != NULL
;
7192 l2
= lookup_attribute ("cmse_nonsecure_call",
7193 TYPE_ATTRIBUTES (type2
)) != NULL
;
7201 /* Assigns default attributes to newly defined type. This is used to
7202 set short_call/long_call attributes for function types of
7203 functions defined inside corresponding #pragma scopes. */
7205 arm_set_default_type_attributes (tree type
)
7207 /* Add __attribute__ ((long_call)) to all functions, when
7208 inside #pragma long_calls or __attribute__ ((short_call)),
7209 when inside #pragma no_long_calls. */
7210 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
7212 tree type_attr_list
, attr_name
;
7213 type_attr_list
= TYPE_ATTRIBUTES (type
);
7215 if (arm_pragma_long_calls
== LONG
)
7216 attr_name
= get_identifier ("long_call");
7217 else if (arm_pragma_long_calls
== SHORT
)
7218 attr_name
= get_identifier ("short_call");
7222 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
7223 TYPE_ATTRIBUTES (type
) = type_attr_list
;
7227 /* Return true if DECL is known to be linked into section SECTION. */
7230 arm_function_in_section_p (tree decl
, section
*section
)
7232 /* We can only be certain about the prevailing symbol definition. */
7233 if (!decl_binds_to_current_def_p (decl
))
7236 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7237 if (!DECL_SECTION_NAME (decl
))
7239 /* Make sure that we will not create a unique section for DECL. */
7240 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
7244 return function_section (decl
) == section
;
7247 /* Return nonzero if a 32-bit "long_call" should be generated for
7248 a call from the current function to DECL. We generate a long_call
7251 a. has an __attribute__((long call))
7252 or b. is within the scope of a #pragma long_calls
7253 or c. the -mlong-calls command line switch has been specified
7255 However we do not generate a long call if the function:
7257 d. has an __attribute__ ((short_call))
7258 or e. is inside the scope of a #pragma no_long_calls
7259 or f. is defined in the same section as the current function. */
7262 arm_is_long_call_p (tree decl
)
7267 return TARGET_LONG_CALLS
;
7269 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
7270 if (lookup_attribute ("short_call", attrs
))
7273 /* For "f", be conservative, and only cater for cases in which the
7274 whole of the current function is placed in the same section. */
7275 if (!flag_reorder_blocks_and_partition
7276 && TREE_CODE (decl
) == FUNCTION_DECL
7277 && arm_function_in_section_p (decl
, current_function_section ()))
7280 if (lookup_attribute ("long_call", attrs
))
7283 return TARGET_LONG_CALLS
;
7286 /* Return nonzero if it is ok to make a tail-call to DECL. */
7288 arm_function_ok_for_sibcall (tree decl
, tree exp
)
7290 unsigned long func_type
;
7292 if (cfun
->machine
->sibcall_blocked
)
7295 /* Never tailcall something if we are generating code for Thumb-1. */
7299 /* The PIC register is live on entry to VxWorks PLT entries, so we
7300 must make the call before restoring the PIC register. */
7301 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
7304 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7305 may be used both as target of the call and base register for restoring
7306 the VFP registers */
7307 if (TARGET_APCS_FRAME
&& TARGET_ARM
7308 && TARGET_HARD_FLOAT
7309 && decl
&& arm_is_long_call_p (decl
))
7312 /* If we are interworking and the function is not declared static
7313 then we can't tail-call it unless we know that it exists in this
7314 compilation unit (since it might be a Thumb routine). */
7315 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
7316 && !TREE_ASM_WRITTEN (decl
))
7319 func_type
= arm_current_func_type ();
7320 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7321 if (IS_INTERRUPT (func_type
))
7324 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7325 generated for entry functions themselves. */
7326 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7329 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7330 this would complicate matters for later code generation. */
7331 if (TREE_CODE (exp
) == CALL_EXPR
)
7333 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7334 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype
)))
7338 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
7340 /* Check that the return value locations are the same. For
7341 example that we aren't returning a value from the sibling in
7342 a VFP register but then need to transfer it to a core
7345 tree decl_or_type
= decl
;
7347 /* If it is an indirect function pointer, get the function type. */
7349 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7351 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
7352 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
7354 if (!rtx_equal_p (a
, b
))
7358 /* Never tailcall if function may be called with a misaligned SP. */
7359 if (IS_STACKALIGN (func_type
))
7362 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7363 references should become a NOP. Don't convert such calls into
7365 if (TARGET_AAPCS_BASED
7366 && arm_abi
== ARM_ABI_AAPCS
7368 && DECL_WEAK (decl
))
7371 /* We cannot do a tailcall for an indirect call by descriptor if all the
7372 argument registers are used because the only register left to load the
7373 address is IP and it will already contain the static chain. */
7374 if (!decl
&& CALL_EXPR_BY_DESCRIPTOR (exp
) && !flag_trampolines
)
7376 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7377 CUMULATIVE_ARGS cum
;
7378 cumulative_args_t cum_v
;
7380 arm_init_cumulative_args (&cum
, fntype
, NULL_RTX
, NULL_TREE
);
7381 cum_v
= pack_cumulative_args (&cum
);
7383 for (tree t
= TYPE_ARG_TYPES (fntype
); t
; t
= TREE_CHAIN (t
))
7385 tree type
= TREE_VALUE (t
);
7386 if (!VOID_TYPE_P (type
))
7387 arm_function_arg_advance (cum_v
, TYPE_MODE (type
), type
, true);
7390 if (!arm_function_arg (cum_v
, SImode
, integer_type_node
, true))
7394 /* Everything else is ok. */
7399 /* Addressing mode support functions. */
7401 /* Return nonzero if X is a legitimate immediate operand when compiling
7402 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7404 legitimate_pic_operand_p (rtx x
)
7406 if (GET_CODE (x
) == SYMBOL_REF
7407 || (GET_CODE (x
) == CONST
7408 && GET_CODE (XEXP (x
, 0)) == PLUS
7409 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
7415 /* Record that the current function needs a PIC register. If PIC_REG is null,
7416 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
7417 both case cfun->machine->pic_reg is initialized if we have not already done
7418 so. COMPUTE_NOW decide whether and where to set the PIC register. If true,
7419 PIC register is reloaded in the current position of the instruction stream
7420 irregardless of whether it was loaded before. Otherwise, it is only loaded
7421 if not already done so (crtl->uses_pic_offset_table is null). Note that
7422 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7423 is only supported iff COMPUTE_NOW is false. */
7426 require_pic_register (rtx pic_reg
, bool compute_now
)
7428 gcc_assert (compute_now
== (pic_reg
!= NULL_RTX
));
7430 /* A lot of the logic here is made obscure by the fact that this
7431 routine gets called as part of the rtx cost estimation process.
7432 We don't want those calls to affect any assumptions about the real
7433 function; and further, we can't call entry_of_function() until we
7434 start the real expansion process. */
7435 if (!crtl
->uses_pic_offset_table
|| compute_now
)
7437 gcc_assert (can_create_pseudo_p ()
7438 || (pic_reg
!= NULL_RTX
7440 && GET_MODE (pic_reg
) == Pmode
));
7441 if (arm_pic_register
!= INVALID_REGNUM
7443 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
7445 if (!cfun
->machine
->pic_reg
)
7446 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
7448 /* Play games to avoid marking the function as needing pic
7449 if we are being called as part of the cost-estimation
7451 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7452 crtl
->uses_pic_offset_table
= 1;
7456 rtx_insn
*seq
, *insn
;
7458 if (pic_reg
== NULL_RTX
)
7459 pic_reg
= gen_reg_rtx (Pmode
);
7460 if (!cfun
->machine
->pic_reg
)
7461 cfun
->machine
->pic_reg
= pic_reg
;
7463 /* Play games to avoid marking the function as needing pic
7464 if we are being called as part of the cost-estimation
7466 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7468 crtl
->uses_pic_offset_table
= 1;
7471 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
7472 && arm_pic_register
> LAST_LO_REGNUM
7474 emit_move_insn (cfun
->machine
->pic_reg
,
7475 gen_rtx_REG (Pmode
, arm_pic_register
));
7477 arm_load_pic_register (0UL, pic_reg
);
7482 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
7484 INSN_LOCATION (insn
) = prologue_location
;
7486 /* We can be called during expansion of PHI nodes, where
7487 we can't yet emit instructions directly in the final
7488 insn stream. Queue the insns on the entry edge, they will
7489 be committed after everything else is expanded. */
7490 if (currently_expanding_to_rtl
)
7491 insert_insn_on_edge (seq
,
7493 (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
7501 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
7502 created to hold the result of the load. If not NULL, PIC_REG indicates
7503 which register to use as PIC register, otherwise it is decided by register
7504 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
7505 location in the instruction stream, irregardless of whether it was loaded
7506 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
7507 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
7509 Returns the register REG into which the PIC load is performed. */
7512 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
, rtx pic_reg
,
7515 gcc_assert (compute_now
== (pic_reg
!= NULL_RTX
));
7517 if (GET_CODE (orig
) == SYMBOL_REF
7518 || GET_CODE (orig
) == LABEL_REF
)
7522 gcc_assert (can_create_pseudo_p ());
7523 reg
= gen_reg_rtx (Pmode
);
7526 /* VxWorks does not impose a fixed gap between segments; the run-time
7527 gap can be different from the object-file gap. We therefore can't
7528 use GOTOFF unless we are absolutely sure that the symbol is in the
7529 same segment as the GOT. Unfortunately, the flexibility of linker
7530 scripts means that we can't be sure of that in general, so assume
7531 that GOTOFF is never valid on VxWorks. */
7532 /* References to weak symbols cannot be resolved locally: they
7533 may be overridden by a non-weak definition at link time. */
7535 if ((GET_CODE (orig
) == LABEL_REF
7536 || (GET_CODE (orig
) == SYMBOL_REF
7537 && SYMBOL_REF_LOCAL_P (orig
)
7538 && (SYMBOL_REF_DECL (orig
)
7539 ? !DECL_WEAK (SYMBOL_REF_DECL (orig
)) : 1)))
7541 && arm_pic_data_is_text_relative
)
7542 insn
= arm_pic_static_addr (orig
, reg
);
7548 /* If this function doesn't have a pic register, create one now. */
7549 require_pic_register (pic_reg
, compute_now
);
7551 if (pic_reg
== NULL_RTX
)
7552 pic_reg
= cfun
->machine
->pic_reg
;
7554 pat
= gen_calculate_pic_address (reg
, pic_reg
, orig
);
7556 /* Make the MEM as close to a constant as possible. */
7557 mem
= SET_SRC (pat
);
7558 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
7559 MEM_READONLY_P (mem
) = 1;
7560 MEM_NOTRAP_P (mem
) = 1;
7562 insn
= emit_insn (pat
);
7565 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7567 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
7571 else if (GET_CODE (orig
) == CONST
)
7575 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7576 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
7579 /* Handle the case where we have: const (UNSPEC_TLS). */
7580 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
7581 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
7584 /* Handle the case where we have:
7585 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7587 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7588 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
7589 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
7591 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
7597 gcc_assert (can_create_pseudo_p ());
7598 reg
= gen_reg_rtx (Pmode
);
7601 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
7603 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
,
7604 pic_reg
, compute_now
);
7605 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
7606 base
== reg
? 0 : reg
, pic_reg
,
7609 if (CONST_INT_P (offset
))
7611 /* The base register doesn't really matter, we only want to
7612 test the index for the appropriate mode. */
7613 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
7615 gcc_assert (can_create_pseudo_p ());
7616 offset
= force_reg (Pmode
, offset
);
7619 if (CONST_INT_P (offset
))
7620 return plus_constant (Pmode
, base
, INTVAL (offset
));
7623 if (GET_MODE_SIZE (mode
) > 4
7624 && (GET_MODE_CLASS (mode
) == MODE_INT
7625 || TARGET_SOFT_FLOAT
))
7627 emit_insn (gen_addsi3 (reg
, base
, offset
));
7631 return gen_rtx_PLUS (Pmode
, base
, offset
);
7638 /* Whether a register is callee saved or not. This is necessary because high
7639 registers are marked as caller saved when optimizing for size on Thumb-1
7640 targets despite being callee saved in order to avoid using them. */
7641 #define callee_saved_reg_p(reg) \
7642 (!call_used_regs[reg] \
7643 || (TARGET_THUMB1 && optimize_size \
7644 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
7646 /* Return a mask for the call-clobbered low registers that are unused
7647 at the end of the prologue. */
7648 static unsigned long
7649 thumb1_prologue_unused_call_clobbered_lo_regs (void)
7651 unsigned long mask
= 0;
7652 bitmap prologue_live_out
= df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
7654 for (int reg
= FIRST_LO_REGNUM
; reg
<= LAST_LO_REGNUM
; reg
++)
7655 if (!callee_saved_reg_p (reg
) && !REGNO_REG_SET_P (prologue_live_out
, reg
))
7656 mask
|= 1 << (reg
- FIRST_LO_REGNUM
);
7660 /* Similarly for the start of the epilogue. */
7661 static unsigned long
7662 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
7664 unsigned long mask
= 0;
7665 bitmap epilogue_live_in
= df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun
));
7667 for (int reg
= FIRST_LO_REGNUM
; reg
<= LAST_LO_REGNUM
; reg
++)
7668 if (!callee_saved_reg_p (reg
) && !REGNO_REG_SET_P (epilogue_live_in
, reg
))
7669 mask
|= 1 << (reg
- FIRST_LO_REGNUM
);
7673 /* Find a spare register to use during the prolog of a function. */
7676 thumb_find_work_register (unsigned long pushed_regs_mask
)
7680 unsigned long unused_regs
7681 = thumb1_prologue_unused_call_clobbered_lo_regs ();
7683 /* Check the argument registers first as these are call-used. The
7684 register allocation order means that sometimes r3 might be used
7685 but earlier argument registers might not, so check them all. */
7686 for (reg
= LAST_LO_REGNUM
; reg
>= FIRST_LO_REGNUM
; reg
--)
7687 if (unused_regs
& (1 << (reg
- FIRST_LO_REGNUM
)))
7690 /* Otherwise look for a call-saved register that is going to be pushed. */
7691 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
7692 if (pushed_regs_mask
& (1 << reg
))
7697 /* Thumb-2 can use high regs. */
7698 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
7699 if (pushed_regs_mask
& (1 << reg
))
7702 /* Something went wrong - thumb_compute_save_reg_mask()
7703 should have arranged for a suitable register to be pushed. */
7707 static GTY(()) int pic_labelno
;
7709 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7713 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
, rtx pic_reg
)
7715 rtx l1
, labelno
, pic_tmp
, pic_rtx
;
7717 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
7720 gcc_assert (flag_pic
);
7722 if (pic_reg
== NULL_RTX
)
7723 pic_reg
= cfun
->machine
->pic_reg
;
7724 if (TARGET_VXWORKS_RTP
)
7726 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
7727 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7728 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
7730 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
7732 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
7733 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
7737 /* We use an UNSPEC rather than a LABEL_REF because this label
7738 never appears in the code stream. */
7740 labelno
= GEN_INT (pic_labelno
++);
7741 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7742 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7744 /* On the ARM the PC register contains 'dot + 8' at the time of the
7745 addition, on the Thumb it is 'dot + 4'. */
7746 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7747 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
7749 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7753 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7755 else /* TARGET_THUMB1 */
7757 if (arm_pic_register
!= INVALID_REGNUM
7758 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
7760 /* We will have pushed the pic register, so we should always be
7761 able to find a work register. */
7762 pic_tmp
= gen_rtx_REG (SImode
,
7763 thumb_find_work_register (saved_regs
));
7764 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
7765 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
7766 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
7768 else if (arm_pic_register
!= INVALID_REGNUM
7769 && arm_pic_register
> LAST_LO_REGNUM
7770 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
7772 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7773 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
7774 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
7777 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7781 /* Need to emit this whether or not we obey regdecls,
7782 since setjmp/longjmp can cause life info to screw up. */
7786 /* Generate code to load the address of a static var when flag_pic is set. */
7788 arm_pic_static_addr (rtx orig
, rtx reg
)
7790 rtx l1
, labelno
, offset_rtx
;
7792 gcc_assert (flag_pic
);
7794 /* We use an UNSPEC rather than a LABEL_REF because this label
7795 never appears in the code stream. */
7796 labelno
= GEN_INT (pic_labelno
++);
7797 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7798 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7800 /* On the ARM the PC register contains 'dot + 8' at the time of the
7801 addition, on the Thumb it is 'dot + 4'. */
7802 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7803 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
7804 UNSPEC_SYMBOL_OFFSET
);
7805 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
7807 return emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
7810 /* Return nonzero if X is valid as an ARM state addressing register. */
7812 arm_address_register_rtx_p (rtx x
, int strict_p
)
7822 return ARM_REGNO_OK_FOR_BASE_P (regno
);
7824 return (regno
<= LAST_ARM_REGNUM
7825 || regno
>= FIRST_PSEUDO_REGISTER
7826 || regno
== FRAME_POINTER_REGNUM
7827 || regno
== ARG_POINTER_REGNUM
);
7830 /* Return TRUE if this rtx is the difference of a symbol and a label,
7831 and will reduce to a PC-relative relocation in the object file.
7832 Expressions like this can be left alone when generating PIC, rather
7833 than forced through the GOT. */
7835 pcrel_constant_p (rtx x
)
7837 if (GET_CODE (x
) == MINUS
)
7838 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
7843 /* Return true if X will surely end up in an index register after next
7846 will_be_in_index_register (const_rtx x
)
7848 /* arm.md: calculate_pic_address will split this into a register. */
7849 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
7852 /* Return nonzero if X is a valid ARM state address operand. */
7854 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
7858 enum rtx_code code
= GET_CODE (x
);
7860 if (arm_address_register_rtx_p (x
, strict_p
))
7863 use_ldrd
= (TARGET_LDRD
7864 && (mode
== DImode
|| mode
== DFmode
));
7866 if (code
== POST_INC
|| code
== PRE_DEC
7867 || ((code
== PRE_INC
|| code
== POST_DEC
)
7868 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7869 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7871 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7872 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7873 && GET_CODE (XEXP (x
, 1)) == PLUS
7874 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7876 rtx addend
= XEXP (XEXP (x
, 1), 1);
7878 /* Don't allow ldrd post increment by register because it's hard
7879 to fixup invalid register choices. */
7881 && GET_CODE (x
) == POST_MODIFY
7885 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
7886 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
7889 /* After reload constants split into minipools will have addresses
7890 from a LABEL_REF. */
7891 else if (reload_completed
7892 && (code
== LABEL_REF
7894 && GET_CODE (XEXP (x
, 0)) == PLUS
7895 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7896 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7899 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7902 else if (code
== PLUS
)
7904 rtx xop0
= XEXP (x
, 0);
7905 rtx xop1
= XEXP (x
, 1);
7907 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7908 && ((CONST_INT_P (xop1
)
7909 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
7910 || (!strict_p
&& will_be_in_index_register (xop1
))))
7911 || (arm_address_register_rtx_p (xop1
, strict_p
)
7912 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
7916 /* Reload currently can't handle MINUS, so disable this for now */
7917 else if (GET_CODE (x
) == MINUS
)
7919 rtx xop0
= XEXP (x
, 0);
7920 rtx xop1
= XEXP (x
, 1);
7922 return (arm_address_register_rtx_p (xop0
, strict_p
)
7923 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
7927 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7928 && code
== SYMBOL_REF
7929 && CONSTANT_POOL_ADDRESS_P (x
)
7931 && symbol_mentioned_p (get_pool_constant (x
))
7932 && ! pcrel_constant_p (get_pool_constant (x
))))
7938 /* Return true if we can avoid creating a constant pool entry for x. */
7940 can_avoid_literal_pool_for_label_p (rtx x
)
7942 /* Normally we can assign constant values to target registers without
7943 the help of constant pool. But there are cases we have to use constant
7945 1) assign a label to register.
7946 2) sign-extend a 8bit value to 32bit and then assign to register.
7948 Constant pool access in format:
7949 (set (reg r0) (mem (symbol_ref (".LC0"))))
7950 will cause the use of literal pool (later in function arm_reorg).
7951 So here we mark such format as an invalid format, then the compiler
7952 will adjust it into:
7953 (set (reg r0) (symbol_ref (".LC0")))
7954 (set (reg r0) (mem (reg r0))).
7955 No extra register is required, and (mem (reg r0)) won't cause the use
7956 of literal pools. */
7957 if (arm_disable_literal_pool
&& GET_CODE (x
) == SYMBOL_REF
7958 && CONSTANT_POOL_ADDRESS_P (x
))
7964 /* Return nonzero if X is a valid Thumb-2 address operand. */
7966 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7969 enum rtx_code code
= GET_CODE (x
);
7971 if (arm_address_register_rtx_p (x
, strict_p
))
7974 use_ldrd
= (TARGET_LDRD
7975 && (mode
== DImode
|| mode
== DFmode
));
7977 if (code
== POST_INC
|| code
== PRE_DEC
7978 || ((code
== PRE_INC
|| code
== POST_DEC
)
7979 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7980 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7982 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7983 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7984 && GET_CODE (XEXP (x
, 1)) == PLUS
7985 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7987 /* Thumb-2 only has autoincrement by constant. */
7988 rtx addend
= XEXP (XEXP (x
, 1), 1);
7989 HOST_WIDE_INT offset
;
7991 if (!CONST_INT_P (addend
))
7994 offset
= INTVAL(addend
);
7995 if (GET_MODE_SIZE (mode
) <= 4)
7996 return (offset
> -256 && offset
< 256);
7998 return (use_ldrd
&& offset
> -1024 && offset
< 1024
7999 && (offset
& 3) == 0);
8002 /* After reload constants split into minipools will have addresses
8003 from a LABEL_REF. */
8004 else if (reload_completed
8005 && (code
== LABEL_REF
8007 && GET_CODE (XEXP (x
, 0)) == PLUS
8008 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8009 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8012 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
8015 else if (code
== PLUS
)
8017 rtx xop0
= XEXP (x
, 0);
8018 rtx xop1
= XEXP (x
, 1);
8020 return ((arm_address_register_rtx_p (xop0
, strict_p
)
8021 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
8022 || (!strict_p
&& will_be_in_index_register (xop1
))))
8023 || (arm_address_register_rtx_p (xop1
, strict_p
)
8024 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
8027 else if (can_avoid_literal_pool_for_label_p (x
))
8030 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8031 && code
== SYMBOL_REF
8032 && CONSTANT_POOL_ADDRESS_P (x
)
8034 && symbol_mentioned_p (get_pool_constant (x
))
8035 && ! pcrel_constant_p (get_pool_constant (x
))))
8041 /* Return nonzero if INDEX is valid for an address index operand in
8044 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
8047 HOST_WIDE_INT range
;
8048 enum rtx_code code
= GET_CODE (index
);
8050 /* Standard coprocessor addressing modes. */
8051 if (TARGET_HARD_FLOAT
8052 && (mode
== SFmode
|| mode
== DFmode
))
8053 return (code
== CONST_INT
&& INTVAL (index
) < 1024
8054 && INTVAL (index
) > -1024
8055 && (INTVAL (index
) & 3) == 0);
8057 /* For quad modes, we restrict the constant offset to be slightly less
8058 than what the instruction format permits. We do this because for
8059 quad mode moves, we will actually decompose them into two separate
8060 double-mode reads or writes. INDEX must therefore be a valid
8061 (double-mode) offset and so should INDEX+8. */
8062 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
8063 return (code
== CONST_INT
8064 && INTVAL (index
) < 1016
8065 && INTVAL (index
) > -1024
8066 && (INTVAL (index
) & 3) == 0);
8068 /* We have no such constraint on double mode offsets, so we permit the
8069 full range of the instruction format. */
8070 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8071 return (code
== CONST_INT
8072 && INTVAL (index
) < 1024
8073 && INTVAL (index
) > -1024
8074 && (INTVAL (index
) & 3) == 0);
8076 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8077 return (code
== CONST_INT
8078 && INTVAL (index
) < 1024
8079 && INTVAL (index
) > -1024
8080 && (INTVAL (index
) & 3) == 0);
8082 if (arm_address_register_rtx_p (index
, strict_p
)
8083 && (GET_MODE_SIZE (mode
) <= 4))
8086 if (mode
== DImode
|| mode
== DFmode
)
8088 if (code
== CONST_INT
)
8090 HOST_WIDE_INT val
= INTVAL (index
);
8092 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8093 If vldr is selected it uses arm_coproc_mem_operand. */
8095 return val
> -256 && val
< 256;
8097 return val
> -4096 && val
< 4092;
8100 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
8103 if (GET_MODE_SIZE (mode
) <= 4
8107 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
8111 rtx xiop0
= XEXP (index
, 0);
8112 rtx xiop1
= XEXP (index
, 1);
8114 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8115 && power_of_two_operand (xiop1
, SImode
))
8116 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8117 && power_of_two_operand (xiop0
, SImode
)));
8119 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
8120 || code
== ASHIFT
|| code
== ROTATERT
)
8122 rtx op
= XEXP (index
, 1);
8124 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8127 && INTVAL (op
) <= 31);
8131 /* For ARM v4 we may be doing a sign-extend operation during the
8137 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
8143 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
8145 return (code
== CONST_INT
8146 && INTVAL (index
) < range
8147 && INTVAL (index
) > -range
);
8150 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8151 index operand. i.e. 1, 2, 4 or 8. */
8153 thumb2_index_mul_operand (rtx op
)
8157 if (!CONST_INT_P (op
))
8161 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
8164 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8166 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
8168 enum rtx_code code
= GET_CODE (index
);
8170 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8171 /* Standard coprocessor addressing modes. */
8172 if (TARGET_HARD_FLOAT
8173 && (mode
== SFmode
|| mode
== DFmode
))
8174 return (code
== CONST_INT
&& INTVAL (index
) < 1024
8175 /* Thumb-2 allows only > -256 index range for it's core register
8176 load/stores. Since we allow SF/DF in core registers, we have
8177 to use the intersection between -256~4096 (core) and -1024~1024
8179 && INTVAL (index
) > -256
8180 && (INTVAL (index
) & 3) == 0);
8182 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8184 /* For DImode assume values will usually live in core regs
8185 and only allow LDRD addressing modes. */
8186 if (!TARGET_LDRD
|| mode
!= DImode
)
8187 return (code
== CONST_INT
8188 && INTVAL (index
) < 1024
8189 && INTVAL (index
) > -1024
8190 && (INTVAL (index
) & 3) == 0);
8193 /* For quad modes, we restrict the constant offset to be slightly less
8194 than what the instruction format permits. We do this because for
8195 quad mode moves, we will actually decompose them into two separate
8196 double-mode reads or writes. INDEX must therefore be a valid
8197 (double-mode) offset and so should INDEX+8. */
8198 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
8199 return (code
== CONST_INT
8200 && INTVAL (index
) < 1016
8201 && INTVAL (index
) > -1024
8202 && (INTVAL (index
) & 3) == 0);
8204 /* We have no such constraint on double mode offsets, so we permit the
8205 full range of the instruction format. */
8206 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8207 return (code
== CONST_INT
8208 && INTVAL (index
) < 1024
8209 && INTVAL (index
) > -1024
8210 && (INTVAL (index
) & 3) == 0);
8212 if (arm_address_register_rtx_p (index
, strict_p
)
8213 && (GET_MODE_SIZE (mode
) <= 4))
8216 if (mode
== DImode
|| mode
== DFmode
)
8218 if (code
== CONST_INT
)
8220 HOST_WIDE_INT val
= INTVAL (index
);
8221 /* Thumb-2 ldrd only has reg+const addressing modes.
8222 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8223 If vldr is selected it uses arm_coproc_mem_operand. */
8225 return IN_RANGE (val
, -1020, 1020) && (val
& 3) == 0;
8227 return IN_RANGE (val
, -255, 4095 - 4);
8235 rtx xiop0
= XEXP (index
, 0);
8236 rtx xiop1
= XEXP (index
, 1);
8238 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8239 && thumb2_index_mul_operand (xiop1
))
8240 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8241 && thumb2_index_mul_operand (xiop0
)));
8243 else if (code
== ASHIFT
)
8245 rtx op
= XEXP (index
, 1);
8247 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8250 && INTVAL (op
) <= 3);
8253 return (code
== CONST_INT
8254 && INTVAL (index
) < 4096
8255 && INTVAL (index
) > -256);
8258 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8260 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
8270 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
8272 return (regno
<= LAST_LO_REGNUM
8273 || regno
> LAST_VIRTUAL_REGISTER
8274 || regno
== FRAME_POINTER_REGNUM
8275 || (GET_MODE_SIZE (mode
) >= 4
8276 && (regno
== STACK_POINTER_REGNUM
8277 || regno
>= FIRST_PSEUDO_REGISTER
8278 || x
== hard_frame_pointer_rtx
8279 || x
== arg_pointer_rtx
)));
8282 /* Return nonzero if x is a legitimate index register. This is the case
8283 for any base register that can access a QImode object. */
8285 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
8287 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
8290 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8292 The AP may be eliminated to either the SP or the FP, so we use the
8293 least common denominator, e.g. SImode, and offsets from 0 to 64.
8295 ??? Verify whether the above is the right approach.
8297 ??? Also, the FP may be eliminated to the SP, so perhaps that
8298 needs special handling also.
8300 ??? Look at how the mips16 port solves this problem. It probably uses
8301 better ways to solve some of these problems.
8303 Although it is not incorrect, we don't accept QImode and HImode
8304 addresses based on the frame pointer or arg pointer until the
8305 reload pass starts. This is so that eliminating such addresses
8306 into stack based ones won't produce impossible code. */
8308 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8310 if (TARGET_HAVE_MOVT
&& can_avoid_literal_pool_for_label_p (x
))
8313 /* ??? Not clear if this is right. Experiment. */
8314 if (GET_MODE_SIZE (mode
) < 4
8315 && !(reload_in_progress
|| reload_completed
)
8316 && (reg_mentioned_p (frame_pointer_rtx
, x
)
8317 || reg_mentioned_p (arg_pointer_rtx
, x
)
8318 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
8319 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
8320 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
8321 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
8324 /* Accept any base register. SP only in SImode or larger. */
8325 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
8328 /* This is PC relative data before arm_reorg runs. */
8329 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
8330 && GET_CODE (x
) == SYMBOL_REF
8331 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
8334 /* This is PC relative data after arm_reorg runs. */
8335 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
8337 && (GET_CODE (x
) == LABEL_REF
8338 || (GET_CODE (x
) == CONST
8339 && GET_CODE (XEXP (x
, 0)) == PLUS
8340 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8341 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8344 /* Post-inc indexing only supported for SImode and larger. */
8345 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
8346 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
8349 else if (GET_CODE (x
) == PLUS
)
8351 /* REG+REG address can be any two index registers. */
8352 /* We disallow FRAME+REG addressing since we know that FRAME
8353 will be replaced with STACK, and SP relative addressing only
8354 permits SP+OFFSET. */
8355 if (GET_MODE_SIZE (mode
) <= 4
8356 && XEXP (x
, 0) != frame_pointer_rtx
8357 && XEXP (x
, 1) != frame_pointer_rtx
8358 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8359 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
8360 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
8363 /* REG+const has 5-7 bit offset for non-SP registers. */
8364 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8365 || XEXP (x
, 0) == arg_pointer_rtx
)
8366 && CONST_INT_P (XEXP (x
, 1))
8367 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
8370 /* REG+const has 10-bit offset for SP, but only SImode and
8371 larger is supported. */
8372 /* ??? Should probably check for DI/DFmode overflow here
8373 just like GO_IF_LEGITIMATE_OFFSET does. */
8374 else if (REG_P (XEXP (x
, 0))
8375 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
8376 && GET_MODE_SIZE (mode
) >= 4
8377 && CONST_INT_P (XEXP (x
, 1))
8378 && INTVAL (XEXP (x
, 1)) >= 0
8379 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
8380 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8383 else if (REG_P (XEXP (x
, 0))
8384 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
8385 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
8386 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
8387 && REGNO (XEXP (x
, 0))
8388 <= LAST_VIRTUAL_POINTER_REGISTER
))
8389 && GET_MODE_SIZE (mode
) >= 4
8390 && CONST_INT_P (XEXP (x
, 1))
8391 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8395 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8396 && GET_MODE_SIZE (mode
) == 4
8397 && GET_CODE (x
) == SYMBOL_REF
8398 && CONSTANT_POOL_ADDRESS_P (x
)
8400 && symbol_mentioned_p (get_pool_constant (x
))
8401 && ! pcrel_constant_p (get_pool_constant (x
))))
8407 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8408 instruction of mode MODE. */
8410 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
8412 switch (GET_MODE_SIZE (mode
))
8415 return val
>= 0 && val
< 32;
8418 return val
>= 0 && val
< 64 && (val
& 1) == 0;
8422 && (val
+ GET_MODE_SIZE (mode
)) <= 128
8428 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
8431 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
8432 else if (TARGET_THUMB2
)
8433 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
8434 else /* if (TARGET_THUMB1) */
8435 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
8438 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8440 Given an rtx X being reloaded into a reg required to be
8441 in class CLASS, return the class of reg to actually use.
8442 In general this is just CLASS, but for the Thumb core registers and
8443 immediate constants we prefer a LO_REGS class or a subset. */
8446 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
8452 if (rclass
== GENERAL_REGS
)
8459 /* Build the SYMBOL_REF for __tls_get_addr. */
8461 static GTY(()) rtx tls_get_addr_libfunc
;
8464 get_tls_get_addr (void)
8466 if (!tls_get_addr_libfunc
)
8467 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
8468 return tls_get_addr_libfunc
;
8472 arm_load_tp (rtx target
)
8475 target
= gen_reg_rtx (SImode
);
8479 /* Can return in any reg. */
8480 emit_insn (gen_load_tp_hard (target
));
8484 /* Always returned in r0. Immediately copy the result into a pseudo,
8485 otherwise other uses of r0 (e.g. setting up function arguments) may
8486 clobber the value. */
8490 emit_insn (gen_load_tp_soft ());
8492 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
8493 emit_move_insn (target
, tmp
);
8499 load_tls_operand (rtx x
, rtx reg
)
8503 if (reg
== NULL_RTX
)
8504 reg
= gen_reg_rtx (SImode
);
8506 tmp
= gen_rtx_CONST (SImode
, x
);
8508 emit_move_insn (reg
, tmp
);
8514 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
8516 rtx label
, labelno
, sum
;
8518 gcc_assert (reloc
!= TLS_DESCSEQ
);
8521 labelno
= GEN_INT (pic_labelno
++);
8522 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8523 label
= gen_rtx_CONST (VOIDmode
, label
);
8525 sum
= gen_rtx_UNSPEC (Pmode
,
8526 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
8527 GEN_INT (TARGET_ARM
? 8 : 4)),
8529 reg
= load_tls_operand (sum
, reg
);
8532 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
8534 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8536 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
8537 LCT_PURE
, /* LCT_CONST? */
8540 rtx_insn
*insns
= get_insns ();
8547 arm_tls_descseq_addr (rtx x
, rtx reg
)
8549 rtx labelno
= GEN_INT (pic_labelno
++);
8550 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8551 rtx sum
= gen_rtx_UNSPEC (Pmode
,
8552 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
8553 gen_rtx_CONST (VOIDmode
, label
),
8554 GEN_INT (!TARGET_ARM
)),
8556 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
8558 emit_insn (gen_tlscall (x
, labelno
));
8560 reg
= gen_reg_rtx (SImode
);
8562 gcc_assert (REGNO (reg
) != R0_REGNUM
);
8564 emit_move_insn (reg
, reg0
);
8570 legitimize_tls_address (rtx x
, rtx reg
)
8572 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
8574 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
8578 case TLS_MODEL_GLOBAL_DYNAMIC
:
8579 if (TARGET_GNU2_TLS
)
8581 reg
= arm_tls_descseq_addr (x
, reg
);
8583 tp
= arm_load_tp (NULL_RTX
);
8585 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8589 /* Original scheme */
8590 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
8591 dest
= gen_reg_rtx (Pmode
);
8592 emit_libcall_block (insns
, dest
, ret
, x
);
8596 case TLS_MODEL_LOCAL_DYNAMIC
:
8597 if (TARGET_GNU2_TLS
)
8599 reg
= arm_tls_descseq_addr (x
, reg
);
8601 tp
= arm_load_tp (NULL_RTX
);
8603 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8607 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
8609 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8610 share the LDM result with other LD model accesses. */
8611 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
8613 dest
= gen_reg_rtx (Pmode
);
8614 emit_libcall_block (insns
, dest
, ret
, eqv
);
8616 /* Load the addend. */
8617 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
8618 GEN_INT (TLS_LDO32
)),
8620 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
8621 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
8625 case TLS_MODEL_INITIAL_EXEC
:
8626 labelno
= GEN_INT (pic_labelno
++);
8627 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8628 label
= gen_rtx_CONST (VOIDmode
, label
);
8629 sum
= gen_rtx_UNSPEC (Pmode
,
8630 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
8631 GEN_INT (TARGET_ARM
? 8 : 4)),
8633 reg
= load_tls_operand (sum
, reg
);
8636 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
8637 else if (TARGET_THUMB2
)
8638 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
8641 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8642 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
8645 tp
= arm_load_tp (NULL_RTX
);
8647 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8649 case TLS_MODEL_LOCAL_EXEC
:
8650 tp
= arm_load_tp (NULL_RTX
);
8652 reg
= gen_rtx_UNSPEC (Pmode
,
8653 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
8655 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
8657 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8664 /* Try machine-dependent ways of modifying an illegitimate address
8665 to be legitimate. If we find one, return the new, valid address. */
8667 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8669 if (arm_tls_referenced_p (x
))
8673 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
8675 addend
= XEXP (XEXP (x
, 0), 1);
8676 x
= XEXP (XEXP (x
, 0), 0);
8679 if (GET_CODE (x
) != SYMBOL_REF
)
8682 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
8684 x
= legitimize_tls_address (x
, NULL_RTX
);
8688 x
= gen_rtx_PLUS (SImode
, x
, addend
);
8697 /* TODO: legitimize_address for Thumb2. */
8700 return thumb_legitimize_address (x
, orig_x
, mode
);
8703 if (GET_CODE (x
) == PLUS
)
8705 rtx xop0
= XEXP (x
, 0);
8706 rtx xop1
= XEXP (x
, 1);
8708 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
8709 xop0
= force_reg (SImode
, xop0
);
8711 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
8712 && !symbol_mentioned_p (xop1
))
8713 xop1
= force_reg (SImode
, xop1
);
8715 if (ARM_BASE_REGISTER_RTX_P (xop0
)
8716 && CONST_INT_P (xop1
))
8718 HOST_WIDE_INT n
, low_n
;
8722 /* VFP addressing modes actually allow greater offsets, but for
8723 now we just stick with the lowest common denominator. */
8724 if (mode
== DImode
|| mode
== DFmode
)
8736 low_n
= ((mode
) == TImode
? 0
8737 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
8741 base_reg
= gen_reg_rtx (SImode
);
8742 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
8743 emit_move_insn (base_reg
, val
);
8744 x
= plus_constant (Pmode
, base_reg
, low_n
);
8746 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8747 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8750 /* XXX We don't allow MINUS any more -- see comment in
8751 arm_legitimate_address_outer_p (). */
8752 else if (GET_CODE (x
) == MINUS
)
8754 rtx xop0
= XEXP (x
, 0);
8755 rtx xop1
= XEXP (x
, 1);
8757 if (CONSTANT_P (xop0
))
8758 xop0
= force_reg (SImode
, xop0
);
8760 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
8761 xop1
= force_reg (SImode
, xop1
);
8763 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8764 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
8767 /* Make sure to take full advantage of the pre-indexed addressing mode
8768 with absolute addresses which often allows for the base register to
8769 be factorized for multiple adjacent memory references, and it might
8770 even allows for the mini pool to be avoided entirely. */
8771 else if (CONST_INT_P (x
) && optimize
> 0)
8774 HOST_WIDE_INT mask
, base
, index
;
8777 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8778 use a 8-bit index. So let's use a 12-bit index for SImode only and
8779 hope that arm_gen_constant will enable ldrb to use more bits. */
8780 bits
= (mode
== SImode
) ? 12 : 8;
8781 mask
= (1 << bits
) - 1;
8782 base
= INTVAL (x
) & ~mask
;
8783 index
= INTVAL (x
) & mask
;
8784 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
8786 /* It'll most probably be more efficient to generate the base
8787 with more bits set and use a negative index instead. */
8791 base_reg
= force_reg (SImode
, GEN_INT (base
));
8792 x
= plus_constant (Pmode
, base_reg
, index
);
8797 /* We need to find and carefully transform any SYMBOL and LABEL
8798 references; so go back to the original address expression. */
8799 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
, NULL_RTX
,
8800 false /*compute_now*/);
8802 if (new_x
!= orig_x
)
8810 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8811 to be legitimate. If we find one, return the new, valid address. */
8813 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8815 if (GET_CODE (x
) == PLUS
8816 && CONST_INT_P (XEXP (x
, 1))
8817 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
8818 || INTVAL (XEXP (x
, 1)) < 0))
8820 rtx xop0
= XEXP (x
, 0);
8821 rtx xop1
= XEXP (x
, 1);
8822 HOST_WIDE_INT offset
= INTVAL (xop1
);
8824 /* Try and fold the offset into a biasing of the base register and
8825 then offsetting that. Don't do this when optimizing for space
8826 since it can cause too many CSEs. */
8827 if (optimize_size
&& offset
>= 0
8828 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
8830 HOST_WIDE_INT delta
;
8833 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
8834 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
8835 delta
= 31 * GET_MODE_SIZE (mode
);
8837 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
8839 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
8841 x
= plus_constant (Pmode
, xop0
, delta
);
8843 else if (offset
< 0 && offset
> -256)
8844 /* Small negative offsets are best done with a subtract before the
8845 dereference, forcing these into a register normally takes two
8847 x
= force_operand (x
, NULL_RTX
);
8850 /* For the remaining cases, force the constant into a register. */
8851 xop1
= force_reg (SImode
, xop1
);
8852 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8855 else if (GET_CODE (x
) == PLUS
8856 && s_register_operand (XEXP (x
, 1), SImode
)
8857 && !s_register_operand (XEXP (x
, 0), SImode
))
8859 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
8861 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
8866 /* We need to find and carefully transform any SYMBOL and LABEL
8867 references; so go back to the original address expression. */
8868 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
, NULL_RTX
,
8869 false /*compute_now*/);
8871 if (new_x
!= orig_x
)
8878 /* Return TRUE if X contains any TLS symbol references. */
8881 arm_tls_referenced_p (rtx x
)
8883 if (! TARGET_HAVE_TLS
)
8886 subrtx_iterator::array_type array
;
8887 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8889 const_rtx x
= *iter
;
8890 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8892 /* ARM currently does not provide relocations to encode TLS variables
8893 into AArch32 instructions, only data, so there is no way to
8894 currently implement these if a literal pool is disabled. */
8895 if (arm_disable_literal_pool
)
8896 sorry ("accessing thread-local storage is not currently supported "
8897 "with %<-mpure-code%> or %<-mslow-flash-data%>");
8902 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8903 TLS offsets, not real symbol references. */
8904 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8905 iter
.skip_subrtxes ();
8910 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8912 On the ARM, allow any integer (invalid ones are removed later by insn
8913 patterns), nice doubles and symbol_refs which refer to the function's
8916 When generating pic allow anything. */
8919 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
8921 return flag_pic
|| !label_mentioned_p (x
);
8925 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8927 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8928 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8929 for ARMv8-M Baseline or later the result is valid. */
8930 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
8933 return (CONST_INT_P (x
)
8934 || CONST_DOUBLE_P (x
)
8935 || CONSTANT_ADDRESS_P (x
)
8936 || (TARGET_HAVE_MOVT
&& GET_CODE (x
) == SYMBOL_REF
)
8941 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8943 return (!arm_cannot_force_const_mem (mode
, x
)
8945 ? arm_legitimate_constant_p_1 (mode
, x
)
8946 : thumb_legitimate_constant_p (mode
, x
)));
8949 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8952 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8955 split_const (x
, &base
, &offset
);
8957 if (SYMBOL_REF_P (base
))
8959 /* Function symbols cannot have an offset due to the Thumb bit. */
8960 if ((SYMBOL_REF_FLAGS (base
) & SYMBOL_FLAG_FUNCTION
)
8961 && INTVAL (offset
) != 0)
8964 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
8965 && !offset_within_block_p (base
, INTVAL (offset
)))
8968 return arm_tls_referenced_p (x
);
/* Nonzero if X is a REG, or a SUBREG wrapping a REG.
   NOTE(review): the first line of this macro was dropped by the
   extraction ("(REG_P (X)"); reconstructed — verify against upstream.  */
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

/* Strip a SUBREG: yield X if it is a REG, else the register inside it.  */
#define REG_OR_SUBREG_RTX(X)			\
  (REG_P (X) ? (X) : SUBREG_REG (X))
/* NOTE(review): this region is a mangled extraction of the original file —
   each fragment carries the original source line number (e.g. "8979") and
   many lines (the switch's case labels, braces, returns, declarations) are
   missing entirely.  From the visible fragments this is thumb1_rtx_costs:
   a per-rtx-code speed-cost estimate (COSTS_N_INSNS units) for Thumb-1,
   dispatching on CODE with OUTER giving the containing operation for
   CONST_INT operands.  Do not edit until the original text is restored.  */
8979 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8981 machine_mode mode
= GET_MODE (x
);
/* Shift-class codes (fragment): SImode costs 1 insn, wider modes 2.  */
8990 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8997 return COSTS_N_INSNS (1);
/* Multiply: small/high-latency M-profile multipliers are charged 32 insns
   (visible on the next fragment).  */
9000 if (arm_arch6m
&& arm_m_profile_small_mul
)
9001 return COSTS_N_INSNS (32);
9003 if (CONST_INT_P (XEXP (x
, 1)))
9006 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
/* "cycles" is computed in dropped lines — presumably by scanning the
   constant multiplier; TODO confirm against upstream.  */
9013 return COSTS_N_INSNS (2) + cycles
;
9015 return COSTS_N_INSNS (1) + 16;
9018 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9020 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9021 return (COSTS_N_INSNS (words
)
9022 + 4 * ((MEM_P (SET_SRC (x
)))
9023 + MEM_P (SET_DEST (x
))));
/* CONST_INT: the cost depends on OUTER, the operation consuming it.  */
9028 if (UINTVAL (x
) < 256
9029 /* 16-bit constant. */
9030 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
9032 if (thumb_shiftable_const (INTVAL (x
)))
9033 return COSTS_N_INSNS (2);
9034 return COSTS_N_INSNS (3);
9036 else if ((outer
== PLUS
|| outer
== COMPARE
)
9037 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9039 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9040 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9041 return COSTS_N_INSNS (1);
9042 else if (outer
== AND
)
9045 /* This duplicates the tests in the andsi3 expander. */
9046 for (i
= 9; i
<= 31; i
++)
9047 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
9048 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
9049 return COSTS_N_INSNS (2);
9051 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9052 || outer
== LSHIFTRT
)
9054 return COSTS_N_INSNS (2);
/* Dropped region 9055-9077 presumably held further case labels (symbolic
   constants, divisions, logical ops) — TODO restore from upstream.  */
9060 return COSTS_N_INSNS (3);
9078 /* XXX another guess. */
9079 /* Memory costs quite a lot for the first word, but subsequent words
9080 load at the equivalent of a single insn each. */
9081 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9082 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
/* IF_THEN_ELSE fragment: checks whether either arm is a branch (PC).  */
9087 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
/* Extension fragment: DImode pays one extra insn, then the cost of the
   operand is added recursively.  */
9093 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
9094 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
9100 return total
+ COSTS_N_INSNS (1);
9102 /* Assume a two-shift sequence. Increase the cost slightly so
9103 we prefer actual shifts over an extend operation. */
9104 return total
+ 1 + COSTS_N_INSNS (2);
/* NOTE(review): mangled extraction — fragments carry original line numbers
   and many lines (case labels, braces, returns) are missing.  From the
   visible fragments this is thumb1_size_rtx_costs: the size-oriented
   sibling of thumb1_rtx_costs, estimating code-size cost per rtx code.
   Do not edit until the original text is restored.  */
9111 /* Estimates the size cost of thumb1 instructions.
9112 For now most of the code is copied from thumb1_rtx_costs. We need more
9113 fine grain tuning when we have more related test cases. */
9115 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
9117 machine_mode mode
= GET_MODE (x
);
/* Shift-class fragment: SImode 1 insn, wider 2.  */
9126 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9130 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9131 defined by RTL expansion, especially for the expansion of
9133 if ((GET_CODE (XEXP (x
, 0)) == MULT
9134 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
9135 || (GET_CODE (XEXP (x
, 1)) == MULT
9136 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
9137 return COSTS_N_INSNS (2);
9142 return COSTS_N_INSNS (1);
/* Multiply-by-constant fragment: the constant must first be loaded.  */
9145 if (CONST_INT_P (XEXP (x
, 1)))
9147 /* Thumb1 mul instruction can't operate on const. We must Load it
9148 into a register first. */
9149 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
9150 /* For the targets which have a very small and high-latency multiply
9151 unit, we prefer to synthesize the mult with up to 5 instructions,
9152 giving a good balance between size and performance. */
9153 if (arm_arch6m
&& arm_m_profile_small_mul
)
9154 return COSTS_N_INSNS (5);
9156 return COSTS_N_INSNS (1) + const_size
;
9158 return COSTS_N_INSNS (1);
9161 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9163 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9164 cost
= COSTS_N_INSNS (words
)
9165 if (satisfies_constraint_J (SET_SRC (x
))
9166 || satisfies_constraint_K (SET_SRC (x
))
9167 /* Too big an immediate for a 2-byte mov, using MOVT. */
9168 || (CONST_INT_P (SET_SRC (x
))
9169 && UINTVAL (SET_SRC (x
)) >= 256
9171 && satisfies_constraint_j (SET_SRC (x
)))
9172 /* thumb1_movdi_insn. */
9173 || ((words
> 1) && MEM_P (SET_SRC (x
))))
9174 cost
+= COSTS_N_INSNS (1);
/* CONST_INT fragment: graded by the encodings available (mov, movw,
   negated mov, shifted constant, full synthesis).  */
9180 if (UINTVAL (x
) < 256)
9181 return COSTS_N_INSNS (1);
9182 /* movw is 4byte long. */
9183 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
9184 return COSTS_N_INSNS (2);
9185 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9186 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
9187 return COSTS_N_INSNS (2);
9188 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9189 if (thumb_shiftable_const (INTVAL (x
)))
9190 return COSTS_N_INSNS (2);
9191 return COSTS_N_INSNS (3);
9193 else if ((outer
== PLUS
|| outer
== COMPARE
)
9194 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9196 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9197 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9198 return COSTS_N_INSNS (1);
9199 else if (outer
== AND
)
9202 /* This duplicates the tests in the andsi3 expander. */
9203 for (i
= 9; i
<= 31; i
++)
9204 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
9205 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
9206 return COSTS_N_INSNS (2);
9208 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9209 || outer
== LSHIFTRT
)
9211 return COSTS_N_INSNS (2);
9217 return COSTS_N_INSNS (3);
/* Dropped region 9218-9230 presumably held more case labels — TODO
   restore from upstream.  */
9231 return COSTS_N_INSNS (1);
/* MEM fragment: constant-pool symbols pay one extra insn.  */
9234 return (COSTS_N_INSNS (1)
9236 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9237 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9238 ? COSTS_N_INSNS (1) : 0));
9242 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9247 /* XXX still guessing. */
9248 switch (GET_MODE (XEXP (x
, 0)))
9251 return (1 + (mode
== DImode
? 4 : 0)
9252 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9255 return (4 + (mode
== DImode
? 4 : 0)
9256 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9259 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9270 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9271 operand, then return the operand that is being shifted. If the shift
9272 is not by a constant, then set SHIFT_REG to point to the operand.
9273 Return NULL if OP is not a shifter operand. */
9275 shifter_op_p (rtx op
, rtx
*shift_reg
)
9277 enum rtx_code code
= GET_CODE (op
);
9279 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9280 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9281 return XEXP (op
, 0);
9282 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9283 return XEXP (op
, 0);
9284 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9285 || code
== ASHIFTRT
)
9287 if (!CONST_INT_P (XEXP (op
, 1)))
9288 *shift_reg
= XEXP (op
, 1);
9289 return XEXP (op
, 0);
9296 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9298 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9299 rtx_code code
= GET_CODE (x
);
9300 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9302 switch (XINT (x
, 1))
9304 case UNSPEC_UNALIGNED_LOAD
:
9305 /* We can only do unaligned loads into the integer unit, and we can't
9307 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9309 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9310 + extra_cost
->ldst
.load_unaligned
);
9313 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9314 ADDR_SPACE_GENERIC
, speed_p
);
9318 case UNSPEC_UNALIGNED_STORE
:
9319 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9321 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9322 + extra_cost
->ldst
.store_unaligned
);
9324 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
9326 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9327 ADDR_SPACE_GENERIC
, speed_p
);
9338 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9342 *cost
= COSTS_N_INSNS (2);
/* NOTE(review): mangled extraction — HANDLE_NARROW_SHIFT_ARITH below is
   missing several of its continuation lines (the do/while wrapper, the
   speed_p guards, and the closing OP/return lines).  Do not edit until the
   original macro text is restored from upstream.  */
9348 /* Cost of a libcall. We assume one insn per argument, an amount for the
9349 call (one insn for -Os) and then one for processing the result. */
9350 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9352 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9355 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9356 if (shift_op != NULL \
9357 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9362 *cost += extra_cost->alu.arith_shift_reg; \
9363 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9364 ASHIFT, 1, speed_p); \
9367 *cost += extra_cost->alu.arith_shift; \
9369 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9370 ASHIFT, 0, speed_p) \
9371 + rtx_cost (XEXP (x, 1 - IDX), \
9372 GET_MODE (shift_op), \
9379 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9380 considering the costs of the addressing mode and memory access
9383 arm_mem_costs (rtx x
, const struct cpu_cost_table
*extra_cost
,
9384 int *cost
, bool speed_p
)
9386 machine_mode mode
= GET_MODE (x
);
9388 *cost
= COSTS_N_INSNS (1);
9391 && GET_CODE (XEXP (x
, 0)) == PLUS
9392 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9393 /* This will be split into two instructions. Add the cost of the
9394 additional instruction here. The cost of the memory access is computed
9395 below. See arm.md:calculate_pic_address. */
9396 *cost
+= COSTS_N_INSNS (1);
9398 /* Calculate cost of the addressing mode. */
9401 arm_addr_mode_op op_type
;
9402 switch (GET_CODE (XEXP (x
, 0)))
9406 op_type
= AMO_DEFAULT
;
9409 /* MINUS does not appear in RTL, but the architecture supports it,
9410 so handle this case defensively. */
9413 op_type
= AMO_NO_WB
;
9425 if (VECTOR_MODE_P (mode
))
9426 *cost
+= current_tune
->addr_mode_costs
->vector
[op_type
];
9427 else if (FLOAT_MODE_P (mode
))
9428 *cost
+= current_tune
->addr_mode_costs
->fp
[op_type
];
9430 *cost
+= current_tune
->addr_mode_costs
->integer
[op_type
];
9433 /* Calculate cost of memory access. */
9436 if (FLOAT_MODE_P (mode
))
9438 if (GET_MODE_SIZE (mode
) == 8)
9439 *cost
+= extra_cost
->ldst
.loadd
;
9441 *cost
+= extra_cost
->ldst
.loadf
;
9443 else if (VECTOR_MODE_P (mode
))
9444 *cost
+= extra_cost
->ldst
.loadv
;
9448 if (GET_MODE_SIZE (mode
) == 8)
9449 *cost
+= extra_cost
->ldst
.ldrd
;
9451 *cost
+= extra_cost
->ldst
.load
;
9458 /* RTX costs. Make an estimate of the cost of executing the operation
9459 X, which is contained within an operation with code OUTER_CODE.
9460 SPEED_P indicates whether the cost desired is the performance cost,
9461 or the size cost. The estimate is stored in COST and the return
9462 value is TRUE if the cost calculation is final, or FALSE if the
9463 caller should recurse through the operands of X to add additional
9466 We currently make no attempt to model the size savings of Thumb-2
9467 16-bit instructions. At the normal points in compilation where
9468 this code is called we have no measure of whether the condition
9469 flags are live or not, and thus no realistic way to determine what
9470 the size will eventually be. */
9472 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9473 const struct cpu_cost_table
*extra_cost
,
9474 int *cost
, bool speed_p
)
9476 machine_mode mode
= GET_MODE (x
);
9478 *cost
= COSTS_N_INSNS (1);
9483 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9485 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9493 /* SET RTXs don't have a mode so we get it from the destination. */
9494 mode
= GET_MODE (SET_DEST (x
));
9496 if (REG_P (SET_SRC (x
))
9497 && REG_P (SET_DEST (x
)))
9499 /* Assume that most copies can be done with a single insn,
9500 unless we don't have HW FP, in which case everything
9501 larger than word mode will require two insns. */
9502 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9503 && GET_MODE_SIZE (mode
) > 4)
9506 /* Conditional register moves can be encoded
9507 in 16 bits in Thumb mode. */
9508 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9514 if (CONST_INT_P (SET_SRC (x
)))
9516 /* Handle CONST_INT here, since the value doesn't have a mode
9517 and we would otherwise be unable to work out the true cost. */
9518 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9521 /* Slightly lower the cost of setting a core reg to a constant.
9522 This helps break up chains and allows for better scheduling. */
9523 if (REG_P (SET_DEST (x
))
9524 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9527 /* Immediate moves with an immediate in the range [0, 255] can be
9528 encoded in 16 bits in Thumb mode. */
9529 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9530 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9532 goto const_int_cost
;
9538 return arm_mem_costs (x
, extra_cost
, cost
, speed_p
);
9542 /* Calculations of LDM costs are complex. We assume an initial cost
9543 (ldm_1st) which will load the number of registers mentioned in
9544 ldm_regs_per_insn_1st registers; then each additional
9545 ldm_regs_per_insn_subsequent registers cost one more insn. The
9546 formula for N regs is thus:
9548 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9549 + ldm_regs_per_insn_subsequent - 1)
9550 / ldm_regs_per_insn_subsequent).
9552 Additional costs may also be added for addressing. A similar
9553 formula is used for STM. */
9555 bool is_ldm
= load_multiple_operation (x
, SImode
);
9556 bool is_stm
= store_multiple_operation (x
, SImode
);
9558 if (is_ldm
|| is_stm
)
9562 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9563 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9564 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9565 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9566 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9567 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9568 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9570 *cost
+= regs_per_insn_1st
9571 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9572 + regs_per_insn_sub
- 1)
9573 / regs_per_insn_sub
);
9582 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9583 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9584 *cost
+= COSTS_N_INSNS (speed_p
9585 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9586 else if (mode
== SImode
&& TARGET_IDIV
)
9587 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9589 *cost
= LIBCALL_COST (2);
9591 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9592 possible udiv is prefered. */
9593 *cost
+= (code
== DIV
? COSTS_N_INSNS (1) : 0);
9594 return false; /* All arguments must be in registers. */
9597 /* MOD by a power of 2 can be expanded as:
9599 and r0, r0, #(n - 1)
9600 and r1, r1, #(n - 1)
9601 rsbpl r0, r1, #0. */
9602 if (CONST_INT_P (XEXP (x
, 1))
9603 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9606 *cost
+= COSTS_N_INSNS (3);
9609 *cost
+= 2 * extra_cost
->alu
.logical
9610 + extra_cost
->alu
.arith
;
9616 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9617 possible udiv is prefered. */
9618 *cost
= LIBCALL_COST (2) + (code
== MOD
? COSTS_N_INSNS (1) : 0);
9619 return false; /* All arguments must be in registers. */
9622 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9624 *cost
+= (COSTS_N_INSNS (1)
9625 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9627 *cost
+= extra_cost
->alu
.shift_reg
;
9635 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9637 *cost
+= (COSTS_N_INSNS (2)
9638 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9640 *cost
+= 2 * extra_cost
->alu
.shift
;
9641 /* Slightly disparage left shift by 1 at so we prefer adddi3. */
9642 if (code
== ASHIFT
&& XEXP (x
, 1) == CONST1_RTX (SImode
))
9646 else if (mode
== SImode
)
9648 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9649 /* Slightly disparage register shifts at -Os, but not by much. */
9650 if (!CONST_INT_P (XEXP (x
, 1)))
9651 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9652 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9655 else if (GET_MODE_CLASS (mode
) == MODE_INT
9656 && GET_MODE_SIZE (mode
) < 4)
9660 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9661 /* Slightly disparage register shifts at -Os, but not by
9663 if (!CONST_INT_P (XEXP (x
, 1)))
9664 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9665 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9667 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9669 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9671 /* Can use SBFX/UBFX. */
9673 *cost
+= extra_cost
->alu
.bfx
;
9674 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9678 *cost
+= COSTS_N_INSNS (1);
9679 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9682 if (CONST_INT_P (XEXP (x
, 1)))
9683 *cost
+= 2 * extra_cost
->alu
.shift
;
9685 *cost
+= (extra_cost
->alu
.shift
9686 + extra_cost
->alu
.shift_reg
);
9689 /* Slightly disparage register shifts. */
9690 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9695 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9696 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9699 if (CONST_INT_P (XEXP (x
, 1)))
9700 *cost
+= (2 * extra_cost
->alu
.shift
9701 + extra_cost
->alu
.log_shift
);
9703 *cost
+= (extra_cost
->alu
.shift
9704 + extra_cost
->alu
.shift_reg
9705 + extra_cost
->alu
.log_shift_reg
);
9711 *cost
= LIBCALL_COST (2);
9720 *cost
+= extra_cost
->alu
.rev
;
9727 /* No rev instruction available. Look at arm_legacy_rev
9728 and thumb_legacy_rev for the form of RTL used then. */
9731 *cost
+= COSTS_N_INSNS (9);
9735 *cost
+= 6 * extra_cost
->alu
.shift
;
9736 *cost
+= 3 * extra_cost
->alu
.logical
;
9741 *cost
+= COSTS_N_INSNS (4);
9745 *cost
+= 2 * extra_cost
->alu
.shift
;
9746 *cost
+= extra_cost
->alu
.arith_shift
;
9747 *cost
+= 2 * extra_cost
->alu
.logical
;
9755 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9756 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9758 if (GET_CODE (XEXP (x
, 0)) == MULT
9759 || GET_CODE (XEXP (x
, 1)) == MULT
)
9761 rtx mul_op0
, mul_op1
, sub_op
;
9764 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9766 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9768 mul_op0
= XEXP (XEXP (x
, 0), 0);
9769 mul_op1
= XEXP (XEXP (x
, 0), 1);
9770 sub_op
= XEXP (x
, 1);
9774 mul_op0
= XEXP (XEXP (x
, 1), 0);
9775 mul_op1
= XEXP (XEXP (x
, 1), 1);
9776 sub_op
= XEXP (x
, 0);
9779 /* The first operand of the multiply may be optionally
9781 if (GET_CODE (mul_op0
) == NEG
)
9782 mul_op0
= XEXP (mul_op0
, 0);
9784 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9785 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9786 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9792 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9798 rtx shift_by_reg
= NULL
;
9802 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9803 if (shift_op
== NULL
)
9805 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9806 non_shift_op
= XEXP (x
, 0);
9809 non_shift_op
= XEXP (x
, 1);
9811 if (shift_op
!= NULL
)
9813 if (shift_by_reg
!= NULL
)
9816 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9817 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9820 *cost
+= extra_cost
->alu
.arith_shift
;
9822 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9823 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9828 && GET_CODE (XEXP (x
, 1)) == MULT
)
9832 *cost
+= extra_cost
->mult
[0].add
;
9833 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9834 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9835 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9839 if (CONST_INT_P (XEXP (x
, 0)))
9841 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9842 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9844 *cost
= COSTS_N_INSNS (insns
);
9846 *cost
+= insns
* extra_cost
->alu
.arith
;
9847 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9851 *cost
+= extra_cost
->alu
.arith
;
9856 if (GET_MODE_CLASS (mode
) == MODE_INT
9857 && GET_MODE_SIZE (mode
) < 4)
9859 rtx shift_op
, shift_reg
;
9862 /* We check both sides of the MINUS for shifter operands since,
9863 unlike PLUS, it's not commutative. */
9865 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0);
9866 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1);
9868 /* Slightly disparage, as we might need to widen the result. */
9871 *cost
+= extra_cost
->alu
.arith
;
9873 if (CONST_INT_P (XEXP (x
, 0)))
9875 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9884 *cost
+= COSTS_N_INSNS (1);
9886 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9888 rtx op1
= XEXP (x
, 1);
9891 *cost
+= 2 * extra_cost
->alu
.arith
;
9893 if (GET_CODE (op1
) == ZERO_EXTEND
)
9894 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
9897 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
9898 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9902 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9905 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9906 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
9908 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
9911 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9912 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9915 *cost
+= (extra_cost
->alu
.arith
9916 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9917 ? extra_cost
->alu
.arith
9918 : extra_cost
->alu
.arith_shift
));
9919 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
9920 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9921 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9926 *cost
+= 2 * extra_cost
->alu
.arith
;
9932 *cost
= LIBCALL_COST (2);
9936 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9937 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9939 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9941 rtx mul_op0
, mul_op1
, add_op
;
9944 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9946 mul_op0
= XEXP (XEXP (x
, 0), 0);
9947 mul_op1
= XEXP (XEXP (x
, 0), 1);
9948 add_op
= XEXP (x
, 1);
9950 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9951 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9952 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
9958 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9961 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9963 *cost
= LIBCALL_COST (2);
9967 /* Narrow modes can be synthesized in SImode, but the range
9968 of useful sub-operations is limited. Check for shift operations
9969 on one of the operands. Only left shifts can be used in the
9971 if (GET_MODE_CLASS (mode
) == MODE_INT
9972 && GET_MODE_SIZE (mode
) < 4)
9974 rtx shift_op
, shift_reg
;
9977 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0);
9979 if (CONST_INT_P (XEXP (x
, 1)))
9981 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9982 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9984 *cost
= COSTS_N_INSNS (insns
);
9986 *cost
+= insns
* extra_cost
->alu
.arith
;
9987 /* Slightly penalize a narrow operation as the result may
9989 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9993 /* Slightly penalize a narrow operation as the result may
9997 *cost
+= extra_cost
->alu
.arith
;
10002 if (mode
== SImode
)
10004 rtx shift_op
, shift_reg
;
10006 if (TARGET_INT_SIMD
10007 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10008 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10010 /* UXTA[BH] or SXTA[BH]. */
10012 *cost
+= extra_cost
->alu
.extend_arith
;
10013 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10015 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
10020 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10021 if (shift_op
!= NULL
)
10026 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10027 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10030 *cost
+= extra_cost
->alu
.arith_shift
;
10032 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10033 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10036 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10038 rtx mul_op
= XEXP (x
, 0);
10040 if (TARGET_DSP_MULTIPLY
10041 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
10042 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10043 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10044 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10045 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
10046 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
10047 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
10048 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
10049 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10050 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10051 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10052 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
10055 /* SMLA[BT][BT]. */
10057 *cost
+= extra_cost
->mult
[0].extend_add
;
10058 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
10059 SIGN_EXTEND
, 0, speed_p
)
10060 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
10061 SIGN_EXTEND
, 0, speed_p
)
10062 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10067 *cost
+= extra_cost
->mult
[0].add
;
10068 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
10069 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
10070 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10073 if (CONST_INT_P (XEXP (x
, 1)))
10075 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10076 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10078 *cost
= COSTS_N_INSNS (insns
);
10080 *cost
+= insns
* extra_cost
->alu
.arith
;
10081 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
10085 *cost
+= extra_cost
->alu
.arith
;
10090 if (mode
== DImode
)
10092 if (GET_CODE (XEXP (x
, 0)) == MULT
10093 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
10094 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
10095 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
10096 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
10099 *cost
+= extra_cost
->mult
[1].extend_add
;
10100 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10101 ZERO_EXTEND
, 0, speed_p
)
10102 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
10103 ZERO_EXTEND
, 0, speed_p
)
10104 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10108 *cost
+= COSTS_N_INSNS (1);
10110 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10111 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10114 *cost
+= (extra_cost
->alu
.arith
10115 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10116 ? extra_cost
->alu
.arith
10117 : extra_cost
->alu
.arith_shift
));
10119 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10121 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10126 *cost
+= 2 * extra_cost
->alu
.arith
;
10131 *cost
= LIBCALL_COST (2);
10134 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
10137 *cost
+= extra_cost
->alu
.rev
;
10141 /* Fall through. */
10142 case AND
: case XOR
:
10143 if (mode
== SImode
)
10145 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
10146 rtx op0
= XEXP (x
, 0);
10147 rtx shift_op
, shift_reg
;
10151 || (code
== IOR
&& TARGET_THUMB2
)))
10152 op0
= XEXP (op0
, 0);
10155 shift_op
= shifter_op_p (op0
, &shift_reg
);
10156 if (shift_op
!= NULL
)
10161 *cost
+= extra_cost
->alu
.log_shift_reg
;
10162 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10165 *cost
+= extra_cost
->alu
.log_shift
;
10167 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10168 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10172 if (CONST_INT_P (XEXP (x
, 1)))
10174 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
10175 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10178 *cost
= COSTS_N_INSNS (insns
);
10180 *cost
+= insns
* extra_cost
->alu
.logical
;
10181 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
10186 *cost
+= extra_cost
->alu
.logical
;
10187 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
10188 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10192 if (mode
== DImode
)
10194 rtx op0
= XEXP (x
, 0);
10195 enum rtx_code subcode
= GET_CODE (op0
);
10197 *cost
+= COSTS_N_INSNS (1);
10201 || (code
== IOR
&& TARGET_THUMB2
)))
10202 op0
= XEXP (op0
, 0);
10204 if (GET_CODE (op0
) == ZERO_EXTEND
)
10207 *cost
+= 2 * extra_cost
->alu
.logical
;
10209 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
10211 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10214 else if (GET_CODE (op0
) == SIGN_EXTEND
)
10217 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
10219 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
10221 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10226 *cost
+= 2 * extra_cost
->alu
.logical
;
10232 *cost
= LIBCALL_COST (2);
10236 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10237 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10239 rtx op0
= XEXP (x
, 0);
10241 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
10242 op0
= XEXP (op0
, 0);
10245 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10247 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
10248 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
10251 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10253 *cost
= LIBCALL_COST (2);
10257 if (mode
== SImode
)
10259 if (TARGET_DSP_MULTIPLY
10260 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10261 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10262 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10263 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10264 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10265 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10266 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10267 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10268 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10269 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10270 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10271 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10274 /* SMUL[TB][TB]. */
10276 *cost
+= extra_cost
->mult
[0].extend
;
10277 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
10278 SIGN_EXTEND
, 0, speed_p
);
10279 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
10280 SIGN_EXTEND
, 1, speed_p
);
10284 *cost
+= extra_cost
->mult
[0].simple
;
10288 if (mode
== DImode
)
10290 if ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10291 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10292 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10293 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
))
10296 *cost
+= extra_cost
->mult
[1].extend
;
10297 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
10298 ZERO_EXTEND
, 0, speed_p
)
10299 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10300 ZERO_EXTEND
, 0, speed_p
));
10304 *cost
= LIBCALL_COST (2);
10309 *cost
= LIBCALL_COST (2);
10313 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10314 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10316 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10319 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
10324 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10328 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10330 *cost
= LIBCALL_COST (1);
10334 if (mode
== SImode
)
10336 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10338 *cost
+= COSTS_N_INSNS (1);
10339 /* Assume the non-flag-changing variant. */
10341 *cost
+= (extra_cost
->alu
.log_shift
10342 + extra_cost
->alu
.arith_shift
);
10343 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10347 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10348 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10350 *cost
+= COSTS_N_INSNS (1);
10351 /* No extra cost for MOV imm and MVN imm. */
10352 /* If the comparison op is using the flags, there's no further
10353 cost, otherwise we need to add the cost of the comparison. */
10354 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10355 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10356 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10358 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10359 *cost
+= (COSTS_N_INSNS (1)
10360 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10362 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10365 *cost
+= extra_cost
->alu
.arith
;
10371 *cost
+= extra_cost
->alu
.arith
;
10375 if (GET_MODE_CLASS (mode
) == MODE_INT
10376 && GET_MODE_SIZE (mode
) < 4)
10378 /* Slightly disparage, as we might need an extend operation. */
10381 *cost
+= extra_cost
->alu
.arith
;
10385 if (mode
== DImode
)
10387 *cost
+= COSTS_N_INSNS (1);
10389 *cost
+= 2 * extra_cost
->alu
.arith
;
10394 *cost
= LIBCALL_COST (1);
10398 if (mode
== SImode
)
10401 rtx shift_reg
= NULL
;
10403 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10407 if (shift_reg
!= NULL
)
10410 *cost
+= extra_cost
->alu
.log_shift_reg
;
10411 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10414 *cost
+= extra_cost
->alu
.log_shift
;
10415 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
10420 *cost
+= extra_cost
->alu
.logical
;
10423 if (mode
== DImode
)
10425 *cost
+= COSTS_N_INSNS (1);
10431 *cost
+= LIBCALL_COST (1);
10436 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10438 *cost
+= COSTS_N_INSNS (3);
10441 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
10442 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
10444 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
10445 /* Assume that if one arm of the if_then_else is a register,
10446 that it will be tied with the result and eliminate the
10447 conditional insn. */
10448 if (REG_P (XEXP (x
, 1)))
10450 else if (REG_P (XEXP (x
, 2)))
10456 if (extra_cost
->alu
.non_exec_costs_exec
)
10457 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10459 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10462 *cost
+= op1cost
+ op2cost
;
10468 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10472 machine_mode op0mode
;
10473 /* We'll mostly assume that the cost of a compare is the cost of the
10474 LHS. However, there are some notable exceptions. */
10476 /* Floating point compares are never done as side-effects. */
10477 op0mode
= GET_MODE (XEXP (x
, 0));
10478 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10479 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10482 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10484 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10486 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
10492 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10494 *cost
= LIBCALL_COST (2);
10498 /* DImode compares normally take two insns. */
10499 if (op0mode
== DImode
)
10501 *cost
+= COSTS_N_INSNS (1);
10503 *cost
+= 2 * extra_cost
->alu
.arith
;
10507 if (op0mode
== SImode
)
10512 if (XEXP (x
, 1) == const0_rtx
10513 && !(REG_P (XEXP (x
, 0))
10514 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10515 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10517 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10519 /* Multiply operations that set the flags are often
10520 significantly more expensive. */
10522 && GET_CODE (XEXP (x
, 0)) == MULT
10523 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10524 *cost
+= extra_cost
->mult
[0].flag_setting
;
10527 && GET_CODE (XEXP (x
, 0)) == PLUS
10528 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10529 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10531 *cost
+= extra_cost
->mult
[0].flag_setting
;
10536 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10537 if (shift_op
!= NULL
)
10539 if (shift_reg
!= NULL
)
10541 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
10544 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10547 *cost
+= extra_cost
->alu
.arith_shift
;
10548 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
10549 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
10554 *cost
+= extra_cost
->alu
.arith
;
10555 if (CONST_INT_P (XEXP (x
, 1))
10556 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10558 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10566 *cost
= LIBCALL_COST (2);
10589 if (outer_code
== SET
)
10591 /* Is it a store-flag operation? */
10592 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10593 && XEXP (x
, 1) == const0_rtx
)
10595 /* Thumb also needs an IT insn. */
10596 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
10599 if (XEXP (x
, 1) == const0_rtx
)
10604 /* LSR Rd, Rn, #31. */
10606 *cost
+= extra_cost
->alu
.shift
;
10616 *cost
+= COSTS_N_INSNS (1);
10620 /* RSBS T1, Rn, Rn, LSR #31
10622 *cost
+= COSTS_N_INSNS (1);
10624 *cost
+= extra_cost
->alu
.arith_shift
;
10628 /* RSB Rd, Rn, Rn, ASR #1
10629 LSR Rd, Rd, #31. */
10630 *cost
+= COSTS_N_INSNS (1);
10632 *cost
+= (extra_cost
->alu
.arith_shift
10633 + extra_cost
->alu
.shift
);
10639 *cost
+= COSTS_N_INSNS (1);
10641 *cost
+= extra_cost
->alu
.shift
;
10645 /* Remaining cases are either meaningless or would take
10646 three insns anyway. */
10647 *cost
= COSTS_N_INSNS (3);
10650 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10655 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10656 if (CONST_INT_P (XEXP (x
, 1))
10657 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10659 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10666 /* Not directly inside a set. If it involves the condition code
10667 register it must be the condition for a branch, cond_exec or
10668 I_T_E operation. Since the comparison is performed elsewhere
10669 this is just the control part which has no additional
10671 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10672 && XEXP (x
, 1) == const0_rtx
)
10680 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10681 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10684 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10688 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10690 *cost
= LIBCALL_COST (1);
10694 if (mode
== SImode
)
10697 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10701 *cost
= LIBCALL_COST (1);
10705 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10706 && MEM_P (XEXP (x
, 0)))
10708 if (mode
== DImode
)
10709 *cost
+= COSTS_N_INSNS (1);
10714 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10715 *cost
+= extra_cost
->ldst
.load
;
10717 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10719 if (mode
== DImode
)
10720 *cost
+= extra_cost
->alu
.shift
;
10725 /* Widening from less than 32-bits requires an extend operation. */
10726 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10728 /* We have SXTB/SXTH. */
10729 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10731 *cost
+= extra_cost
->alu
.extend
;
10733 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10735 /* Needs two shifts. */
10736 *cost
+= COSTS_N_INSNS (1);
10737 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10739 *cost
+= 2 * extra_cost
->alu
.shift
;
10742 /* Widening beyond 32-bits requires one more insn. */
10743 if (mode
== DImode
)
10745 *cost
+= COSTS_N_INSNS (1);
10747 *cost
+= extra_cost
->alu
.shift
;
10754 || GET_MODE (XEXP (x
, 0)) == SImode
10755 || GET_MODE (XEXP (x
, 0)) == QImode
)
10756 && MEM_P (XEXP (x
, 0)))
10758 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10760 if (mode
== DImode
)
10761 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10766 /* Widening from less than 32-bits requires an extend operation. */
10767 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10769 /* UXTB can be a shorter instruction in Thumb2, but it might
10770 be slower than the AND Rd, Rn, #255 alternative. When
10771 optimizing for speed it should never be slower to use
10772 AND, and we don't really model 16-bit vs 32-bit insns
10775 *cost
+= extra_cost
->alu
.logical
;
10777 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10779 /* We have UXTB/UXTH. */
10780 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10782 *cost
+= extra_cost
->alu
.extend
;
10784 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10786 /* Needs two shifts. It's marginally preferable to use
10787 shifts rather than two BIC instructions as the second
10788 shift may merge with a subsequent insn as a shifter
10790 *cost
= COSTS_N_INSNS (2);
10791 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10793 *cost
+= 2 * extra_cost
->alu
.shift
;
10796 /* Widening beyond 32-bits requires one more insn. */
10797 if (mode
== DImode
)
10799 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10806 /* CONST_INT has no mode, so we cannot tell for sure how many
10807 insns are really going to be needed. The best we can do is
10808 look at the value passed. If it fits in SImode, then assume
10809 that's the mode it will be used for. Otherwise assume it
10810 will be used in DImode. */
10811 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10816 /* Avoid blowing up in arm_gen_constant (). */
10817 if (!(outer_code
== PLUS
10818 || outer_code
== AND
10819 || outer_code
== IOR
10820 || outer_code
== XOR
10821 || outer_code
== MINUS
))
10825 if (mode
== SImode
)
10827 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10828 INTVAL (x
), NULL
, NULL
,
10834 *cost
+= COSTS_N_INSNS (arm_gen_constant
10835 (outer_code
, SImode
, NULL
,
10836 trunc_int_for_mode (INTVAL (x
), SImode
),
10838 + arm_gen_constant (outer_code
, SImode
, NULL
,
10839 INTVAL (x
) >> 32, NULL
,
10851 if (arm_arch_thumb2
&& !flag_pic
)
10852 *cost
+= COSTS_N_INSNS (1);
10854 *cost
+= extra_cost
->ldst
.load
;
10857 *cost
+= COSTS_N_INSNS (1);
10861 *cost
+= COSTS_N_INSNS (1);
10863 *cost
+= extra_cost
->alu
.arith
;
10869 *cost
= COSTS_N_INSNS (4);
10874 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10875 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10877 if (vfp3_const_double_rtx (x
))
10880 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10886 if (mode
== DFmode
)
10887 *cost
+= extra_cost
->ldst
.loadd
;
10889 *cost
+= extra_cost
->ldst
.loadf
;
10892 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
10896 *cost
= COSTS_N_INSNS (4);
10902 && TARGET_HARD_FLOAT
10903 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10904 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10905 *cost
= COSTS_N_INSNS (1);
10907 *cost
= COSTS_N_INSNS (4);
10912 /* When optimizing for size, we prefer constant pool entries to
10913 MOVW/MOVT pairs, so bump the cost of these slightly. */
10920 *cost
+= extra_cost
->alu
.clz
;
10924 if (XEXP (x
, 1) == const0_rtx
)
10927 *cost
+= extra_cost
->alu
.log_shift
;
10928 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10931 /* Fall through. */
10935 *cost
+= COSTS_N_INSNS (1);
10939 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10940 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10941 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10942 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10943 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10944 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10945 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10946 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10950 *cost
+= extra_cost
->mult
[1].extend
;
10951 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
10952 ZERO_EXTEND
, 0, speed_p
)
10953 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
10954 ZERO_EXTEND
, 0, speed_p
));
10957 *cost
= LIBCALL_COST (1);
10960 case UNSPEC_VOLATILE
:
10962 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10965 /* Reading the PC is like reading any other register. Writing it
10966 is more expensive, but we take that into account elsewhere. */
10971 /* TODO: Simple zero_extract of bottom bits using AND. */
10972 /* Fall through. */
10976 && CONST_INT_P (XEXP (x
, 1))
10977 && CONST_INT_P (XEXP (x
, 2)))
10980 *cost
+= extra_cost
->alu
.bfx
;
10981 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10984 /* Without UBFX/SBFX, need to resort to shift operations. */
10985 *cost
+= COSTS_N_INSNS (1);
10987 *cost
+= 2 * extra_cost
->alu
.shift
;
10988 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
10992 if (TARGET_HARD_FLOAT
)
10995 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10997 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10999 /* Pre v8, widening HF->DF is a two-step process, first
11000 widening to SFmode. */
11001 *cost
+= COSTS_N_INSNS (1);
11003 *cost
+= extra_cost
->fp
[0].widen
;
11005 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11009 *cost
= LIBCALL_COST (1);
11012 case FLOAT_TRUNCATE
:
11013 if (TARGET_HARD_FLOAT
)
11016 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
11017 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11019 /* Vector modes? */
11021 *cost
= LIBCALL_COST (1);
11025 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
11027 rtx op0
= XEXP (x
, 0);
11028 rtx op1
= XEXP (x
, 1);
11029 rtx op2
= XEXP (x
, 2);
11032 /* vfms or vfnma. */
11033 if (GET_CODE (op0
) == NEG
)
11034 op0
= XEXP (op0
, 0);
11036 /* vfnms or vfnma. */
11037 if (GET_CODE (op2
) == NEG
)
11038 op2
= XEXP (op2
, 0);
11040 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
11041 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
11042 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
11045 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
11050 *cost
= LIBCALL_COST (3);
11055 if (TARGET_HARD_FLOAT
)
11057 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11058 a vcvt fixed-point conversion. */
11059 if (code
== FIX
&& mode
== SImode
11060 && GET_CODE (XEXP (x
, 0)) == FIX
11061 && GET_MODE (XEXP (x
, 0)) == SFmode
11062 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11063 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
11067 *cost
+= extra_cost
->fp
[0].toint
;
11069 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
11074 if (GET_MODE_CLASS (mode
) == MODE_INT
)
11076 mode
= GET_MODE (XEXP (x
, 0));
11078 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
11079 /* Strip of the 'cost' of rounding towards zero. */
11080 if (GET_CODE (XEXP (x
, 0)) == FIX
)
11081 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
11084 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11085 /* ??? Increase the cost to deal with transferring from
11086 FP -> CORE registers? */
11089 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11093 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
11096 /* Vector costs? */
11098 *cost
= LIBCALL_COST (1);
11102 case UNSIGNED_FLOAT
:
11103 if (TARGET_HARD_FLOAT
)
11105 /* ??? Increase the cost to deal with transferring from CORE
11106 -> FP registers? */
11108 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
11111 *cost
= LIBCALL_COST (1);
11119 /* Just a guess. Guess number of instructions in the asm
11120 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11121 though (see PR60663). */
11122 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
11123 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
11125 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
11129 if (mode
!= VOIDmode
)
11130 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
11132 *cost
= COSTS_N_INSNS (4); /* Who knows? */
11137 #undef HANDLE_NARROW_SHIFT_ARITH
11139 /* RTX costs entry point. */
11142 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
11143 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
11146 int code
= GET_CODE (x
);
11147 gcc_assert (current_tune
->insn_extra_cost
);
11149 result
= arm_rtx_costs_internal (x
, (enum rtx_code
) code
,
11150 (enum rtx_code
) outer_code
,
11151 current_tune
->insn_extra_cost
,
11154 if (dump_file
&& arm_verbose_cost
)
11156 print_rtl_single (dump_file
, x
);
11157 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
11158 *total
, result
? "final" : "partial");
11163 /* All address computations that can be done are free, but rtx cost returns
11164 the same for practically all of them. So we weight the different types
11165 of address here in the order (most pref first):
11166 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11168 arm_arm_address_cost (rtx x
)
11170 enum rtx_code c
= GET_CODE (x
);
11172 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
11174 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
11179 if (CONST_INT_P (XEXP (x
, 1)))
11182 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
11192 arm_thumb_address_cost (rtx x
)
11194 enum rtx_code c
= GET_CODE (x
);
11199 && REG_P (XEXP (x
, 0))
11200 && CONST_INT_P (XEXP (x
, 1)))
11207 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
11208 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11210 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11213 /* Adjust cost hook for XScale. */
11215 xscale_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11218 /* Some true dependencies can have a higher cost depending
11219 on precisely how certain input operands are used. */
11221 && recog_memoized (insn
) >= 0
11222 && recog_memoized (dep
) >= 0)
11224 int shift_opnum
= get_attr_shift (insn
);
11225 enum attr_type attr_type
= get_attr_type (dep
);
11227 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11228 operand for INSN. If we have a shifted input operand and the
11229 instruction we depend on is another ALU instruction, then we may
11230 have to account for an additional stall. */
11231 if (shift_opnum
!= 0
11232 && (attr_type
== TYPE_ALU_SHIFT_IMM
11233 || attr_type
== TYPE_ALUS_SHIFT_IMM
11234 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11235 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11236 || attr_type
== TYPE_ALU_SHIFT_REG
11237 || attr_type
== TYPE_ALUS_SHIFT_REG
11238 || attr_type
== TYPE_LOGIC_SHIFT_REG
11239 || attr_type
== TYPE_LOGICS_SHIFT_REG
11240 || attr_type
== TYPE_MOV_SHIFT
11241 || attr_type
== TYPE_MVN_SHIFT
11242 || attr_type
== TYPE_MOV_SHIFT_REG
11243 || attr_type
== TYPE_MVN_SHIFT_REG
))
11245 rtx shifted_operand
;
11248 /* Get the shifted operand. */
11249 extract_insn (insn
);
11250 shifted_operand
= recog_data
.operand
[shift_opnum
];
11252 /* Iterate over all the operands in DEP. If we write an operand
11253 that overlaps with SHIFTED_OPERAND, then we have increase the
11254 cost of this dependency. */
11255 extract_insn (dep
);
11256 preprocess_constraints (dep
);
11257 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11259 /* We can ignore strict inputs. */
11260 if (recog_data
.operand_type
[opno
] == OP_IN
)
11263 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11275 /* Adjust cost hook for Cortex A9. */
11277 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11287 case REG_DEP_OUTPUT
:
11288 if (recog_memoized (insn
) >= 0
11289 && recog_memoized (dep
) >= 0)
11291 if (GET_CODE (PATTERN (insn
)) == SET
)
11294 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11296 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11298 enum attr_type attr_type_insn
= get_attr_type (insn
);
11299 enum attr_type attr_type_dep
= get_attr_type (dep
);
11301 /* By default all dependencies of the form
11304 have an extra latency of 1 cycle because
11305 of the input and output dependency in this
11306 case. However this gets modeled as an true
11307 dependency and hence all these checks. */
11308 if (REG_P (SET_DEST (PATTERN (insn
)))
11309 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
11311 /* FMACS is a special case where the dependent
11312 instruction can be issued 3 cycles before
11313 the normal latency in case of an output
11315 if ((attr_type_insn
== TYPE_FMACS
11316 || attr_type_insn
== TYPE_FMACD
)
11317 && (attr_type_dep
== TYPE_FMACS
11318 || attr_type_dep
== TYPE_FMACD
))
11320 if (dep_type
== REG_DEP_OUTPUT
)
11321 *cost
= insn_default_latency (dep
) - 3;
11323 *cost
= insn_default_latency (dep
);
11328 if (dep_type
== REG_DEP_OUTPUT
)
11329 *cost
= insn_default_latency (dep
) + 1;
11331 *cost
= insn_default_latency (dep
);
11341 gcc_unreachable ();
11347 /* Adjust cost hook for FA726TE. */
11349 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11352 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11353 have penalty of 3. */
11354 if (dep_type
== REG_DEP_TRUE
11355 && recog_memoized (insn
) >= 0
11356 && recog_memoized (dep
) >= 0
11357 && get_attr_conds (dep
) == CONDS_SET
)
11359 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11360 if (get_attr_conds (insn
) == CONDS_USE
11361 && get_attr_type (insn
) != TYPE_BRANCH
)
11367 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11368 || get_attr_conds (insn
) == CONDS_USE
)
11378 /* Implement TARGET_REGISTER_MOVE_COST.
11380 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11381 it is typically more expensive than a single memory access. We set
11382 the cost to less than two memory accesses so that floating
11383 point to integer conversion does not go through memory. */
11386 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11387 reg_class_t from
, reg_class_t to
)
11391 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11392 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11394 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11395 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11397 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11404 if (from
== HI_REGS
|| to
== HI_REGS
)
11411 /* Implement TARGET_MEMORY_MOVE_COST. */
11414 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11415 bool in ATTRIBUTE_UNUSED
)
11421 if (GET_MODE_SIZE (mode
) < 4)
11424 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11428 /* Vectorizer cost model implementation. */
11430 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11432 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11434 int misalign ATTRIBUTE_UNUSED
)
11438 switch (type_of_cost
)
11441 return current_tune
->vec_costs
->scalar_stmt_cost
;
11444 return current_tune
->vec_costs
->scalar_load_cost
;
11447 return current_tune
->vec_costs
->scalar_store_cost
;
11450 return current_tune
->vec_costs
->vec_stmt_cost
;
11453 return current_tune
->vec_costs
->vec_align_load_cost
;
11456 return current_tune
->vec_costs
->vec_store_cost
;
11458 case vec_to_scalar
:
11459 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11461 case scalar_to_vec
:
11462 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11464 case unaligned_load
:
11465 case vector_gather_load
:
11466 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11468 case unaligned_store
:
11469 case vector_scatter_store
:
11470 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11472 case cond_branch_taken
:
11473 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11475 case cond_branch_not_taken
:
11476 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11479 case vec_promote_demote
:
11480 return current_tune
->vec_costs
->vec_stmt_cost
;
11482 case vec_construct
:
11483 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11484 return elements
/ 2 + 1;
11487 gcc_unreachable ();
11491 /* Implement targetm.vectorize.add_stmt_cost. */
11494 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11495 struct _stmt_vec_info
*stmt_info
, int misalign
,
11496 enum vect_cost_model_location where
)
11498 unsigned *cost
= (unsigned *) data
;
11499 unsigned retval
= 0;
11501 if (flag_vect_cost_model
)
11503 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11504 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11506 /* Statements in an inner loop relative to the loop being
11507 vectorized are weighted more heavily. The value here is
11508 arbitrary and could potentially be improved with analysis. */
11509 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11510 count
*= 50; /* FIXME. */
11512 retval
= (unsigned) (count
* stmt_cost
);
11513 cost
[where
] += retval
;
11519 /* Return true if and only if this insn can dual-issue only as older. */
11521 cortexa7_older_only (rtx_insn
*insn
)
11523 if (recog_memoized (insn
) < 0)
11526 switch (get_attr_type (insn
))
11528 case TYPE_ALU_DSP_REG
:
11529 case TYPE_ALU_SREG
:
11530 case TYPE_ALUS_SREG
:
11531 case TYPE_LOGIC_REG
:
11532 case TYPE_LOGICS_REG
:
11534 case TYPE_ADCS_REG
:
11539 case TYPE_SHIFT_IMM
:
11540 case TYPE_SHIFT_REG
:
11541 case TYPE_LOAD_BYTE
:
11544 case TYPE_FFARITHS
:
11546 case TYPE_FFARITHD
:
11564 case TYPE_F_STORES
:
11571 /* Return true if and only if this insn can dual-issue as younger. */
11573 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11575 if (recog_memoized (insn
) < 0)
11578 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11582 switch (get_attr_type (insn
))
11585 case TYPE_ALUS_IMM
:
11586 case TYPE_LOGIC_IMM
:
11587 case TYPE_LOGICS_IMM
:
11592 case TYPE_MOV_SHIFT
:
11593 case TYPE_MOV_SHIFT_REG
:
11603 /* Look for an instruction that can dual issue only as an older
11604 instruction, and move it in front of any instructions that can
11605 dual-issue as younger, while preserving the relative order of all
11606 other instructions in the ready list. This is a hueuristic to help
11607 dual-issue in later cycles, by postponing issue of more flexible
11608 instructions. This heuristic may affect dual issue opportunities
11609 in the current cycle. */
11611 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11612 int *n_readyp
, int clock
)
11615 int first_older_only
= -1, first_younger
= -1;
11619 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11623 /* Traverse the ready list from the head (the instruction to issue
11624 first), and looking for the first instruction that can issue as
11625 younger and the first instruction that can dual-issue only as
11627 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11629 rtx_insn
*insn
= ready
[i
];
11630 if (cortexa7_older_only (insn
))
11632 first_older_only
= i
;
11634 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11637 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11641 /* Nothing to reorder because either no younger insn found or insn
11642 that can dual-issue only as older appears before any insn that
11643 can dual-issue as younger. */
11644 if (first_younger
== -1)
11647 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11651 /* Nothing to reorder because no older-only insn in the ready list. */
11652 if (first_older_only
== -1)
11655 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11659 /* Move first_older_only insn before first_younger. */
11661 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11662 INSN_UID(ready
[first_older_only
]),
11663 INSN_UID(ready
[first_younger
]));
11664 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
11665 for (i
= first_older_only
; i
< first_younger
; i
++)
11667 ready
[i
] = ready
[i
+1];
11670 ready
[i
] = first_older_only_insn
;
11674 /* Implement TARGET_SCHED_REORDER. */
11676 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
11681 case TARGET_CPU_cortexa7
:
11682 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11685 /* Do nothing for other cores. */
11689 return arm_issue_rate ();
11692 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11693 It corrects the value of COST based on the relationship between
11694 INSN and DEP through the dependence LINK. It returns the new
11695 value. There is a per-core adjust_cost hook to adjust scheduler costs
11696 and the per-core hook can choose to completely override the generic
11697 adjust_cost function. Only put bits of code into arm_adjust_cost that
11698 are common across all cores. */
11700 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
11705 /* When generating Thumb-1 code, we want to place flag-setting operations
11706 close to a conditional branch which depends on them, so that we can
11707 omit the comparison. */
11710 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11711 && recog_memoized (dep
) >= 0
11712 && get_attr_conds (dep
) == CONDS_SET
)
11715 if (current_tune
->sched_adjust_cost
!= NULL
)
11717 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
11721 /* XXX Is this strictly true? */
11722 if (dep_type
== REG_DEP_ANTI
11723 || dep_type
== REG_DEP_OUTPUT
)
11726 /* Call insns don't incur a stall, even if they follow a load. */
11731 if ((i_pat
= single_set (insn
)) != NULL
11732 && MEM_P (SET_SRC (i_pat
))
11733 && (d_pat
= single_set (dep
)) != NULL
11734 && MEM_P (SET_DEST (d_pat
)))
11736 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11737 /* This is a load after a store, there is no conflict if the load reads
11738 from a cached area. Assume that loads from the stack, and from the
11739 constant pool are cached, and that others will miss. This is a
11742 if ((GET_CODE (src_mem
) == SYMBOL_REF
11743 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11744 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11745 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11746 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11754 arm_max_conditional_execute (void)
11756 return max_insns_skipped
;
11760 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11763 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11765 return (optimize
> 0) ? 2 : 0;
11769 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11771 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11774 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11775 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11776 sequences of non-executed instructions in IT blocks probably take the same
11777 amount of time as executed instructions (and the IT instruction itself takes
11778 space in icache). This function was experimentally determined to give good
11779 results on a popular embedded benchmark. */
11782 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11784 return (TARGET_32BIT
&& speed_p
) ? 1
11785 : arm_default_branch_cost (speed_p
, predictable_p
);
11789 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
11791 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11794 static bool fp_consts_inited
= false;
11796 static REAL_VALUE_TYPE value_fp0
;
11799 init_fp_table (void)
11803 r
= REAL_VALUE_ATOF ("0", DFmode
);
11805 fp_consts_inited
= true;
11808 /* Return TRUE if rtx X is a valid immediate FP constant. */
11810 arm_const_double_rtx (rtx x
)
11812 const REAL_VALUE_TYPE
*r
;
11814 if (!fp_consts_inited
)
11817 r
= CONST_DOUBLE_REAL_VALUE (x
);
11818 if (REAL_VALUE_MINUS_ZERO (*r
))
11821 if (real_equal (r
, &value_fp0
))
11827 /* VFPv3 has a fairly wide range of representable immediates, formed from
11828 "quarter-precision" floating-point values. These can be evaluated using this
11829 formula (with ^ for exponentiation):
11833 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11834 16 <= n <= 31 and 0 <= r <= 7.
11836 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11838 - A (most-significant) is the sign bit.
11839 - BCD are the exponent (encoded as r XOR 3).
11840 - EFGH are the mantissa (encoded as n - 16).
11843 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11844 fconst[sd] instruction, or -1 if X isn't suitable. */
11846 vfp3_const_double_index (rtx x
)
11848 REAL_VALUE_TYPE r
, m
;
11849 int sign
, exponent
;
11850 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11851 unsigned HOST_WIDE_INT mask
;
11852 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11855 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11858 r
= *CONST_DOUBLE_REAL_VALUE (x
);
11860 /* We can't represent these things, so detect them first. */
11861 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11864 /* Extract sign, exponent and mantissa. */
11865 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11866 r
= real_value_abs (&r
);
11867 exponent
= REAL_EXP (&r
);
11868 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11869 highest (sign) bit, with a fixed binary point at bit point_pos.
11870 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11871 bits for the mantissa, this may fail (low bits would be lost). */
11872 real_ldexp (&m
, &r
, point_pos
- exponent
);
11873 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
11874 mantissa
= w
.elt (0);
11875 mant_hi
= w
.elt (1);
11877 /* If there are bits set in the low part of the mantissa, we can't
11878 represent this value. */
11882 /* Now make it so that mantissa contains the most-significant bits, and move
11883 the point_pos to indicate that the least-significant bits have been
11885 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11886 mantissa
= mant_hi
;
11888 /* We can permit four significant bits of mantissa only, plus a high bit
11889 which is always 1. */
11890 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
11891 if ((mantissa
& mask
) != 0)
11894 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11895 mantissa
>>= point_pos
- 5;
11897 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11898 floating-point immediate zero with Neon using an integer-zero load, but
11899 that case is handled elsewhere.) */
11903 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11905 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11906 normalized significands are in the range [1, 2). (Our mantissa is shifted
11907 left 4 places at this point relative to normalized IEEE754 values). GCC
11908 internally uses [0.5, 1) (see real.c), so the exponent returned from
11909 REAL_EXP must be altered. */
11910 exponent
= 5 - exponent
;
11912 if (exponent
< 0 || exponent
> 7)
11915 /* Sign, mantissa and exponent are now in the correct form to plug into the
11916 formula described in the comment above. */
11917 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11920 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11922 vfp3_const_double_rtx (rtx x
)
11927 return vfp3_const_double_index (x
) != -1;
11930 /* Recognize immediates which can be used in various Neon instructions. Legal
11931 immediates are described by the following table (for VMVN variants, the
11932 bitwise inverse of the constant shown is recognized. In either case, VMOV
11933 is output and the correct instruction to use for a given constant is chosen
11934 by the assembler). The constant shown is replicated across all elements of
11935 the destination vector.
11937 insn elems variant constant (binary)
11938 ---- ----- ------- -----------------
11939 vmov i32 0 00000000 00000000 00000000 abcdefgh
11940 vmov i32 1 00000000 00000000 abcdefgh 00000000
11941 vmov i32 2 00000000 abcdefgh 00000000 00000000
11942 vmov i32 3 abcdefgh 00000000 00000000 00000000
11943 vmov i16 4 00000000 abcdefgh
11944 vmov i16 5 abcdefgh 00000000
11945 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11946 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11947 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11948 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11949 vmvn i16 10 00000000 abcdefgh
11950 vmvn i16 11 abcdefgh 00000000
11951 vmov i32 12 00000000 00000000 abcdefgh 11111111
11952 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11953 vmov i32 14 00000000 abcdefgh 11111111 11111111
11954 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11955 vmov i8 16 abcdefgh
11956 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11957 eeeeeeee ffffffff gggggggg hhhhhhhh
11958 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11959 vmov f32 19 00000000 00000000 00000000 00000000
11961 For case 18, B = !b. Representable values are exactly those accepted by
11962 vfp3_const_double_index, but are output as floating-point numbers rather
11965 For case 19, we will change it to vmov.i32 when assembling.
11967 Variants 0-5 (inclusive) may also be used as immediates for the second
11968 operand of VORR/VBIC instructions.
11970 The INVERSE argument causes the bitwise inverse of the given operand to be
11971 recognized instead (used for recognizing legal immediates for the VAND/VORN
11972 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11973 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11974 output, rather than the real insns vbic/vorr).
11976 INVERSE makes no difference to the recognition of float vectors.
11978 The return value is the variant of immediate as shown in the above table, or
11979 -1 if the given value doesn't match any of the listed patterns.
11982 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
11983 rtx
*modconst
, int *elementwidth
)
11985 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11987 for (i = 0; i < idx; i += (STRIDE)) \
11992 immtype = (CLASS); \
11993 elsize = (ELSIZE); \
11997 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
11998 unsigned int innersize
;
11999 unsigned char bytes
[16];
12000 int immtype
= -1, matches
;
12001 unsigned int invmask
= inverse
? 0xff : 0;
12002 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
12005 n_elts
= CONST_VECTOR_NUNITS (op
);
12009 gcc_assert (mode
!= VOIDmode
);
12012 innersize
= GET_MODE_UNIT_SIZE (mode
);
12014 /* Vectors of float constants. */
12015 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
12017 rtx el0
= CONST_VECTOR_ELT (op
, 0);
12019 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
12022 /* FP16 vectors cannot be represented. */
12023 if (GET_MODE_INNER (mode
) == HFmode
)
12026 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12027 are distinct in this context. */
12028 if (!const_vec_duplicate_p (op
))
12032 *modconst
= CONST_VECTOR_ELT (op
, 0);
12037 if (el0
== CONST0_RTX (GET_MODE (el0
)))
12043 /* The tricks done in the code below apply for little-endian vector layout.
12044 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12045 FIXME: Implement logic for big-endian vectors. */
12046 if (BYTES_BIG_ENDIAN
&& vector
&& !const_vec_duplicate_p (op
))
12049 /* Splat vector constant out into a byte vector. */
12050 for (i
= 0; i
< n_elts
; i
++)
12052 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
12053 unsigned HOST_WIDE_INT elpart
;
12055 gcc_assert (CONST_INT_P (el
));
12056 elpart
= INTVAL (el
);
12058 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
12060 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
12061 elpart
>>= BITS_PER_UNIT
;
12065 /* Sanity check. */
12066 gcc_assert (idx
== GET_MODE_SIZE (mode
));
12070 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
12071 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12073 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12074 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12076 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12077 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12079 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12080 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
12082 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
12084 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
12086 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
12087 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12089 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12090 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12092 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12093 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12095 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12096 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
12098 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
12100 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
12102 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12103 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12105 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12106 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12108 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12109 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12111 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12112 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12114 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
12116 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
12117 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
12125 *elementwidth
= elsize
;
12129 unsigned HOST_WIDE_INT imm
= 0;
12131 /* Un-invert bytes of recognized vector, if necessary. */
12133 for (i
= 0; i
< idx
; i
++)
12134 bytes
[i
] ^= invmask
;
12138 /* FIXME: Broken on 32-bit H_W_I hosts. */
12139 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
12141 for (i
= 0; i
< 8; i
++)
12142 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
12143 << (i
* BITS_PER_UNIT
);
12145 *modconst
= GEN_INT (imm
);
12149 unsigned HOST_WIDE_INT imm
= 0;
12151 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
12152 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
12154 *modconst
= GEN_INT (imm
);
12162 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12163 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12164 float elements), and a modified constant (whatever should be output for a
12165 VMOV) in *MODCONST. */
12168 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
12169 rtx
*modconst
, int *elementwidth
)
12173 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12179 *modconst
= tmpconst
;
12182 *elementwidth
= tmpwidth
;
12187 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12188 the immediate is valid, write a constant suitable for using as an operand
12189 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12190 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12193 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
12194 rtx
*modconst
, int *elementwidth
)
12198 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12200 if (retval
< 0 || retval
> 5)
12204 *modconst
= tmpconst
;
12207 *elementwidth
= tmpwidth
;
12212 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12213 the immediate is valid, write a constant suitable for using as an operand
12214 to VSHR/VSHL to *MODCONST and the corresponding element width to
12215 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12216 because they have different limitations. */
12219 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
12220 rtx
*modconst
, int *elementwidth
,
12223 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
12224 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12225 unsigned HOST_WIDE_INT last_elt
= 0;
12226 unsigned HOST_WIDE_INT maxshift
;
12228 /* Split vector constant out into a byte vector. */
12229 for (i
= 0; i
< n_elts
; i
++)
12231 rtx el
= CONST_VECTOR_ELT (op
, i
);
12232 unsigned HOST_WIDE_INT elpart
;
12234 if (CONST_INT_P (el
))
12235 elpart
= INTVAL (el
);
12236 else if (CONST_DOUBLE_P (el
))
12239 gcc_unreachable ();
12241 if (i
!= 0 && elpart
!= last_elt
)
12247 /* Shift less than element size. */
12248 maxshift
= innersize
* 8;
12252 /* Left shift immediate value can be from 0 to <size>-1. */
12253 if (last_elt
>= maxshift
)
12258 /* Right shift immediate value can be from 1 to <size>. */
12259 if (last_elt
== 0 || last_elt
> maxshift
)
12264 *elementwidth
= innersize
* 8;
12267 *modconst
= CONST_VECTOR_ELT (op
, 0);
12272 /* Return a string suitable for output of Neon immediate logic operation
12276 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
12277 int inverse
, int quad
)
12279 int width
, is_valid
;
12280 static char templ
[40];
12282 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12284 gcc_assert (is_valid
!= 0);
12287 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12289 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12294 /* Return a string suitable for output of Neon immediate shift operation
12295 (VSHR or VSHL) MNEM. */
12298 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12299 machine_mode mode
, int quad
,
12302 int width
, is_valid
;
12303 static char templ
[40];
12305 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12306 gcc_assert (is_valid
!= 0);
12309 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12311 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12316 /* Output a sequence of pairwise operations to implement a reduction.
12317 NOTE: We do "too much work" here, because pairwise operations work on two
12318 registers-worth of operands in one go. Unfortunately we can't exploit those
12319 extra calculations to do the full operation in fewer steps, I don't think.
12320 Although all vector elements of the result but the first are ignored, we
12321 actually calculate the same result in each of the elements. An alternative
12322 such as initially loading a vector with zero to use as each of the second
12323 operands would use up an additional register and take an extra instruction,
12324 for no particular gain. */
12327 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
12328 rtx (*reduc
) (rtx
, rtx
, rtx
))
12330 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
12333 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12335 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12336 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12341 /* If VALS is a vector constant that can be loaded into a register
12342 using VDUP, generate instructions to do so and return an RTX to
12343 assign to the register. Otherwise return NULL_RTX. */
12346 neon_vdup_constant (rtx vals
)
12348 machine_mode mode
= GET_MODE (vals
);
12349 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12352 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12355 if (!const_vec_duplicate_p (vals
, &x
))
12356 /* The elements are not all the same. We could handle repeating
12357 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12358 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12362 /* We can load this constant by using VDUP and a constant in a
12363 single ARM register. This will be cheaper than a vector
12366 x
= copy_to_mode_reg (inner_mode
, x
);
12367 return gen_vec_duplicate (mode
, x
);
12370 /* Generate code to load VALS, which is a PARALLEL containing only
12371 constants (for vec_init) or CONST_VECTOR, efficiently into a
12372 register. Returns an RTX to copy into the register, or NULL_RTX
12373 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12376 neon_make_constant (rtx vals
)
12378 machine_mode mode
= GET_MODE (vals
);
12380 rtx const_vec
= NULL_RTX
;
12381 int n_elts
= GET_MODE_NUNITS (mode
);
12385 if (GET_CODE (vals
) == CONST_VECTOR
)
12387 else if (GET_CODE (vals
) == PARALLEL
)
12389 /* A CONST_VECTOR must contain only CONST_INTs and
12390 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12391 Only store valid constants in a CONST_VECTOR. */
12392 for (i
= 0; i
< n_elts
; ++i
)
12394 rtx x
= XVECEXP (vals
, 0, i
);
12395 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12398 if (n_const
== n_elts
)
12399 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12402 gcc_unreachable ();
12404 if (const_vec
!= NULL
12405 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12406 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12408 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12409 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12410 pipeline cycle; creating the constant takes one or two ARM
12411 pipeline cycles. */
12413 else if (const_vec
!= NULL_RTX
)
12414 /* Load from constant pool. On Cortex-A8 this takes two cycles
12415 (for either double or quad vectors). We cannot take advantage
12416 of single-cycle VLD1 because we need a PC-relative addressing
12420 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12421 We cannot construct an initializer. */
12425 /* Initialize vector TARGET to VALS. */
12428 neon_expand_vector_init (rtx target
, rtx vals
)
12430 machine_mode mode
= GET_MODE (target
);
12431 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12432 int n_elts
= GET_MODE_NUNITS (mode
);
12433 int n_var
= 0, one_var
= -1;
12434 bool all_same
= true;
12438 for (i
= 0; i
< n_elts
; ++i
)
12440 x
= XVECEXP (vals
, 0, i
);
12441 if (!CONSTANT_P (x
))
12442 ++n_var
, one_var
= i
;
12444 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12450 rtx constant
= neon_make_constant (vals
);
12451 if (constant
!= NULL_RTX
)
12453 emit_move_insn (target
, constant
);
12458 /* Splat a single non-constant element if we can. */
12459 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12461 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12462 emit_insn (gen_rtx_SET (target
, gen_vec_duplicate (mode
, x
)));
12466 /* One field is non-constant. Load constant then overwrite varying
12467 field. This is more efficient than using the stack. */
12470 rtx copy
= copy_rtx (vals
);
12471 rtx merge_mask
= GEN_INT (1 << one_var
);
12473 /* Load constant part of vector, substitute neighboring value for
12474 varying element. */
12475 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12476 neon_expand_vector_init (target
, copy
);
12478 /* Insert variable. */
12479 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12480 emit_insn (gen_vec_set_internal (mode
, target
, x
, merge_mask
, target
));
12484 /* Construct the vector in memory one field at a time
12485 and load the whole vector. */
12486 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12487 for (i
= 0; i
< n_elts
; i
++)
12488 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12489 i
* GET_MODE_SIZE (inner_mode
)),
12490 XVECEXP (vals
, 0, i
));
12491 emit_move_insn (target
, mem
);
12494 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12495 ERR if it doesn't. EXP indicates the source location, which includes the
12496 inlining history for intrinsics. */
12499 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12500 const_tree exp
, const char *desc
)
12502 HOST_WIDE_INT lane
;
12504 gcc_assert (CONST_INT_P (operand
));
12506 lane
= INTVAL (operand
);
12508 if (lane
< low
|| lane
>= high
)
12511 error ("%K%s %wd out of range %wd - %wd",
12512 exp
, desc
, lane
, low
, high
- 1);
12514 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
12518 /* Bounds-check lanes. */
12521 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12524 bounds_check (operand
, low
, high
, exp
, "lane");
12527 /* Bounds-check constants. */
12530 arm_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12532 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
12536 neon_element_bits (machine_mode mode
)
12538 return GET_MODE_UNIT_BITSIZE (mode
);
12542 /* Predicates for `match_operand' and `match_operator'. */
12544 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12545 WB is true if full writeback address modes are allowed and is false
12546 if limited writeback address modes (POST_INC and PRE_DEC) are
12550 arm_coproc_mem_operand (rtx op
, bool wb
)
12554 /* Reject eliminable registers. */
12555 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12556 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12557 || reg_mentioned_p (arg_pointer_rtx
, op
)
12558 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12559 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12560 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12561 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12564 /* Constants are converted into offsets from labels. */
12568 ind
= XEXP (op
, 0);
12570 if (reload_completed
12571 && (GET_CODE (ind
) == LABEL_REF
12572 || (GET_CODE (ind
) == CONST
12573 && GET_CODE (XEXP (ind
, 0)) == PLUS
12574 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12575 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12578 /* Match: (mem (reg)). */
12580 return arm_address_register_rtx_p (ind
, 0);
12582 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12583 acceptable in any case (subject to verification by
12584 arm_address_register_rtx_p). We need WB to be true to accept
12585 PRE_INC and POST_DEC. */
12586 if (GET_CODE (ind
) == POST_INC
12587 || GET_CODE (ind
) == PRE_DEC
12589 && (GET_CODE (ind
) == PRE_INC
12590 || GET_CODE (ind
) == POST_DEC
)))
12591 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12594 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12595 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12596 && GET_CODE (XEXP (ind
, 1)) == PLUS
12597 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12598 ind
= XEXP (ind
, 1);
12603 if (GET_CODE (ind
) == PLUS
12604 && REG_P (XEXP (ind
, 0))
12605 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12606 && CONST_INT_P (XEXP (ind
, 1))
12607 && INTVAL (XEXP (ind
, 1)) > -1024
12608 && INTVAL (XEXP (ind
, 1)) < 1024
12609 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12615 /* Return TRUE if OP is a memory operand which we can load or store a vector
12616 to/from. TYPE is one of the following values:
12617 0 - Vector load/stor (vldr)
12618 1 - Core registers (ldm)
12619 2 - Element/structure loads (vld1)
12622 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12626 /* Reject eliminable registers. */
12627 if (strict
&& ! (reload_in_progress
|| reload_completed
)
12628 && (reg_mentioned_p (frame_pointer_rtx
, op
)
12629 || reg_mentioned_p (arg_pointer_rtx
, op
)
12630 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12631 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12632 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12633 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12636 /* Constants are converted into offsets from labels. */
12640 ind
= XEXP (op
, 0);
12642 if (reload_completed
12643 && (GET_CODE (ind
) == LABEL_REF
12644 || (GET_CODE (ind
) == CONST
12645 && GET_CODE (XEXP (ind
, 0)) == PLUS
12646 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12647 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12650 /* Match: (mem (reg)). */
12652 return arm_address_register_rtx_p (ind
, 0);
12654 /* Allow post-increment with Neon registers. */
12655 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12656 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12657 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12659 /* Allow post-increment by register for VLDn */
12660 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
12661 && GET_CODE (XEXP (ind
, 1)) == PLUS
12662 && REG_P (XEXP (XEXP (ind
, 1), 1)))
12669 && GET_CODE (ind
) == PLUS
12670 && REG_P (XEXP (ind
, 0))
12671 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12672 && CONST_INT_P (XEXP (ind
, 1))
12673 && INTVAL (XEXP (ind
, 1)) > -1024
12674 /* For quad modes, we restrict the constant offset to be slightly less
12675 than what the instruction format permits. We have no such constraint
12676 on double mode offsets. (This must match arm_legitimate_index_p.) */
12677 && (INTVAL (XEXP (ind
, 1))
12678 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12679 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12685 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12688 neon_struct_mem_operand (rtx op
)
12692 /* Reject eliminable registers. */
12693 if (! (reload_in_progress
|| reload_completed
)
12694 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12695 || reg_mentioned_p (arg_pointer_rtx
, op
)
12696 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12697 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12698 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12699 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12702 /* Constants are converted into offsets from labels. */
12706 ind
= XEXP (op
, 0);
12708 if (reload_completed
12709 && (GET_CODE (ind
) == LABEL_REF
12710 || (GET_CODE (ind
) == CONST
12711 && GET_CODE (XEXP (ind
, 0)) == PLUS
12712 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12713 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12716 /* Match: (mem (reg)). */
12718 return arm_address_register_rtx_p (ind
, 0);
12720 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12721 if (GET_CODE (ind
) == POST_INC
12722 || GET_CODE (ind
) == PRE_DEC
)
12723 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12728 /* Prepares the operands for the VCMLA by lane instruction such that the right
12729 register number is selected. This instruction is special in that it always
12730 requires a D register, however there is a choice to be made between Dn[0],
12731 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
12733 The VCMLA by lane function always selects two values. For instance given D0
12734 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
12735 used by the instruction. However given V4SF then index 0 and 1 are valid as
12736 D0[0] or D1[0] are both valid.
12738 This function centralizes that information based on OPERANDS, OPERANDS[3]
12739 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
12740 updated to contain the right index. */
12743 neon_vcmla_lane_prepare_operands (rtx
*operands
)
12745 int lane
= INTVAL (operands
[4]);
12746 machine_mode constmode
= SImode
;
12747 machine_mode mode
= GET_MODE (operands
[3]);
12748 int regno
= REGNO (operands
[3]);
12749 regno
= ((regno
- FIRST_VFP_REGNUM
) >> 1);
12750 if (lane
> 0 && lane
>= GET_MODE_NUNITS (mode
) / 4)
12752 operands
[3] = gen_int_mode (regno
+ 1, constmode
);
12754 = gen_int_mode (lane
- GET_MODE_NUNITS (mode
) / 4, constmode
);
12758 operands
[3] = gen_int_mode (regno
, constmode
);
12759 operands
[4] = gen_int_mode (lane
, constmode
);
12765 /* Return true if X is a register that will be eliminated later on. */
12767 arm_eliminable_register (rtx x
)
12769 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12770 || REGNO (x
) == ARG_POINTER_REGNUM
12771 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12772 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12775 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12776 coprocessor registers. Otherwise return NO_REGS. */
12779 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
12781 if (mode
== HFmode
)
12783 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
12784 return GENERAL_REGS
;
12785 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12787 return GENERAL_REGS
;
12790 /* The neon move patterns handle all legitimate vector and struct
12793 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12794 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12795 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12796 || VALID_NEON_STRUCT_MODE (mode
)))
12799 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12802 return GENERAL_REGS
;
12805 /* Values which must be returned in the most-significant end of the return
12809 arm_return_in_msb (const_tree valtype
)
12811 return (TARGET_AAPCS_BASED
12812 && BYTES_BIG_ENDIAN
12813 && (AGGREGATE_TYPE_P (valtype
)
12814 || TREE_CODE (valtype
) == COMPLEX_TYPE
12815 || FIXED_POINT_TYPE_P (valtype
)));
12818 /* Return TRUE if X references a SYMBOL_REF. */
12820 symbol_mentioned_p (rtx x
)
12825 if (GET_CODE (x
) == SYMBOL_REF
)
12828 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12829 are constant offsets, not symbols. */
12830 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12833 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12835 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12841 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12842 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12845 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12852 /* Return TRUE if X references a LABEL_REF. */
12854 label_mentioned_p (rtx x
)
12859 if (GET_CODE (x
) == LABEL_REF
)
12862 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12863 instruction, but they are constant offsets, not symbols. */
12864 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12867 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12868 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12874 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12875 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12878 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12886 tls_mentioned_p (rtx x
)
12888 switch (GET_CODE (x
))
12891 return tls_mentioned_p (XEXP (x
, 0));
12894 if (XINT (x
, 1) == UNSPEC_TLS
)
12897 /* Fall through. */
12903 /* Must not copy any rtx that uses a pc-relative address.
12904 Also, disallow copying of load-exclusive instructions that
12905 may appear after splitting of compare-and-swap-style operations
12906 so as to prevent those loops from being transformed away from their
12907 canonical forms (see PR 69904). */
12910 arm_cannot_copy_insn_p (rtx_insn
*insn
)
12912 /* The tls call insn cannot be copied, as it is paired with a data
12914 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
12917 subrtx_iterator::array_type array
;
12918 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
12920 const_rtx x
= *iter
;
12921 if (GET_CODE (x
) == UNSPEC
12922 && (XINT (x
, 1) == UNSPEC_PIC_BASE
12923 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
12927 rtx set
= single_set (insn
);
12930 rtx src
= SET_SRC (set
);
12931 if (GET_CODE (src
) == ZERO_EXTEND
)
12932 src
= XEXP (src
, 0);
12934 /* Catch the load-exclusive and load-acquire operations. */
12935 if (GET_CODE (src
) == UNSPEC_VOLATILE
12936 && (XINT (src
, 1) == VUNSPEC_LL
12937 || XINT (src
, 1) == VUNSPEC_LAX
))
12944 minmax_code (rtx x
)
12946 enum rtx_code code
= GET_CODE (x
);
12959 gcc_unreachable ();
12963 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12966 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
12967 int *mask
, bool *signed_sat
)
12969 /* The high bound must be a power of two minus one. */
12970 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
12974 /* The low bound is either zero (for usat) or one less than the
12975 negation of the high bound (for ssat). */
12976 if (INTVAL (lo_bound
) == 0)
12981 *signed_sat
= false;
12986 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
12991 *signed_sat
= true;
12999 /* Return 1 if memory locations are adjacent. */
13001 adjacent_mem_locations (rtx a
, rtx b
)
13003 /* We don't guarantee to preserve the order of these memory refs. */
13004 if (volatile_refs_p (a
) || volatile_refs_p (b
))
13007 if ((REG_P (XEXP (a
, 0))
13008 || (GET_CODE (XEXP (a
, 0)) == PLUS
13009 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
13010 && (REG_P (XEXP (b
, 0))
13011 || (GET_CODE (XEXP (b
, 0)) == PLUS
13012 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
13014 HOST_WIDE_INT val0
= 0, val1
= 0;
13018 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
13020 reg0
= XEXP (XEXP (a
, 0), 0);
13021 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
13024 reg0
= XEXP (a
, 0);
13026 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
13028 reg1
= XEXP (XEXP (b
, 0), 0);
13029 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
13032 reg1
= XEXP (b
, 0);
13034 /* Don't accept any offset that will require multiple
13035 instructions to handle, since this would cause the
13036 arith_adjacentmem pattern to output an overlong sequence. */
13037 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
13040 /* Don't allow an eliminable register: register elimination can make
13041 the offset too large. */
13042 if (arm_eliminable_register (reg0
))
13045 val_diff
= val1
- val0
;
13049 /* If the target has load delay slots, then there's no benefit
13050 to using an ldm instruction unless the offset is zero and
13051 we are optimizing for size. */
13052 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
13053 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
13054 && (val_diff
== 4 || val_diff
== -4));
13057 return ((REGNO (reg0
) == REGNO (reg1
))
13058 && (val_diff
== 4 || val_diff
== -4));
13064 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13065 for load operations, false for store operations. CONSECUTIVE is true
13066 if the register numbers in the operation must be consecutive in the register
13067 bank. RETURN_PC is true if value is to be loaded in PC.
13068 The pattern we are trying to match for load is:
13069 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13070 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13073 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13076 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13077 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13078 3. If consecutive is TRUE, then for kth register being loaded,
13079 REGNO (R_dk) = REGNO (R_d0) + k.
13080 The pattern for store is similar. */
13082 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
13083 bool consecutive
, bool return_pc
)
13085 HOST_WIDE_INT count
= XVECLEN (op
, 0);
13086 rtx reg
, mem
, addr
;
13088 unsigned first_regno
;
13089 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
13091 bool addr_reg_in_reglist
= false;
13092 bool update
= false;
13097 /* If not in SImode, then registers must be consecutive
13098 (e.g., VLDM instructions for DFmode). */
13099 gcc_assert ((mode
== SImode
) || consecutive
);
13100 /* Setting return_pc for stores is illegal. */
13101 gcc_assert (!return_pc
|| load
);
13103 /* Set up the increments and the regs per val based on the mode. */
13104 reg_increment
= GET_MODE_SIZE (mode
);
13105 regs_per_val
= reg_increment
/ 4;
13106 offset_adj
= return_pc
? 1 : 0;
13109 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
13110 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
13113 /* Check if this is a write-back. */
13114 elt
= XVECEXP (op
, 0, offset_adj
);
13115 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
13121 /* The offset adjustment must be the number of registers being
13122 popped times the size of a single register. */
13123 if (!REG_P (SET_DEST (elt
))
13124 || !REG_P (XEXP (SET_SRC (elt
), 0))
13125 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
13126 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
13127 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
13128 ((count
- 1 - offset_adj
) * reg_increment
))
13132 i
= i
+ offset_adj
;
13133 base
= base
+ offset_adj
;
13134 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13135 success depends on the type: VLDM can do just one reg,
13136 LDM must do at least two. */
13137 if ((count
<= i
) && (mode
== SImode
))
13140 elt
= XVECEXP (op
, 0, i
- 1);
13141 if (GET_CODE (elt
) != SET
)
13146 reg
= SET_DEST (elt
);
13147 mem
= SET_SRC (elt
);
13151 reg
= SET_SRC (elt
);
13152 mem
= SET_DEST (elt
);
13155 if (!REG_P (reg
) || !MEM_P (mem
))
13158 regno
= REGNO (reg
);
13159 first_regno
= regno
;
13160 addr
= XEXP (mem
, 0);
13161 if (GET_CODE (addr
) == PLUS
)
13163 if (!CONST_INT_P (XEXP (addr
, 1)))
13166 offset
= INTVAL (XEXP (addr
, 1));
13167 addr
= XEXP (addr
, 0);
13173 /* Don't allow SP to be loaded unless it is also the base register. It
13174 guarantees that SP is reset correctly when an LDM instruction
13175 is interrupted. Otherwise, we might end up with a corrupt stack. */
13176 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13179 if (regno
== REGNO (addr
))
13180 addr_reg_in_reglist
= true;
13182 for (; i
< count
; i
++)
13184 elt
= XVECEXP (op
, 0, i
);
13185 if (GET_CODE (elt
) != SET
)
13190 reg
= SET_DEST (elt
);
13191 mem
= SET_SRC (elt
);
13195 reg
= SET_SRC (elt
);
13196 mem
= SET_DEST (elt
);
13200 || GET_MODE (reg
) != mode
13201 || REGNO (reg
) <= regno
13204 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
13205 /* Don't allow SP to be loaded unless it is also the base register. It
13206 guarantees that SP is reset correctly when an LDM instruction
13207 is interrupted. Otherwise, we might end up with a corrupt stack. */
13208 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13210 || GET_MODE (mem
) != mode
13211 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
13212 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
13213 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13214 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13215 offset
+ (i
- base
) * reg_increment
))
13216 && (!REG_P (XEXP (mem
, 0))
13217 || offset
+ (i
- base
) * reg_increment
!= 0)))
13220 regno
= REGNO (reg
);
13221 if (regno
== REGNO (addr
))
13222 addr_reg_in_reglist
= true;
13227 if (update
&& addr_reg_in_reglist
)
13230 /* For Thumb-1, address register is always modified - either by write-back
13231 or by explicit load. If the pattern does not describe an update,
13232 then the address register must be in the list of loaded registers. */
13234 return update
|| addr_reg_in_reglist
;
13240 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13241 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13242 instruction. ADD_OFFSET is nonzero if the base address register needs
13243 to be modified with an add instruction before we can use it. */
13246 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13247 int nops
, HOST_WIDE_INT add_offset
)
13249 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13250 if the offset isn't small enough. The reason 2 ldrs are faster
13251 is because these ARMs are able to do more than one cache access
13252 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13253 whilst the ARM8 has a double bandwidth cache. This means that
13254 these cores can do both an instruction fetch and a data fetch in
13255 a single cycle, so the trick of calculating the address into a
13256 scratch register (one of the result regs) and then doing a load
13257 multiple actually becomes slower (and no smaller in code size).
13258 That is the transformation
13260 ldr rd1, [rbase + offset]
13261 ldr rd2, [rbase + offset + 4]
13265 add rd1, rbase, offset
13266 ldmia rd1, {rd1, rd2}
13268 produces worse code -- '3 cycles + any stalls on rd2' instead of
13269 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13270 access per cycle, the first sequence could never complete in less
13271 than 6 cycles, whereas the ldm sequence would only take 5 and
13272 would make better use of sequential accesses if not hitting the
13275 We cheat here and test 'arm_ld_sched' which we currently know to
13276 only be true for the ARM8, ARM9 and StrongARM. If this ever
13277 changes, then the test below needs to be reworked. */
13278 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13281 /* XScale has load-store double instructions, but they have stricter
13282 alignment requirements than load-store multiple, so we cannot
13285 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13286 the pipeline until completion.
13294 An ldr instruction takes 1-3 cycles, but does not block the
13303 Best case ldr will always win. However, the more ldr instructions
13304 we issue, the less likely we are to be able to schedule them well.
13305 Using ldr instructions also increases code size.
13307 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13308 for counts of 3 or 4 regs. */
13309 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13314 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13315 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13316 an array ORDER which describes the sequence to use when accessing the
13317 offsets that produces an ascending order. In this sequence, each
13318 offset must be larger by exactly 4 than the previous one. ORDER[0]
13319 must have been filled in with the lowest offset by the caller.
13320 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13321 we use to verify that ORDER produces an ascending order of registers.
13322 Return true if it was possible to construct such an order, false if
13326 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13327 int *unsorted_regs
)
13330 for (i
= 1; i
< nops
; i
++)
13334 order
[i
] = order
[i
- 1];
13335 for (j
= 0; j
< nops
; j
++)
13336 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13338 /* We must find exactly one offset that is higher than the
13339 previous one by 4. */
13340 if (order
[i
] != order
[i
- 1])
13344 if (order
[i
] == order
[i
- 1])
13346 /* The register numbers must be ascending. */
13347 if (unsorted_regs
!= NULL
13348 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13354 /* Used to determine in a peephole whether a sequence of load
13355 instructions can be changed into a load-multiple instruction.
13356 NOPS is the number of separate load instructions we are examining. The
13357 first NOPS entries in OPERANDS are the destination registers, the
13358 next NOPS entries are memory operands. If this function is
13359 successful, *BASE is set to the common base register of the memory
13360 accesses; *LOAD_OFFSET is set to the first memory location's offset
13361 from that base register.
13362 REGS is an array filled in with the destination register numbers.
13363 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13364 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13365 the sequence of registers in REGS matches the loads from ascending memory
13366 locations, and the function verifies that the register numbers are
13367 themselves ascending. If CHECK_REGS is false, the register numbers
13368 are stored in the order they are found in the operands. */
13370 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13371 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13373 int unsorted_regs
[MAX_LDM_STM_OPS
];
13374 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13375 int order
[MAX_LDM_STM_OPS
];
13379 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13380 easily extended if required. */
13381 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13383 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13385 /* Loop over the operands and check that the memory references are
13386 suitable (i.e. immediate offsets from the same base register). At
13387 the same time, extract the target register, and the memory
13389 for (i
= 0; i
< nops
; i
++)
13394 /* Convert a subreg of a mem into the mem itself. */
13395 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13396 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13398 gcc_assert (MEM_P (operands
[nops
+ i
]));
13400 /* Don't reorder volatile memory references; it doesn't seem worth
13401 looking for the case where the order is ok anyway. */
13402 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13405 offset
= const0_rtx
;
13407 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13408 || (GET_CODE (reg
) == SUBREG
13409 && REG_P (reg
= SUBREG_REG (reg
))))
13410 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13411 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13412 || (GET_CODE (reg
) == SUBREG
13413 && REG_P (reg
= SUBREG_REG (reg
))))
13414 && (CONST_INT_P (offset
13415 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13419 base_reg
= REGNO (reg
);
13420 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13423 else if (base_reg
!= (int) REGNO (reg
))
13424 /* Not addressed from the same base register. */
13427 unsorted_regs
[i
] = (REG_P (operands
[i
])
13428 ? REGNO (operands
[i
])
13429 : REGNO (SUBREG_REG (operands
[i
])));
13431 /* If it isn't an integer register, or if it overwrites the
13432 base register but isn't the last insn in the list, then
13433 we can't do this. */
13434 if (unsorted_regs
[i
] < 0
13435 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13436 || unsorted_regs
[i
] > 14
13437 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13440 /* Don't allow SP to be loaded unless it is also the base
13441 register. It guarantees that SP is reset correctly when
13442 an LDM instruction is interrupted. Otherwise, we might
13443 end up with a corrupt stack. */
13444 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13447 unsorted_offsets
[i
] = INTVAL (offset
);
13448 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13452 /* Not a suitable memory address. */
13456 /* All the useful information has now been extracted from the
13457 operands into unsorted_regs and unsorted_offsets; additionally,
13458 order[0] has been set to the lowest offset in the list. Sort
13459 the offsets into order, verifying that they are adjacent, and
13460 check that the register numbers are ascending. */
13461 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13462 check_regs
? unsorted_regs
: NULL
))
13466 memcpy (saved_order
, order
, sizeof order
);
13472 for (i
= 0; i
< nops
; i
++)
13473 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13475 *load_offset
= unsorted_offsets
[order
[0]];
13478 if (unsorted_offsets
[order
[0]] == 0)
13479 ldm_case
= 1; /* ldmia */
13480 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13481 ldm_case
= 2; /* ldmib */
13482 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13483 ldm_case
= 3; /* ldmda */
13484 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13485 ldm_case
= 4; /* ldmdb */
13486 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13487 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13492 if (!multiple_operation_profitable_p (false, nops
,
13494 ? unsorted_offsets
[order
[0]] : 0))
13500 /* Used to determine in a peephole whether a sequence of store instructions can
13501 be changed into a store-multiple instruction.
13502 NOPS is the number of separate store instructions we are examining.
13503 NOPS_TOTAL is the total number of instructions recognized by the peephole
13505 The first NOPS entries in OPERANDS are the source registers, the next
13506 NOPS entries are memory operands. If this function is successful, *BASE is
13507 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13508 to the first memory location's offset from that base register. REGS is an
13509 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13510 likewise filled with the corresponding rtx's.
13511 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13512 numbers to an ascending order of stores.
13513 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13514 from ascending memory locations, and the function verifies that the register
13515 numbers are themselves ascending. If CHECK_REGS is false, the register
13516 numbers are stored in the order they are found in the operands. */
13518 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13519 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13520 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13522 int unsorted_regs
[MAX_LDM_STM_OPS
];
13523 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13524 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13525 int order
[MAX_LDM_STM_OPS
];
13527 rtx base_reg_rtx
= NULL
;
13530 /* Write back of base register is currently only supported for Thumb 1. */
13531 int base_writeback
= TARGET_THUMB1
;
13533 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13534 easily extended if required. */
13535 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13537 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13539 /* Loop over the operands and check that the memory references are
13540 suitable (i.e. immediate offsets from the same base register). At
13541 the same time, extract the target register, and the memory
13543 for (i
= 0; i
< nops
; i
++)
13548 /* Convert a subreg of a mem into the mem itself. */
13549 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13550 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13552 gcc_assert (MEM_P (operands
[nops
+ i
]));
13554 /* Don't reorder volatile memory references; it doesn't seem worth
13555 looking for the case where the order is ok anyway. */
13556 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13559 offset
= const0_rtx
;
13561 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13562 || (GET_CODE (reg
) == SUBREG
13563 && REG_P (reg
= SUBREG_REG (reg
))))
13564 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13565 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13566 || (GET_CODE (reg
) == SUBREG
13567 && REG_P (reg
= SUBREG_REG (reg
))))
13568 && (CONST_INT_P (offset
13569 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13571 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13572 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13573 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13577 base_reg
= REGNO (reg
);
13578 base_reg_rtx
= reg
;
13579 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13582 else if (base_reg
!= (int) REGNO (reg
))
13583 /* Not addressed from the same base register. */
13586 /* If it isn't an integer register, then we can't do this. */
13587 if (unsorted_regs
[i
] < 0
13588 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13589 /* The effects are unpredictable if the base register is
13590 both updated and stored. */
13591 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13592 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13593 || unsorted_regs
[i
] > 14)
13596 unsorted_offsets
[i
] = INTVAL (offset
);
13597 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13601 /* Not a suitable memory address. */
13605 /* All the useful information has now been extracted from the
13606 operands into unsorted_regs and unsorted_offsets; additionally,
13607 order[0] has been set to the lowest offset in the list. Sort
13608 the offsets into order, verifying that they are adjacent, and
13609 check that the register numbers are ascending. */
13610 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13611 check_regs
? unsorted_regs
: NULL
))
13615 memcpy (saved_order
, order
, sizeof order
);
13621 for (i
= 0; i
< nops
; i
++)
13623 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13625 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13628 *load_offset
= unsorted_offsets
[order
[0]];
13632 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13635 if (unsorted_offsets
[order
[0]] == 0)
13636 stm_case
= 1; /* stmia */
13637 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13638 stm_case
= 2; /* stmib */
13639 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13640 stm_case
= 3; /* stmda */
13641 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13642 stm_case
= 4; /* stmdb */
13646 if (!multiple_operation_profitable_p (false, nops
, 0))
13652 /* Routines for use in generating RTL. */
13654 /* Generate a load-multiple instruction. COUNT is the number of loads in
13655 the instruction; REGS and MEMS are arrays containing the operands.
13656 BASEREG is the base register to be used in addressing the memory operands.
13657 WBACK_OFFSET is nonzero if the instruction should update the base
13661 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13662 HOST_WIDE_INT wback_offset
)
13667 if (!multiple_operation_profitable_p (false, count
, 0))
13673 for (i
= 0; i
< count
; i
++)
13674 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13676 if (wback_offset
!= 0)
13677 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13679 seq
= get_insns ();
13685 result
= gen_rtx_PARALLEL (VOIDmode
,
13686 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13687 if (wback_offset
!= 0)
13689 XVECEXP (result
, 0, 0)
13690 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13695 for (j
= 0; i
< count
; i
++, j
++)
13696 XVECEXP (result
, 0, i
)
13697 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13702 /* Generate a store-multiple instruction. COUNT is the number of stores in
13703 the instruction; REGS and MEMS are arrays containing the operands.
13704 BASEREG is the base register to be used in addressing the memory operands.
13705 WBACK_OFFSET is nonzero if the instruction should update the base
13709 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13710 HOST_WIDE_INT wback_offset
)
13715 if (GET_CODE (basereg
) == PLUS
)
13716 basereg
= XEXP (basereg
, 0);
13718 if (!multiple_operation_profitable_p (false, count
, 0))
13724 for (i
= 0; i
< count
; i
++)
13725 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13727 if (wback_offset
!= 0)
13728 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13730 seq
= get_insns ();
13736 result
= gen_rtx_PARALLEL (VOIDmode
,
13737 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13738 if (wback_offset
!= 0)
13740 XVECEXP (result
, 0, 0)
13741 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13746 for (j
= 0; i
< count
; i
++, j
++)
13747 XVECEXP (result
, 0, i
)
13748 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13753 /* Generate either a load-multiple or a store-multiple instruction. This
13754 function can be used in situations where we can start with a single MEM
13755 rtx and adjust its address upwards.
13756 COUNT is the number of operations in the instruction, not counting a
13757 possible update of the base register. REGS is an array containing the
13759 BASEREG is the base register to be used in addressing the memory operands,
13760 which are constructed from BASEMEM.
13761 WRITE_BACK specifies whether the generated instruction should include an
13762 update of the base register.
13763 OFFSETP is used to pass an offset to and from this function; this offset
13764 is not used when constructing the address (instead BASEMEM should have an
13765 appropriate offset in its address), it is used only for setting
13766 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13769 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13770 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13772 rtx mems
[MAX_LDM_STM_OPS
];
13773 HOST_WIDE_INT offset
= *offsetp
;
13776 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13778 if (GET_CODE (basereg
) == PLUS
)
13779 basereg
= XEXP (basereg
, 0);
13781 for (i
= 0; i
< count
; i
++)
13783 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13784 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13792 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13793 write_back
? 4 * count
: 0);
13795 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13796 write_back
? 4 * count
: 0);
13800 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13801 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13803 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13808 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13809 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13811 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13815 /* Called from a peephole2 expander to turn a sequence of loads into an
13816 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13817 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13818 is true if we can reorder the registers because they are used commutatively
13820 Returns true iff we could generate a new instruction. */
13823 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13825 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13826 rtx mems
[MAX_LDM_STM_OPS
];
13827 int i
, j
, base_reg
;
13829 HOST_WIDE_INT offset
;
13830 int write_back
= FALSE
;
13834 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13835 &base_reg
, &offset
, !sort_regs
);
13841 for (i
= 0; i
< nops
- 1; i
++)
13842 for (j
= i
+ 1; j
< nops
; j
++)
13843 if (regs
[i
] > regs
[j
])
13849 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13853 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13855 /* Thumb-1 ldm uses writeback except if the base is loaded. */
13857 for (i
= 0; i
< nops
; i
++)
13858 if (base_reg
== regs
[i
])
13859 write_back
= false;
13861 /* Ensure the base is dead if it is updated. */
13862 if (write_back
&& !peep2_reg_dead_p (nops
, base_reg_rtx
))
13868 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13869 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13871 base_reg_rtx
= newbase
;
13874 for (i
= 0; i
< nops
; i
++)
13876 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13877 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13880 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13881 write_back
? offset
+ i
* 4 : 0));
13885 /* Called from a peephole2 expander to turn a sequence of stores into an
13886 STM instruction. OPERANDS are the operands found by the peephole matcher;
13887 NOPS indicates how many separate stores we are trying to combine.
13888 Returns true iff we could generate a new instruction. */
13891 gen_stm_seq (rtx
*operands
, int nops
)
13894 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13895 rtx mems
[MAX_LDM_STM_OPS
];
13898 HOST_WIDE_INT offset
;
13899 int write_back
= FALSE
;
13902 bool base_reg_dies
;
13904 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13905 mem_order
, &base_reg
, &offset
, true);
13910 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13912 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13915 gcc_assert (base_reg_dies
);
13921 gcc_assert (base_reg_dies
);
13922 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13926 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13928 for (i
= 0; i
< nops
; i
++)
13930 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13931 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13934 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13935 write_back
? offset
+ i
* 4 : 0));
13939 /* Called from a peephole2 expander to turn a sequence of stores that are
13940 preceded by constant loads into an STM instruction. OPERANDS are the
13941 operands found by the peephole matcher; NOPS indicates how many
13942 separate stores we are trying to combine; there are 2 * NOPS
13943 instructions in the peephole.
13944 Returns true iff we could generate a new instruction. */
13947 gen_const_stm_seq (rtx
*operands
, int nops
)
13949 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
13950 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13951 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
13952 rtx mems
[MAX_LDM_STM_OPS
];
13955 HOST_WIDE_INT offset
;
13956 int write_back
= FALSE
;
13959 bool base_reg_dies
;
13961 HARD_REG_SET allocated
;
13963 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
13964 mem_order
, &base_reg
, &offset
, false);
13969 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
13971 /* If the same register is used more than once, try to find a free
13973 CLEAR_HARD_REG_SET (allocated
);
13974 for (i
= 0; i
< nops
; i
++)
13976 for (j
= i
+ 1; j
< nops
; j
++)
13977 if (regs
[i
] == regs
[j
])
13979 rtx t
= peep2_find_free_register (0, nops
* 2,
13980 TARGET_THUMB1
? "l" : "r",
13981 SImode
, &allocated
);
13985 regs
[i
] = REGNO (t
);
13989 /* Compute an ordering that maps the register numbers to an ascending
13992 for (i
= 0; i
< nops
; i
++)
13993 if (regs
[i
] < regs
[reg_order
[0]])
13996 for (i
= 1; i
< nops
; i
++)
13998 int this_order
= reg_order
[i
- 1];
13999 for (j
= 0; j
< nops
; j
++)
14000 if (regs
[j
] > regs
[reg_order
[i
- 1]]
14001 && (this_order
== reg_order
[i
- 1]
14002 || regs
[j
] < regs
[this_order
]))
14004 reg_order
[i
] = this_order
;
14007 /* Ensure that registers that must be live after the instruction end
14008 up with the correct value. */
14009 for (i
= 0; i
< nops
; i
++)
14011 int this_order
= reg_order
[i
];
14012 if ((this_order
!= mem_order
[i
]
14013 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
14014 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
14018 /* Load the constants. */
14019 for (i
= 0; i
< nops
; i
++)
14021 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
14022 sorted_regs
[i
] = regs
[reg_order
[i
]];
14023 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
14026 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14028 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
14031 gcc_assert (base_reg_dies
);
14037 gcc_assert (base_reg_dies
);
14038 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14042 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14044 for (i
= 0; i
< nops
; i
++)
14046 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14047 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14050 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
14051 write_back
? offset
+ i
* 4 : 0));
14055 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14056 unaligned copies on processors which support unaligned semantics for those
14057 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14058 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14059 An interleave factor of 1 (the minimum) will perform no interleaving.
14060 Load/store multiple are used for aligned addresses where possible. */
14063 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
14064 HOST_WIDE_INT length
,
14065 unsigned int interleave_factor
)
14067 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
14068 int *regnos
= XALLOCAVEC (int, interleave_factor
);
14069 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
14070 HOST_WIDE_INT i
, j
;
14071 HOST_WIDE_INT remaining
= length
, words
;
14072 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
14074 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
14075 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
14076 HOST_WIDE_INT srcoffset
, dstoffset
;
14077 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
14080 gcc_assert (interleave_factor
>= 1 && interleave_factor
<= 4);
14082 /* Use hard registers if we have aligned source or destination so we can use
14083 load/store multiple with contiguous registers. */
14084 if (dst_aligned
|| src_aligned
)
14085 for (i
= 0; i
< interleave_factor
; i
++)
14086 regs
[i
] = gen_rtx_REG (SImode
, i
);
14088 for (i
= 0; i
< interleave_factor
; i
++)
14089 regs
[i
] = gen_reg_rtx (SImode
);
14091 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
14092 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
14094 srcoffset
= dstoffset
= 0;
14096 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14097 For copying the last bytes we want to subtract this offset again. */
14098 src_autoinc
= dst_autoinc
= 0;
14100 for (i
= 0; i
< interleave_factor
; i
++)
14103 /* Copy BLOCK_SIZE_BYTES chunks. */
14105 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
14108 if (src_aligned
&& interleave_factor
> 1)
14110 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
14111 TRUE
, srcbase
, &srcoffset
));
14112 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14116 for (j
= 0; j
< interleave_factor
; j
++)
14118 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
14120 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14121 srcoffset
+ j
* UNITS_PER_WORD
);
14122 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14124 srcoffset
+= block_size_bytes
;
14128 if (dst_aligned
&& interleave_factor
> 1)
14130 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
14131 TRUE
, dstbase
, &dstoffset
));
14132 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14136 for (j
= 0; j
< interleave_factor
; j
++)
14138 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
14140 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14141 dstoffset
+ j
* UNITS_PER_WORD
);
14142 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14144 dstoffset
+= block_size_bytes
;
14147 remaining
-= block_size_bytes
;
14150 /* Copy any whole words left (note these aren't interleaved with any
14151 subsequent halfword/byte load/stores in the interests of simplicity). */
14153 words
= remaining
/ UNITS_PER_WORD
;
14155 gcc_assert (words
< interleave_factor
);
14157 if (src_aligned
&& words
> 1)
14159 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
14161 src_autoinc
+= UNITS_PER_WORD
* words
;
14165 for (j
= 0; j
< words
; j
++)
14167 addr
= plus_constant (Pmode
, src
,
14168 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
14169 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14170 srcoffset
+ j
* UNITS_PER_WORD
);
14172 emit_move_insn (regs
[j
], mem
);
14174 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14176 srcoffset
+= words
* UNITS_PER_WORD
;
14179 if (dst_aligned
&& words
> 1)
14181 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14183 dst_autoinc
+= words
* UNITS_PER_WORD
;
14187 for (j
= 0; j
< words
; j
++)
14189 addr
= plus_constant (Pmode
, dst
,
14190 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14191 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14192 dstoffset
+ j
* UNITS_PER_WORD
);
14194 emit_move_insn (mem
, regs
[j
]);
14196 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14198 dstoffset
+= words
* UNITS_PER_WORD
;
14201 remaining
-= words
* UNITS_PER_WORD
;
14203 gcc_assert (remaining
< 4);
14205 /* Copy a halfword if necessary. */
14207 if (remaining
>= 2)
14209 halfword_tmp
= gen_reg_rtx (SImode
);
14211 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14212 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14213 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14215 /* Either write out immediately, or delay until we've loaded the last
14216 byte, depending on interleave factor. */
14217 if (interleave_factor
== 1)
14219 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14220 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14221 emit_insn (gen_unaligned_storehi (mem
,
14222 gen_lowpart (HImode
, halfword_tmp
)));
14223 halfword_tmp
= NULL
;
14231 gcc_assert (remaining
< 2);
14233 /* Copy last byte. */
14235 if ((remaining
& 1) != 0)
14237 byte_tmp
= gen_reg_rtx (SImode
);
14239 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14240 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14241 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14243 if (interleave_factor
== 1)
14245 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14246 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14247 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14256 /* Store last halfword if we haven't done so already. */
14260 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14261 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14262 emit_insn (gen_unaligned_storehi (mem
,
14263 gen_lowpart (HImode
, halfword_tmp
)));
14267 /* Likewise for last byte. */
14271 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14272 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14273 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14277 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14280 /* From mips_adjust_block_mem:
14282 Helper function for doing a loop-based block operation on memory
14283 reference MEM. Each iteration of the loop will operate on LENGTH
14286 Create a new base register for use within the loop and point it to
14287 the start of MEM. Create a new memory reference that uses this
14288 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14291 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14294 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14296 /* Although the new mem does not refer to a known location,
14297 it does keep up to LENGTH bytes of alignment. */
14298 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14299 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14302 /* From mips_block_move_loop:
14304 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14305 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14306 the memory regions do not overlap. */
14309 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14310 unsigned int interleave_factor
,
14311 HOST_WIDE_INT bytes_per_iter
)
14313 rtx src_reg
, dest_reg
, final_src
, test
;
14314 HOST_WIDE_INT leftover
;
14316 leftover
= length
% bytes_per_iter
;
14317 length
-= leftover
;
14319 /* Create registers and memory references for use within the loop. */
14320 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14321 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14323 /* Calculate the value that SRC_REG should have after the last iteration of
14325 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14326 0, 0, OPTAB_WIDEN
);
14328 /* Emit the start of the loop. */
14329 rtx_code_label
*label
= gen_label_rtx ();
14330 emit_label (label
);
14332 /* Emit the loop body. */
14333 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14334 interleave_factor
);
14336 /* Move on to the next block. */
14337 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14338 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14340 /* Emit the loop condition. */
14341 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14342 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14344 /* Mop up any left-over bytes. */
14346 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14349 /* Emit a block move when either the source or destination is unaligned (not
14350 aligned to a four-byte boundary). This may need further tuning depending on
14351 core type, optimize_size setting, etc. */
14354 arm_cpymemqi_unaligned (rtx
*operands
)
14356 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14360 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14361 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14362 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14363 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14364 or dst_aligned though: allow more interleaving in those cases since the
14365 resulting code can be smaller. */
14366 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14367 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14370 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14371 interleave_factor
, bytes_per_iter
);
14373 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14374 interleave_factor
);
14378 /* Note that the loop created by arm_block_move_unaligned_loop may be
14379 subject to loop unrolling, which makes tuning this condition a little
14382 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14384 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14391 arm_gen_cpymemqi (rtx
*operands
)
14393 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14394 HOST_WIDE_INT srcoffset
, dstoffset
;
14395 rtx src
, dst
, srcbase
, dstbase
;
14396 rtx part_bytes_reg
= NULL
;
14399 if (!CONST_INT_P (operands
[2])
14400 || !CONST_INT_P (operands
[3])
14401 || INTVAL (operands
[2]) > 64)
14404 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14405 return arm_cpymemqi_unaligned (operands
);
14407 if (INTVAL (operands
[3]) & 3)
14410 dstbase
= operands
[0];
14411 srcbase
= operands
[1];
14413 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14414 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14416 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14417 out_words_to_go
= INTVAL (operands
[2]) / 4;
14418 last_bytes
= INTVAL (operands
[2]) & 3;
14419 dstoffset
= srcoffset
= 0;
14421 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14422 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14424 while (in_words_to_go
>= 2)
14426 if (in_words_to_go
> 4)
14427 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14428 TRUE
, srcbase
, &srcoffset
));
14430 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14431 src
, FALSE
, srcbase
,
14434 if (out_words_to_go
)
14436 if (out_words_to_go
> 4)
14437 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14438 TRUE
, dstbase
, &dstoffset
));
14439 else if (out_words_to_go
!= 1)
14440 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14441 out_words_to_go
, dst
,
14444 dstbase
, &dstoffset
));
14447 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14448 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
14449 if (last_bytes
!= 0)
14451 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14457 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14458 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14461 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14462 if (out_words_to_go
)
14466 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14467 sreg
= copy_to_reg (mem
);
14469 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14470 emit_move_insn (mem
, sreg
);
14473 gcc_assert (!in_words_to_go
); /* Sanity check */
14476 if (in_words_to_go
)
14478 gcc_assert (in_words_to_go
> 0);
14480 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14481 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14484 gcc_assert (!last_bytes
|| part_bytes_reg
);
14486 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14488 rtx tmp
= gen_reg_rtx (SImode
);
14490 /* The bytes we want are in the top end of the word. */
14491 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14492 GEN_INT (8 * (4 - last_bytes
))));
14493 part_bytes_reg
= tmp
;
14497 mem
= adjust_automodify_address (dstbase
, QImode
,
14498 plus_constant (Pmode
, dst
,
14500 dstoffset
+ last_bytes
- 1);
14501 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14505 tmp
= gen_reg_rtx (SImode
);
14506 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14507 part_bytes_reg
= tmp
;
14514 if (last_bytes
> 1)
14516 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14517 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14521 rtx tmp
= gen_reg_rtx (SImode
);
14522 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14523 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14524 part_bytes_reg
= tmp
;
14531 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14532 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14539 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
14542 next_consecutive_mem (rtx mem
)
14544 machine_mode mode
= GET_MODE (mem
);
14545 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14546 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14548 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14551 /* Copy using LDRD/STRD instructions whenever possible.
14552 Returns true upon success. */
14554 gen_cpymem_ldrd_strd (rtx
*operands
)
14556 unsigned HOST_WIDE_INT len
;
14557 HOST_WIDE_INT align
;
14558 rtx src
, dst
, base
;
14560 bool src_aligned
, dst_aligned
;
14561 bool src_volatile
, dst_volatile
;
14563 gcc_assert (CONST_INT_P (operands
[2]));
14564 gcc_assert (CONST_INT_P (operands
[3]));
14566 len
= UINTVAL (operands
[2]);
14570 /* Maximum alignment we can assume for both src and dst buffers. */
14571 align
= INTVAL (operands
[3]);
14573 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14576 /* Place src and dst addresses in registers
14577 and update the corresponding mem rtx. */
14579 dst_volatile
= MEM_VOLATILE_P (dst
);
14580 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14581 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14582 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14585 src_volatile
= MEM_VOLATILE_P (src
);
14586 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14587 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14588 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14590 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14593 if (src_volatile
|| dst_volatile
)
14596 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14597 if (!(dst_aligned
|| src_aligned
))
14598 return arm_gen_cpymemqi (operands
);
14600 /* If the either src or dst is unaligned we'll be accessing it as pairs
14601 of unaligned SImode accesses. Otherwise we can generate DImode
14602 ldrd/strd instructions. */
14603 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
14604 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
14609 reg0
= gen_reg_rtx (DImode
);
14610 rtx low_reg
= NULL_RTX
;
14611 rtx hi_reg
= NULL_RTX
;
14613 if (!src_aligned
|| !dst_aligned
)
14615 low_reg
= gen_lowpart (SImode
, reg0
);
14616 hi_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
14619 emit_move_insn (reg0
, src
);
14622 emit_insn (gen_unaligned_loadsi (low_reg
, src
));
14623 src
= next_consecutive_mem (src
);
14624 emit_insn (gen_unaligned_loadsi (hi_reg
, src
));
14628 emit_move_insn (dst
, reg0
);
14631 emit_insn (gen_unaligned_storesi (dst
, low_reg
));
14632 dst
= next_consecutive_mem (dst
);
14633 emit_insn (gen_unaligned_storesi (dst
, hi_reg
));
14636 src
= next_consecutive_mem (src
);
14637 dst
= next_consecutive_mem (dst
);
14640 gcc_assert (len
< 8);
14643 /* More than a word but less than a double-word to copy. Copy a word. */
14644 reg0
= gen_reg_rtx (SImode
);
14645 src
= adjust_address (src
, SImode
, 0);
14646 dst
= adjust_address (dst
, SImode
, 0);
14648 emit_move_insn (reg0
, src
);
14650 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14653 emit_move_insn (dst
, reg0
);
14655 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14657 src
= next_consecutive_mem (src
);
14658 dst
= next_consecutive_mem (dst
);
14665 /* Copy the remaining bytes. */
14668 dst
= adjust_address (dst
, HImode
, 0);
14669 src
= adjust_address (src
, HImode
, 0);
14670 reg0
= gen_reg_rtx (SImode
);
14672 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14674 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14677 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14679 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14681 src
= next_consecutive_mem (src
);
14682 dst
= next_consecutive_mem (dst
);
14687 dst
= adjust_address (dst
, QImode
, 0);
14688 src
= adjust_address (src
, QImode
, 0);
14689 reg0
= gen_reg_rtx (QImode
);
14690 emit_move_insn (reg0
, src
);
14691 emit_move_insn (dst
, reg0
);
14695 /* Select a dominance comparison mode if possible for a test of the general
14696 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14697 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14698 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14699 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14700 In all cases OP will be either EQ or NE, but we don't need to know which
14701 here. If we are unable to support a dominance comparison we return
14702 CC mode. This will then fail to match for the RTL expressions that
14703 generate this call. */
14705 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14707 enum rtx_code cond1
, cond2
;
14710 /* Currently we will probably get the wrong result if the individual
14711 comparisons are not simple. This also ensures that it is safe to
14712 reverse a comparison if necessary. */
14713 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14715 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14719 /* The if_then_else variant of this tests the second condition if the
14720 first passes, but is true if the first fails. Reverse the first
14721 condition to get a true "inclusive-or" expression. */
14722 if (cond_or
== DOM_CC_NX_OR_Y
)
14723 cond1
= reverse_condition (cond1
);
14725 /* If the comparisons are not equal, and one doesn't dominate the other,
14726 then we can't do this. */
14728 && !comparison_dominates_p (cond1
, cond2
)
14729 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14733 std::swap (cond1
, cond2
);
14738 if (cond_or
== DOM_CC_X_AND_Y
)
14743 case EQ
: return CC_DEQmode
;
14744 case LE
: return CC_DLEmode
;
14745 case LEU
: return CC_DLEUmode
;
14746 case GE
: return CC_DGEmode
;
14747 case GEU
: return CC_DGEUmode
;
14748 default: gcc_unreachable ();
14752 if (cond_or
== DOM_CC_X_AND_Y
)
14764 gcc_unreachable ();
14768 if (cond_or
== DOM_CC_X_AND_Y
)
14780 gcc_unreachable ();
14784 if (cond_or
== DOM_CC_X_AND_Y
)
14785 return CC_DLTUmode
;
14790 return CC_DLTUmode
;
14792 return CC_DLEUmode
;
14796 gcc_unreachable ();
14800 if (cond_or
== DOM_CC_X_AND_Y
)
14801 return CC_DGTUmode
;
14806 return CC_DGTUmode
;
14808 return CC_DGEUmode
;
14812 gcc_unreachable ();
14815 /* The remaining cases only occur when both comparisons are the
14818 gcc_assert (cond1
== cond2
);
14822 gcc_assert (cond1
== cond2
);
14826 gcc_assert (cond1
== cond2
);
14830 gcc_assert (cond1
== cond2
);
14831 return CC_DLEUmode
;
14834 gcc_assert (cond1
== cond2
);
14835 return CC_DGEUmode
;
14838 gcc_unreachable ();
14843 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14845 /* All floating point compares return CCFP if it is an equality
14846 comparison, and CCFPE otherwise. */
14847 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14870 gcc_unreachable ();
14874 /* A compare with a shifted operand. Because of canonicalization, the
14875 comparison will have to be swapped when we emit the assembler. */
14876 if (GET_MODE (y
) == SImode
14877 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14878 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14879 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14880 || GET_CODE (x
) == ROTATERT
))
14883 /* This operation is performed swapped, but since we only rely on the Z
14884 flag we don't need an additional mode. */
14885 if (GET_MODE (y
) == SImode
14886 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14887 && GET_CODE (x
) == NEG
14888 && (op
== EQ
|| op
== NE
))
14891 /* This is a special case that is used by combine to allow a
14892 comparison of a shifted byte load to be split into a zero-extend
14893 followed by a comparison of the shifted integer (only valid for
14894 equalities and unsigned inequalities). */
14895 if (GET_MODE (x
) == SImode
14896 && GET_CODE (x
) == ASHIFT
14897 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14898 && GET_CODE (XEXP (x
, 0)) == SUBREG
14899 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14900 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14901 && (op
== EQ
|| op
== NE
14902 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14903 && CONST_INT_P (y
))
14906 /* A construct for a conditional compare, if the false arm contains
14907 0, then both conditions must be true, otherwise either condition
14908 must be true. Not all conditions are possible, so CCmode is
14909 returned if it can't be done. */
14910 if (GET_CODE (x
) == IF_THEN_ELSE
14911 && (XEXP (x
, 2) == const0_rtx
14912 || XEXP (x
, 2) == const1_rtx
)
14913 && COMPARISON_P (XEXP (x
, 0))
14914 && COMPARISON_P (XEXP (x
, 1)))
14915 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14916 INTVAL (XEXP (x
, 2)));
14918 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14919 if (GET_CODE (x
) == AND
14920 && (op
== EQ
|| op
== NE
)
14921 && COMPARISON_P (XEXP (x
, 0))
14922 && COMPARISON_P (XEXP (x
, 1)))
14923 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14926 if (GET_CODE (x
) == IOR
14927 && (op
== EQ
|| op
== NE
)
14928 && COMPARISON_P (XEXP (x
, 0))
14929 && COMPARISON_P (XEXP (x
, 1)))
14930 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14933 /* An operation (on Thumb) where we want to test for a single bit.
14934 This is done by shifting that bit up into the top bit of a
14935 scratch register; we can then branch on the sign bit. */
14937 && GET_MODE (x
) == SImode
14938 && (op
== EQ
|| op
== NE
)
14939 && GET_CODE (x
) == ZERO_EXTRACT
14940 && XEXP (x
, 1) == const1_rtx
)
14943 /* An operation that sets the condition codes as a side-effect, the
14944 V flag is not set correctly, so we can only use comparisons where
14945 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14947 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14948 if (GET_MODE (x
) == SImode
14950 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
14951 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
14952 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
14953 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
14954 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
14955 || GET_CODE (x
) == LSHIFTRT
14956 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14957 || GET_CODE (x
) == ROTATERT
14958 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
14959 return CC_NOOVmode
;
14961 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
14964 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
14965 && GET_CODE (x
) == PLUS
14966 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
14969 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
14975 /* A DImode comparison against zero can be implemented by
14976 or'ing the two halves together. */
14977 if (y
== const0_rtx
)
14980 /* We can do an equality test in three Thumb instructions. */
14990 /* DImode unsigned comparisons can be implemented by cmp +
14991 cmpeq without a scratch register. Not worth doing in
15002 /* DImode signed and unsigned comparisons can be implemented
15003 by cmp + sbcs with a scratch register, but that does not
15004 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15005 gcc_assert (op
!= EQ
&& op
!= NE
);
15009 gcc_unreachable ();
15013 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
15014 return GET_MODE (x
);
15019 /* X and Y are two things to compare using CODE. Emit the compare insn and
15020 return the rtx for register 0 in the proper mode. FP means this is a
15021 floating point compare: I don't think that it is needed on the arm. */
15023 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
15027 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
15029 /* We might have X as a constant, Y as a register because of the predicates
15030 used for cmpdi. If so, force X to a register here. */
15031 if (dimode_comparison
&& !REG_P (x
))
15032 x
= force_reg (DImode
, x
);
15034 mode
= SELECT_CC_MODE (code
, x
, y
);
15035 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
15037 if (dimode_comparison
15038 && mode
!= CC_CZmode
)
15042 /* To compare two non-zero values for equality, XOR them and
15043 then compare against zero. Not used for ARM mode; there
15044 CC_CZmode is cheaper. */
15045 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
15047 gcc_assert (!reload_completed
);
15048 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
15052 /* A scratch register is required. */
15053 if (reload_completed
)
15054 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
15056 scratch
= gen_rtx_SCRATCH (SImode
);
15058 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
15059 set
= gen_rtx_SET (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15060 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
15063 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15068 /* Generate a sequence of insns that will generate the correct return
15069 address mask depending on the physical architecture that the program
15072 arm_gen_return_addr_mask (void)
15074 rtx reg
= gen_reg_rtx (Pmode
);
15076 emit_insn (gen_return_addr_mask (reg
));
15081 arm_reload_in_hi (rtx
*operands
)
15083 rtx ref
= operands
[1];
15085 HOST_WIDE_INT offset
= 0;
15087 if (GET_CODE (ref
) == SUBREG
)
15089 offset
= SUBREG_BYTE (ref
);
15090 ref
= SUBREG_REG (ref
);
15095 /* We have a pseudo which has been spilt onto the stack; there
15096 are two cases here: the first where there is a simple
15097 stack-slot replacement and a second where the stack-slot is
15098 out of range, or is used as a subreg. */
15099 if (reg_equiv_mem (REGNO (ref
)))
15101 ref
= reg_equiv_mem (REGNO (ref
));
15102 base
= find_replacement (&XEXP (ref
, 0));
15105 /* The slot is out of range, or was dressed up in a SUBREG. */
15106 base
= reg_equiv_address (REGNO (ref
));
15108 /* PR 62554: If there is no equivalent memory location then just move
15109 the value as an SImode register move. This happens when the target
15110 architecture variant does not have an HImode register move. */
15113 gcc_assert (REG_P (operands
[0]));
15114 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15115 gen_rtx_SUBREG (SImode
, ref
, 0)));
15120 base
= find_replacement (&XEXP (ref
, 0));
15122 /* Handle the case where the address is too complex to be offset by 1. */
15123 if (GET_CODE (base
) == MINUS
15124 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15126 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15128 emit_set_insn (base_plus
, base
);
15131 else if (GET_CODE (base
) == PLUS
)
15133 /* The addend must be CONST_INT, or we would have dealt with it above. */
15134 HOST_WIDE_INT hi
, lo
;
15136 offset
+= INTVAL (XEXP (base
, 1));
15137 base
= XEXP (base
, 0);
15139 /* Rework the address into a legal sequence of insns. */
15140 /* Valid range for lo is -4095 -> 4095 */
15143 : -((-offset
) & 0xfff));
15145 /* Corner case, if lo is the max offset then we would be out of range
15146 once we have added the additional 1 below, so bump the msb into the
15147 pre-loading insn(s). */
15151 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15152 ^ (HOST_WIDE_INT
) 0x80000000)
15153 - (HOST_WIDE_INT
) 0x80000000);
15155 gcc_assert (hi
+ lo
== offset
);
15159 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15161 /* Get the base address; addsi3 knows how to handle constants
15162 that require more than one insn. */
15163 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15169 /* Operands[2] may overlap operands[0] (though it won't overlap
15170 operands[1]), that's why we asked for a DImode reg -- so we can
15171 use the bit that does not overlap. */
15172 if (REGNO (operands
[2]) == REGNO (operands
[0]))
15173 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15175 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15177 emit_insn (gen_zero_extendqisi2 (scratch
,
15178 gen_rtx_MEM (QImode
,
15179 plus_constant (Pmode
, base
,
15181 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15182 gen_rtx_MEM (QImode
,
15183 plus_constant (Pmode
, base
,
15185 if (!BYTES_BIG_ENDIAN
)
15186 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15187 gen_rtx_IOR (SImode
,
15190 gen_rtx_SUBREG (SImode
, operands
[0], 0),
15194 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15195 gen_rtx_IOR (SImode
,
15196 gen_rtx_ASHIFT (SImode
, scratch
,
15198 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
15201 /* Handle storing a half-word to memory during reload by synthesizing as two
15202 byte stores. Take care not to clobber the input values until after we
15203 have moved them somewhere safe. This code assumes that if the DImode
15204 scratch in operands[2] overlaps either the input value or output address
15205 in some way, then that value must die in this insn (we absolutely need
15206 two scratch registers for some corner cases). */
15208 arm_reload_out_hi (rtx
*operands
)
15210 rtx ref
= operands
[0];
15211 rtx outval
= operands
[1];
15213 HOST_WIDE_INT offset
= 0;
15215 if (GET_CODE (ref
) == SUBREG
)
15217 offset
= SUBREG_BYTE (ref
);
15218 ref
= SUBREG_REG (ref
);
15223 /* We have a pseudo which has been spilt onto the stack; there
15224 are two cases here: the first where there is a simple
15225 stack-slot replacement and a second where the stack-slot is
15226 out of range, or is used as a subreg. */
15227 if (reg_equiv_mem (REGNO (ref
)))
15229 ref
= reg_equiv_mem (REGNO (ref
));
15230 base
= find_replacement (&XEXP (ref
, 0));
15233 /* The slot is out of range, or was dressed up in a SUBREG. */
15234 base
= reg_equiv_address (REGNO (ref
));
15236 /* PR 62254: If there is no equivalent memory location then just move
15237 the value as an SImode register move. This happens when the target
15238 architecture variant does not have an HImode register move. */
15241 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
15243 if (REG_P (outval
))
15245 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15246 gen_rtx_SUBREG (SImode
, outval
, 0)));
15248 else /* SUBREG_P (outval) */
15250 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
15251 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15252 SUBREG_REG (outval
)));
15254 /* FIXME: Handle other cases ? */
15255 gcc_unreachable ();
15261 base
= find_replacement (&XEXP (ref
, 0));
15263 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15265 /* Handle the case where the address is too complex to be offset by 1. */
15266 if (GET_CODE (base
) == MINUS
15267 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15269 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15271 /* Be careful not to destroy OUTVAL. */
15272 if (reg_overlap_mentioned_p (base_plus
, outval
))
15274 /* Updating base_plus might destroy outval, see if we can
15275 swap the scratch and base_plus. */
15276 if (!reg_overlap_mentioned_p (scratch
, outval
))
15277 std::swap (scratch
, base_plus
);
15280 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15282 /* Be conservative and copy OUTVAL into the scratch now,
15283 this should only be necessary if outval is a subreg
15284 of something larger than a word. */
15285 /* XXX Might this clobber base? I can't see how it can,
15286 since scratch is known to overlap with OUTVAL, and
15287 must be wider than a word. */
15288 emit_insn (gen_movhi (scratch_hi
, outval
));
15289 outval
= scratch_hi
;
15293 emit_set_insn (base_plus
, base
);
15296 else if (GET_CODE (base
) == PLUS
)
15298 /* The addend must be CONST_INT, or we would have dealt with it above. */
15299 HOST_WIDE_INT hi
, lo
;
15301 offset
+= INTVAL (XEXP (base
, 1));
15302 base
= XEXP (base
, 0);
15304 /* Rework the address into a legal sequence of insns. */
15305 /* Valid range for lo is -4095 -> 4095 */
15308 : -((-offset
) & 0xfff));
15310 /* Corner case, if lo is the max offset then we would be out of range
15311 once we have added the additional 1 below, so bump the msb into the
15312 pre-loading insn(s). */
15316 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15317 ^ (HOST_WIDE_INT
) 0x80000000)
15318 - (HOST_WIDE_INT
) 0x80000000);
15320 gcc_assert (hi
+ lo
== offset
);
15324 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15326 /* Be careful not to destroy OUTVAL. */
15327 if (reg_overlap_mentioned_p (base_plus
, outval
))
15329 /* Updating base_plus might destroy outval, see if we
15330 can swap the scratch and base_plus. */
15331 if (!reg_overlap_mentioned_p (scratch
, outval
))
15332 std::swap (scratch
, base_plus
);
15335 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15337 /* Be conservative and copy outval into scratch now,
15338 this should only be necessary if outval is a
15339 subreg of something larger than a word. */
15340 /* XXX Might this clobber base? I can't see how it
15341 can, since scratch is known to overlap with
15343 emit_insn (gen_movhi (scratch_hi
, outval
));
15344 outval
= scratch_hi
;
15348 /* Get the base address; addsi3 knows how to handle constants
15349 that require more than one insn. */
15350 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15356 if (BYTES_BIG_ENDIAN
)
15358 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15359 plus_constant (Pmode
, base
,
15361 gen_lowpart (QImode
, outval
)));
15362 emit_insn (gen_lshrsi3 (scratch
,
15363 gen_rtx_SUBREG (SImode
, outval
, 0),
15365 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15367 gen_lowpart (QImode
, scratch
)));
15371 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15373 gen_lowpart (QImode
, outval
)));
15374 emit_insn (gen_lshrsi3 (scratch
,
15375 gen_rtx_SUBREG (SImode
, outval
, 0),
15377 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15378 plus_constant (Pmode
, base
,
15380 gen_lowpart (QImode
, scratch
)));
15384 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15385 (padded to the size of a word) should be passed in a register. */
15388 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
15390 if (TARGET_AAPCS_BASED
)
15391 return must_pass_in_stack_var_size (mode
, type
);
15393 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15397 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15398 byte of a stack argument has useful data. For legacy APCS ABIs we use
15399 the default. For AAPCS based ABIs small aggregate types are placed
15400 in the lowest memory address. */
15402 static pad_direction
15403 arm_function_arg_padding (machine_mode mode
, const_tree type
)
15405 if (!TARGET_AAPCS_BASED
)
15406 return default_function_arg_padding (mode
, type
);
15408 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15409 return PAD_DOWNWARD
;
15415 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15416 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15417 register has useful data, and return the opposite if the most
15418 significant byte does. */
15421 arm_pad_reg_upward (machine_mode mode
,
15422 tree type
, int first ATTRIBUTE_UNUSED
)
15424 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15426 /* For AAPCS, small aggregates, small fixed-point types,
15427 and small complex types are always padded upwards. */
15430 if ((AGGREGATE_TYPE_P (type
)
15431 || TREE_CODE (type
) == COMPLEX_TYPE
15432 || FIXED_POINT_TYPE_P (type
))
15433 && int_size_in_bytes (type
) <= 4)
15438 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15439 && GET_MODE_SIZE (mode
) <= 4)
15444 /* Otherwise, use default padding. */
15445 return !BYTES_BIG_ENDIAN
;
15448 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15449 assuming that the address in the base register is word aligned. */
15451 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15453 HOST_WIDE_INT max_offset
;
15455 /* Offset must be a multiple of 4 in Thumb mode. */
15456 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15461 else if (TARGET_ARM
)
15466 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15469 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15470 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15471 Assumes that the address in the base register RN is word aligned. Pattern
15472 guarantees that both memory accesses use the same base register,
15473 the offsets are constants within the range, and the gap between the offsets is 4.
15474 If preload complete then check that registers are legal. WBACK indicates whether
15475 address is updated. LOAD indicates whether memory access is load or store. */
15477 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15478 bool wback
, bool load
)
15480 unsigned int t
, t2
, n
;
15482 if (!reload_completed
)
15485 if (!offset_ok_for_ldrd_strd (offset
))
15492 if ((TARGET_THUMB2
)
15493 && ((wback
&& (n
== t
|| n
== t2
))
15494 || (t
== SP_REGNUM
)
15495 || (t
== PC_REGNUM
)
15496 || (t2
== SP_REGNUM
)
15497 || (t2
== PC_REGNUM
)
15498 || (!load
&& (n
== PC_REGNUM
))
15499 || (load
&& (t
== t2
))
15500 /* Triggers Cortex-M3 LDRD errata. */
15501 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15505 && ((wback
&& (n
== t
|| n
== t2
))
15506 || (t2
== PC_REGNUM
)
15507 || (t
% 2 != 0) /* First destination register is not even. */
15509 /* PC can be used as base register (for offset addressing only),
15510 but it is depricated. */
15511 || (n
== PC_REGNUM
)))
15517 /* Return true if a 64-bit access with alignment ALIGN and with a
15518 constant offset OFFSET from the base pointer is permitted on this
15521 align_ok_ldrd_strd (HOST_WIDE_INT align
, HOST_WIDE_INT offset
)
15523 return (unaligned_access
15524 ? (align
>= BITS_PER_WORD
&& (offset
& 3) == 0)
15525 : (align
>= 2 * BITS_PER_WORD
&& (offset
& 7) == 0));
15528 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15529 operand MEM's address contains an immediate offset from the base
15530 register and has no side effects, in which case it sets BASE,
15531 OFFSET and ALIGN accordingly. */
15533 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
, HOST_WIDE_INT
*align
)
15537 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15539 /* TODO: Handle more general memory operand patterns, such as
15540 PRE_DEC and PRE_INC. */
15542 if (side_effects_p (mem
))
15545 /* Can't deal with subregs. */
15546 if (GET_CODE (mem
) == SUBREG
)
15549 gcc_assert (MEM_P (mem
));
15551 *offset
= const0_rtx
;
15552 *align
= MEM_ALIGN (mem
);
15554 addr
= XEXP (mem
, 0);
15556 /* If addr isn't valid for DImode, then we can't handle it. */
15557 if (!arm_legitimate_address_p (DImode
, addr
,
15558 reload_in_progress
|| reload_completed
))
15566 else if (GET_CODE (addr
) == PLUS
)
15568 *base
= XEXP (addr
, 0);
15569 *offset
= XEXP (addr
, 1);
15570 return (REG_P (*base
) && CONST_INT_P (*offset
));
15576 /* Called from a peephole2 to replace two word-size accesses with a
15577 single LDRD/STRD instruction. Returns true iff we can generate a
15578 new instruction sequence. That is, both accesses use the same base
15579 register and the gap between constant offsets is 4. This function
15580 may reorder its operands to match ldrd/strd RTL templates.
15581 OPERANDS are the operands found by the peephole matcher;
15582 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15583 corresponding memory operands. LOAD indicaates whether the access
15584 is load or store. CONST_STORE indicates a store of constant
15585 integer values held in OPERANDS[4,5] and assumes that the pattern
15586 is of length 4 insn, for the purpose of checking dead registers.
15587 COMMUTE indicates that register operands may be reordered. */
15589 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15590 bool const_store
, bool commute
)
15593 HOST_WIDE_INT offsets
[2], offset
, align
[2];
15594 rtx base
= NULL_RTX
;
15595 rtx cur_base
, cur_offset
, tmp
;
15597 HARD_REG_SET regset
;
15599 gcc_assert (!const_store
|| !load
);
15600 /* Check that the memory references are immediate offsets from the
15601 same base register. Extract the base register, the destination
15602 registers, and the corresponding memory offsets. */
15603 for (i
= 0; i
< nops
; i
++)
15605 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
,
15611 else if (REGNO (base
) != REGNO (cur_base
))
15614 offsets
[i
] = INTVAL (cur_offset
);
15615 if (GET_CODE (operands
[i
]) == SUBREG
)
15617 tmp
= SUBREG_REG (operands
[i
]);
15618 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15623 /* Make sure there is no dependency between the individual loads. */
15624 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15625 return false; /* RAW */
15627 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15628 return false; /* WAW */
15630 /* If the same input register is used in both stores
15631 when storing different constants, try to find a free register.
15632 For example, the code
15637 can be transformed into
15641 in Thumb mode assuming that r1 is free.
15642 For ARM mode do the same but only if the starting register
15643 can be made to be even. */
15645 && REGNO (operands
[0]) == REGNO (operands
[1])
15646 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15650 CLEAR_HARD_REG_SET (regset
);
15651 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15652 if (tmp
== NULL_RTX
)
15655 /* Use the new register in the first load to ensure that
15656 if the original input register is not dead after peephole,
15657 then it will have the correct constant value. */
15660 else if (TARGET_ARM
)
15662 int regno
= REGNO (operands
[0]);
15663 if (!peep2_reg_dead_p (4, operands
[0]))
15665 /* When the input register is even and is not dead after the
15666 pattern, it has to hold the second constant but we cannot
15667 form a legal STRD in ARM mode with this register as the second
15669 if (regno
% 2 == 0)
15672 /* Is regno-1 free? */
15673 SET_HARD_REG_SET (regset
);
15674 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15675 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15676 if (tmp
== NULL_RTX
)
15683 /* Find a DImode register. */
15684 CLEAR_HARD_REG_SET (regset
);
15685 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15686 if (tmp
!= NULL_RTX
)
15688 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15689 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15693 /* Can we use the input register to form a DI register? */
15694 SET_HARD_REG_SET (regset
);
15695 CLEAR_HARD_REG_BIT(regset
,
15696 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15697 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15698 if (tmp
== NULL_RTX
)
15700 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15704 gcc_assert (operands
[0] != NULL_RTX
);
15705 gcc_assert (operands
[1] != NULL_RTX
);
15706 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15707 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15711 /* Make sure the instructions are ordered with lower memory access first. */
15712 if (offsets
[0] > offsets
[1])
15714 gap
= offsets
[0] - offsets
[1];
15715 offset
= offsets
[1];
15717 /* Swap the instructions such that lower memory is accessed first. */
15718 std::swap (operands
[0], operands
[1]);
15719 std::swap (operands
[2], operands
[3]);
15720 std::swap (align
[0], align
[1]);
15722 std::swap (operands
[4], operands
[5]);
15726 gap
= offsets
[1] - offsets
[0];
15727 offset
= offsets
[0];
15730 /* Make sure accesses are to consecutive memory locations. */
15731 if (gap
!= GET_MODE_SIZE (SImode
))
15734 if (!align_ok_ldrd_strd (align
[0], offset
))
15737 /* Make sure we generate legal instructions. */
15738 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15742 /* In Thumb state, where registers are almost unconstrained, there
15743 is little hope to fix it. */
15747 if (load
&& commute
)
15749 /* Try reordering registers. */
15750 std::swap (operands
[0], operands
[1]);
15751 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15758 /* If input registers are dead after this pattern, they can be
15759 reordered or replaced by other registers that are free in the
15760 current pattern. */
15761 if (!peep2_reg_dead_p (4, operands
[0])
15762 || !peep2_reg_dead_p (4, operands
[1]))
15765 /* Try to reorder the input registers. */
15766 /* For example, the code
15771 can be transformed into
15776 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15779 std::swap (operands
[0], operands
[1]);
15783 /* Try to find a free DI register. */
15784 CLEAR_HARD_REG_SET (regset
);
15785 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15786 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15789 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15790 if (tmp
== NULL_RTX
)
15793 /* DREG must be an even-numbered register in DImode.
15794 Split it into SI registers. */
15795 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15796 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15797 gcc_assert (operands
[0] != NULL_RTX
);
15798 gcc_assert (operands
[1] != NULL_RTX
);
15799 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15800 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15802 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15812 /* Return true if parallel execution of the two word-size accesses provided
15813 could be satisfied with a single LDRD/STRD instruction. Two word-size
15814 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
15815 register operands and OPERANDS[2,3] are the corresponding memory operands.
15818 valid_operands_ldrd_strd (rtx
*operands
, bool load
)
15821 HOST_WIDE_INT offsets
[2], offset
, align
[2];
15822 rtx base
= NULL_RTX
;
15823 rtx cur_base
, cur_offset
;
15826 /* Check that the memory references are immediate offsets from the
15827 same base register. Extract the base register, the destination
15828 registers, and the corresponding memory offsets. */
15829 for (i
= 0; i
< nops
; i
++)
15831 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
,
15837 else if (REGNO (base
) != REGNO (cur_base
))
15840 offsets
[i
] = INTVAL (cur_offset
);
15841 if (GET_CODE (operands
[i
]) == SUBREG
)
15845 if (offsets
[0] > offsets
[1])
15848 gap
= offsets
[1] - offsets
[0];
15849 offset
= offsets
[0];
15851 /* Make sure accesses are to consecutive memory locations. */
15852 if (gap
!= GET_MODE_SIZE (SImode
))
15855 if (!align_ok_ldrd_strd (align
[0], offset
))
15858 return operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15863 /* Print a symbolic form of X to the debug file, F. */
15865 arm_print_value (FILE *f
, rtx x
)
15867 switch (GET_CODE (x
))
15870 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15874 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15882 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15884 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15885 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15893 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15897 fprintf (f
, "`%s'", XSTR (x
, 0));
15901 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15905 arm_print_value (f
, XEXP (x
, 0));
15909 arm_print_value (f
, XEXP (x
, 0));
15911 arm_print_value (f
, XEXP (x
, 1));
15919 fprintf (f
, "????");
15924 /* Routines for manipulation of the constant pool. */
15926 /* Arm instructions cannot load a large constant directly into a
15927 register; they have to come from a pc relative load. The constant
15928 must therefore be placed in the addressable range of the pc
15929 relative load. Depending on the precise pc relative load
15930 instruction the range is somewhere between 256 bytes and 4k. This
15931 means that we often have to dump a constant inside a function, and
15932 generate code to branch around it.
15934 It is important to minimize this, since the branches will slow
15935 things down and make the code larger.
15937 Normally we can hide the table after an existing unconditional
15938 branch so that there is no interruption of the flow, but in the
15939 worst case the code looks like this:
15957 We fix this by performing a scan after scheduling, which notices
15958 which instructions need to have their operands fetched from the
15959 constant table and builds the table.
15961 The algorithm starts by building a table of all the constants that
15962 need fixing up and all the natural barriers in the function (places
15963 where a constant table can be dropped without breaking the flow).
15964 For each fixup we note how far the pc-relative replacement will be
15965 able to reach and the offset of the instruction into the function.
15967 Having built the table we then group the fixes together to form
15968 tables that are as large as possible (subject to addressing
15969 constraints) and emit each table of constants after the last
15970 barrier that is within range of all the instructions in the group.
15971 If a group does not contain a barrier, then we forcibly create one
15972 by inserting a jump instruction into the flow. Once the table has
15973 been inserted, the insns are then modified to reference the
15974 relevant entry in the pool.
15976 Possible enhancements to the algorithm (not implemented) are:
15978 1) For some processors and object formats, there may be benefit in
15979 aligning the pools to the start of cache lines; this alignment
15980 would need to be taken into account when calculating addressability
15983 /* These typedefs are located at the start of this file, so that
15984 they can be used in the prototypes there. This comment is to
15985 remind readers of that fact so that the following structures
15986 can be understood more easily.
15988 typedef struct minipool_node Mnode;
15989 typedef struct minipool_fixup Mfix; */
15991 struct minipool_node
15993 /* Doubly linked chain of entries. */
15996 /* The maximum offset into the code that this entry can be placed. While
15997 pushing fixes for forward references, all entries are sorted in order
15998 of increasing max_address. */
15999 HOST_WIDE_INT max_address
;
16000 /* Similarly for an entry inserted for a backwards ref. */
16001 HOST_WIDE_INT min_address
;
16002 /* The number of fixes referencing this entry. This can become zero
16003 if we "unpush" an entry. In this case we ignore the entry when we
16004 come to emit the code. */
16006 /* The offset from the start of the minipool. */
16007 HOST_WIDE_INT offset
;
16008 /* The value in table. */
16010 /* The mode of value. */
16012 /* The size of the value. With iWMMXt enabled
16013 sizes > 4 also imply an alignment of 8-bytes. */
16017 struct minipool_fixup
16021 HOST_WIDE_INT address
;
16027 HOST_WIDE_INT forwards
;
16028 HOST_WIDE_INT backwards
;
16031 /* Fixes less than a word need padding out to a word boundary. */
16032 #define MINIPOOL_FIX_SIZE(mode) \
16033 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
16035 static Mnode
* minipool_vector_head
;
16036 static Mnode
* minipool_vector_tail
;
16037 static rtx_code_label
*minipool_vector_label
;
16038 static int minipool_pad
;
16040 /* The linked list of all minipool fixes required for this function. */
16041 Mfix
* minipool_fix_head
;
16042 Mfix
* minipool_fix_tail
;
16043 /* The fix entry for the current minipool, once it has been placed. */
16044 Mfix
* minipool_barrier
;
16046 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16047 #define JUMP_TABLES_IN_TEXT_SECTION 0
16050 static HOST_WIDE_INT
16051 get_jump_table_size (rtx_jump_table_data
*insn
)
16053 /* ADDR_VECs only take room if read-only data does into the text
16055 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
16057 rtx body
= PATTERN (insn
);
16058 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
16059 HOST_WIDE_INT size
;
16060 HOST_WIDE_INT modesize
;
16062 modesize
= GET_MODE_SIZE (GET_MODE (body
));
16063 size
= modesize
* XVECLEN (body
, elt
);
16067 /* Round up size of TBB table to a halfword boundary. */
16068 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
16071 /* No padding necessary for TBH. */
16074 /* Add two bytes for alignment on Thumb. */
16079 gcc_unreachable ();
16087 /* Return the maximum amount of padding that will be inserted before
16090 static HOST_WIDE_INT
16091 get_label_padding (rtx label
)
16093 HOST_WIDE_INT align
, min_insn_size
;
16095 align
= 1 << label_to_alignment (label
).levels
[0].log
;
16096 min_insn_size
= TARGET_THUMB
? 2 : 4;
16097 return align
> min_insn_size
? align
- min_insn_size
: 0;
16100 /* Move a minipool fix MP from its current location to before MAX_MP.
16101 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16102 constraints may need updating. */
16104 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
16105 HOST_WIDE_INT max_address
)
16107 /* The code below assumes these are different. */
16108 gcc_assert (mp
!= max_mp
);
16110 if (max_mp
== NULL
)
16112 if (max_address
< mp
->max_address
)
16113 mp
->max_address
= max_address
;
16117 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16118 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16120 mp
->max_address
= max_address
;
16122 /* Unlink MP from its current position. Since max_mp is non-null,
16123 mp->prev must be non-null. */
16124 mp
->prev
->next
= mp
->next
;
16125 if (mp
->next
!= NULL
)
16126 mp
->next
->prev
= mp
->prev
;
16128 minipool_vector_tail
= mp
->prev
;
16130 /* Re-insert it before MAX_MP. */
16132 mp
->prev
= max_mp
->prev
;
16135 if (mp
->prev
!= NULL
)
16136 mp
->prev
->next
= mp
;
16138 minipool_vector_head
= mp
;
16141 /* Save the new entry. */
16144 /* Scan over the preceding entries and adjust their addresses as
16146 while (mp
->prev
!= NULL
16147 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16149 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16156 /* Add a constant to the minipool for a forward reference. Returns the
16157 node added or NULL if the constant will not fit in this pool. */
16159 add_minipool_forward_ref (Mfix
*fix
)
16161 /* If set, max_mp is the first pool_entry that has a lower
16162 constraint than the one we are trying to add. */
16163 Mnode
* max_mp
= NULL
;
16164 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
16167 /* If the minipool starts before the end of FIX->INSN then this FIX
16168 cannot be placed into the current pool. Furthermore, adding the
16169 new constant pool entry may cause the pool to start FIX_SIZE bytes
16171 if (minipool_vector_head
&&
16172 (fix
->address
+ get_attr_length (fix
->insn
)
16173 >= minipool_vector_head
->max_address
- fix
->fix_size
))
16176 /* Scan the pool to see if a constant with the same value has
16177 already been added. While we are doing this, also note the
16178 location where we must insert the constant if it doesn't already
16180 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16182 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16183 && fix
->mode
== mp
->mode
16184 && (!LABEL_P (fix
->value
)
16185 || (CODE_LABEL_NUMBER (fix
->value
)
16186 == CODE_LABEL_NUMBER (mp
->value
)))
16187 && rtx_equal_p (fix
->value
, mp
->value
))
16189 /* More than one fix references this entry. */
16191 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
16194 /* Note the insertion point if necessary. */
16196 && mp
->max_address
> max_address
)
16199 /* If we are inserting an 8-bytes aligned quantity and
16200 we have not already found an insertion point, then
16201 make sure that all such 8-byte aligned quantities are
16202 placed at the start of the pool. */
16203 if (ARM_DOUBLEWORD_ALIGN
16205 && fix
->fix_size
>= 8
16206 && mp
->fix_size
< 8)
16209 max_address
= mp
->max_address
;
16213 /* The value is not currently in the minipool, so we need to create
16214 a new entry for it. If MAX_MP is NULL, the entry will be put on
16215 the end of the list since the placement is less constrained than
16216 any existing entry. Otherwise, we insert the new fix before
16217 MAX_MP and, if necessary, adjust the constraints on the other
16220 mp
->fix_size
= fix
->fix_size
;
16221 mp
->mode
= fix
->mode
;
16222 mp
->value
= fix
->value
;
16224 /* Not yet required for a backwards ref. */
16225 mp
->min_address
= -65536;
16227 if (max_mp
== NULL
)
16229 mp
->max_address
= max_address
;
16231 mp
->prev
= minipool_vector_tail
;
16233 if (mp
->prev
== NULL
)
16235 minipool_vector_head
= mp
;
16236 minipool_vector_label
= gen_label_rtx ();
16239 mp
->prev
->next
= mp
;
16241 minipool_vector_tail
= mp
;
16245 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16246 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16248 mp
->max_address
= max_address
;
16251 mp
->prev
= max_mp
->prev
;
16253 if (mp
->prev
!= NULL
)
16254 mp
->prev
->next
= mp
;
16256 minipool_vector_head
= mp
;
16259 /* Save the new entry. */
16262 /* Scan over the preceding entries and adjust their addresses as
16264 while (mp
->prev
!= NULL
16265 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16267 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16275 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
16276 HOST_WIDE_INT min_address
)
16278 HOST_WIDE_INT offset
;
16280 /* The code below assumes these are different. */
16281 gcc_assert (mp
!= min_mp
);
16283 if (min_mp
== NULL
)
16285 if (min_address
> mp
->min_address
)
16286 mp
->min_address
= min_address
;
16290 /* We will adjust this below if it is too loose. */
16291 mp
->min_address
= min_address
;
16293 /* Unlink MP from its current position. Since min_mp is non-null,
16294 mp->next must be non-null. */
16295 mp
->next
->prev
= mp
->prev
;
16296 if (mp
->prev
!= NULL
)
16297 mp
->prev
->next
= mp
->next
;
16299 minipool_vector_head
= mp
->next
;
16301 /* Reinsert it after MIN_MP. */
16303 mp
->next
= min_mp
->next
;
16305 if (mp
->next
!= NULL
)
16306 mp
->next
->prev
= mp
;
16308 minipool_vector_tail
= mp
;
16314 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16316 mp
->offset
= offset
;
16317 if (mp
->refcount
> 0)
16318 offset
+= mp
->fix_size
;
16320 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16321 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16327 /* Add a constant to the minipool for a backward reference. Returns the
16328 node added or NULL if the constant will not fit in this pool.
16330 Note that the code for insertion for a backwards reference can be
16331 somewhat confusing because the calculated offsets for each fix do
16332 not take into account the size of the pool (which is still under
16335 add_minipool_backward_ref (Mfix
*fix
)
16337 /* If set, min_mp is the last pool_entry that has a lower constraint
16338 than the one we are trying to add. */
16339 Mnode
*min_mp
= NULL
;
16340 /* This can be negative, since it is only a constraint. */
16341 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16344 /* If we can't reach the current pool from this insn, or if we can't
16345 insert this entry at the end of the pool without pushing other
16346 fixes out of range, then we don't try. This ensures that we
16347 can't fail later on. */
16348 if (min_address
>= minipool_barrier
->address
16349 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16350 >= minipool_barrier
->address
))
16353 /* Scan the pool to see if a constant with the same value has
16354 already been added. While we are doing this, also note the
16355 location where we must insert the constant if it doesn't already
16357 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16359 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16360 && fix
->mode
== mp
->mode
16361 && (!LABEL_P (fix
->value
)
16362 || (CODE_LABEL_NUMBER (fix
->value
)
16363 == CODE_LABEL_NUMBER (mp
->value
)))
16364 && rtx_equal_p (fix
->value
, mp
->value
)
16365 /* Check that there is enough slack to move this entry to the
16366 end of the table (this is conservative). */
16367 && (mp
->max_address
16368 > (minipool_barrier
->address
16369 + minipool_vector_tail
->offset
16370 + minipool_vector_tail
->fix_size
)))
16373 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16376 if (min_mp
!= NULL
)
16377 mp
->min_address
+= fix
->fix_size
;
16380 /* Note the insertion point if necessary. */
16381 if (mp
->min_address
< min_address
)
16383 /* For now, we do not allow the insertion of 8-byte alignment
16384 requiring nodes anywhere but at the start of the pool. */
16385 if (ARM_DOUBLEWORD_ALIGN
16386 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16391 else if (mp
->max_address
16392 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16394 /* Inserting before this entry would push the fix beyond
16395 its maximum address (which can happen if we have
16396 re-located a forwards fix); force the new fix to come
16398 if (ARM_DOUBLEWORD_ALIGN
16399 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16404 min_address
= mp
->min_address
+ fix
->fix_size
;
16407 /* Do not insert a non-8-byte aligned quantity before 8-byte
16408 aligned quantities. */
16409 else if (ARM_DOUBLEWORD_ALIGN
16410 && fix
->fix_size
< 8
16411 && mp
->fix_size
>= 8)
16414 min_address
= mp
->min_address
+ fix
->fix_size
;
16419 /* We need to create a new entry. */
16421 mp
->fix_size
= fix
->fix_size
;
16422 mp
->mode
= fix
->mode
;
16423 mp
->value
= fix
->value
;
16425 mp
->max_address
= minipool_barrier
->address
+ 65536;
16427 mp
->min_address
= min_address
;
16429 if (min_mp
== NULL
)
16432 mp
->next
= minipool_vector_head
;
16434 if (mp
->next
== NULL
)
16436 minipool_vector_tail
= mp
;
16437 minipool_vector_label
= gen_label_rtx ();
16440 mp
->next
->prev
= mp
;
16442 minipool_vector_head
= mp
;
16446 mp
->next
= min_mp
->next
;
16450 if (mp
->next
!= NULL
)
16451 mp
->next
->prev
= mp
;
16453 minipool_vector_tail
= mp
;
16456 /* Save the new entry. */
16464 /* Scan over the following entries and adjust their offsets. */
16465 while (mp
->next
!= NULL
)
16467 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16468 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16471 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16473 mp
->next
->offset
= mp
->offset
;
16482 assign_minipool_offsets (Mfix
*barrier
)
16484 HOST_WIDE_INT offset
= 0;
16487 minipool_barrier
= barrier
;
16489 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16491 mp
->offset
= offset
;
16493 if (mp
->refcount
> 0)
16494 offset
+= mp
->fix_size
;
16498 /* Output the literal table */
16500 dump_minipool (rtx_insn
*scan
)
16506 if (ARM_DOUBLEWORD_ALIGN
)
16507 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16508 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16515 fprintf (dump_file
,
16516 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16517 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16519 scan
= emit_label_after (gen_label_rtx (), scan
);
16520 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16521 scan
= emit_label_after (minipool_vector_label
, scan
);
16523 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16525 if (mp
->refcount
> 0)
16529 fprintf (dump_file
,
16530 ";; Offset %u, min %ld, max %ld ",
16531 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16532 (unsigned long) mp
->max_address
);
16533 arm_print_value (dump_file
, mp
->value
);
16534 fputc ('\n', dump_file
);
16537 rtx val
= copy_rtx (mp
->value
);
16539 switch (GET_MODE_SIZE (mp
->mode
))
16541 #ifdef HAVE_consttable_1
16543 scan
= emit_insn_after (gen_consttable_1 (val
), scan
);
16547 #ifdef HAVE_consttable_2
16549 scan
= emit_insn_after (gen_consttable_2 (val
), scan
);
16553 #ifdef HAVE_consttable_4
16555 scan
= emit_insn_after (gen_consttable_4 (val
), scan
);
16559 #ifdef HAVE_consttable_8
16561 scan
= emit_insn_after (gen_consttable_8 (val
), scan
);
16565 #ifdef HAVE_consttable_16
16567 scan
= emit_insn_after (gen_consttable_16 (val
), scan
);
16572 gcc_unreachable ();
16580 minipool_vector_head
= minipool_vector_tail
= NULL
;
16581 scan
= emit_insn_after (gen_consttable_end (), scan
);
16582 scan
= emit_barrier_after (scan
);
16585 /* Return the cost of forcibly inserting a barrier after INSN. */
16587 arm_barrier_cost (rtx_insn
*insn
)
16589 /* Basing the location of the pool on the loop depth is preferable,
16590 but at the moment, the basic block information seems to be
16591 corrupt by this stage of the compilation. */
16592 int base_cost
= 50;
16593 rtx_insn
*next
= next_nonnote_insn (insn
);
16595 if (next
!= NULL
&& LABEL_P (next
))
16598 switch (GET_CODE (insn
))
16601 /* It will always be better to place the table before the label, rather
16610 return base_cost
- 10;
16613 return base_cost
+ 10;
16617 /* Find the best place in the insn stream in the range
16618 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16619 Create the barrier by inserting a jump and add a new fix entry for
16622 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16624 HOST_WIDE_INT count
= 0;
16625 rtx_barrier
*barrier
;
16626 rtx_insn
*from
= fix
->insn
;
16627 /* The instruction after which we will insert the jump. */
16628 rtx_insn
*selected
= NULL
;
16630 /* The address at which the jump instruction will be placed. */
16631 HOST_WIDE_INT selected_address
;
16633 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16634 rtx_code_label
*label
= gen_label_rtx ();
16636 selected_cost
= arm_barrier_cost (from
);
16637 selected_address
= fix
->address
;
16639 while (from
&& count
< max_count
)
16641 rtx_jump_table_data
*tmp
;
16644 /* This code shouldn't have been called if there was a natural barrier
16646 gcc_assert (!BARRIER_P (from
));
16648 /* Count the length of this insn. This must stay in sync with the
16649 code that pushes minipool fixes. */
16650 if (LABEL_P (from
))
16651 count
+= get_label_padding (from
);
16653 count
+= get_attr_length (from
);
16655 /* If there is a jump table, add its length. */
16656 if (tablejump_p (from
, NULL
, &tmp
))
16658 count
+= get_jump_table_size (tmp
);
16660 /* Jump tables aren't in a basic block, so base the cost on
16661 the dispatch insn. If we select this location, we will
16662 still put the pool after the table. */
16663 new_cost
= arm_barrier_cost (from
);
16665 if (count
< max_count
16666 && (!selected
|| new_cost
<= selected_cost
))
16669 selected_cost
= new_cost
;
16670 selected_address
= fix
->address
+ count
;
16673 /* Continue after the dispatch table. */
16674 from
= NEXT_INSN (tmp
);
16678 new_cost
= arm_barrier_cost (from
);
16680 if (count
< max_count
16681 && (!selected
|| new_cost
<= selected_cost
))
16684 selected_cost
= new_cost
;
16685 selected_address
= fix
->address
+ count
;
16688 from
= NEXT_INSN (from
);
16691 /* Make sure that we found a place to insert the jump. */
16692 gcc_assert (selected
);
16694 /* Create a new JUMP_INSN that branches around a barrier. */
16695 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16696 JUMP_LABEL (from
) = label
;
16697 barrier
= emit_barrier_after (from
);
16698 emit_label_after (label
, barrier
);
16700 /* Create a minipool barrier entry for the new barrier. */
16701 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16702 new_fix
->insn
= barrier
;
16703 new_fix
->address
= selected_address
;
16704 new_fix
->next
= fix
->next
;
16705 fix
->next
= new_fix
;
16710 /* Record that there is a natural barrier in the insn stream at
16713 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
16715 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16718 fix
->address
= address
;
16721 if (minipool_fix_head
!= NULL
)
16722 minipool_fix_tail
->next
= fix
;
16724 minipool_fix_head
= fix
;
16726 minipool_fix_tail
= fix
;
16729 /* Record INSN, which will need fixing up to load a value from the
16730 minipool. ADDRESS is the offset of the insn since the start of the
16731 function; LOC is a pointer to the part of the insn which requires
16732 fixing; VALUE is the constant that must be loaded, which is of type
16735 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
16736 machine_mode mode
, rtx value
)
16738 gcc_assert (!arm_disable_literal_pool
);
16739 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16742 fix
->address
= address
;
16745 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16746 fix
->value
= value
;
16747 fix
->forwards
= get_attr_pool_range (insn
);
16748 fix
->backwards
= get_attr_neg_pool_range (insn
);
16749 fix
->minipool
= NULL
;
16751 /* If an insn doesn't have a range defined for it, then it isn't
16752 expecting to be reworked by this code. Better to stop now than
16753 to generate duff assembly code. */
16754 gcc_assert (fix
->forwards
|| fix
->backwards
);
16756 /* If an entry requires 8-byte alignment then assume all constant pools
16757 require 4 bytes of padding. Trying to do this later on a per-pool
16758 basis is awkward because existing pool entries have to be modified. */
16759 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16764 fprintf (dump_file
,
16765 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16766 GET_MODE_NAME (mode
),
16767 INSN_UID (insn
), (unsigned long) address
,
16768 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16769 arm_print_value (dump_file
, fix
->value
);
16770 fprintf (dump_file
, "\n");
16773 /* Add it to the chain of fixes. */
16776 if (minipool_fix_head
!= NULL
)
16777 minipool_fix_tail
->next
= fix
;
16779 minipool_fix_head
= fix
;
16781 minipool_fix_tail
= fix
;
16784 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16785 Returns the number of insns needed, or 99 if we always want to synthesize
16788 arm_max_const_double_inline_cost ()
16790 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16793 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16794 Returns the number of insns needed, or 99 if we don't know how to
16797 arm_const_double_inline_cost (rtx val
)
16799 rtx lowpart
, highpart
;
16802 mode
= GET_MODE (val
);
16804 if (mode
== VOIDmode
)
16807 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16809 lowpart
= gen_lowpart (SImode
, val
);
16810 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16812 gcc_assert (CONST_INT_P (lowpart
));
16813 gcc_assert (CONST_INT_P (highpart
));
16815 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16816 NULL_RTX
, NULL_RTX
, 0, 0)
16817 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16818 NULL_RTX
, NULL_RTX
, 0, 0));
16821 /* Cost of loading a SImode constant. */
16823 arm_const_inline_cost (enum rtx_code code
, rtx val
)
16825 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
16826 NULL_RTX
, NULL_RTX
, 1, 0);
16829 /* Return true if it is worthwhile to split a 64-bit constant into two
16830 32-bit operations. This is the case if optimizing for size, or
16831 if we have load delay slots, or if one 32-bit part can be done with
16832 a single data operation. */
16834 arm_const_double_by_parts (rtx val
)
16836 machine_mode mode
= GET_MODE (val
);
16839 if (optimize_size
|| arm_ld_sched
)
16842 if (mode
== VOIDmode
)
16845 part
= gen_highpart_mode (SImode
, mode
, val
);
16847 gcc_assert (CONST_INT_P (part
));
16849 if (const_ok_for_arm (INTVAL (part
))
16850 || const_ok_for_arm (~INTVAL (part
)))
16853 part
= gen_lowpart (SImode
, val
);
16855 gcc_assert (CONST_INT_P (part
));
16857 if (const_ok_for_arm (INTVAL (part
))
16858 || const_ok_for_arm (~INTVAL (part
)))
16864 /* Return true if it is possible to inline both the high and low parts
16865 of a 64-bit constant into 32-bit data processing instructions. */
16867 arm_const_double_by_immediates (rtx val
)
16869 machine_mode mode
= GET_MODE (val
);
16872 if (mode
== VOIDmode
)
16875 part
= gen_highpart_mode (SImode
, mode
, val
);
16877 gcc_assert (CONST_INT_P (part
));
16879 if (!const_ok_for_arm (INTVAL (part
)))
16882 part
= gen_lowpart (SImode
, val
);
16884 gcc_assert (CONST_INT_P (part
));
16886 if (!const_ok_for_arm (INTVAL (part
)))
16892 /* Scan INSN and note any of its operands that need fixing.
16893 If DO_PUSHES is false we do not actually push any of the fixups
16896 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
16900 extract_constrain_insn (insn
);
16902 if (recog_data
.n_alternatives
== 0)
16905 /* Fill in recog_op_alt with information about the constraints of
16907 preprocess_constraints (insn
);
16909 const operand_alternative
*op_alt
= which_op_alt ();
16910 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16912 /* Things we need to fix can only occur in inputs. */
16913 if (recog_data
.operand_type
[opno
] != OP_IN
)
16916 /* If this alternative is a memory reference, then any mention
16917 of constants in this alternative is really to fool reload
16918 into allowing us to accept one there. We need to fix them up
16919 now so that we output the right code. */
16920 if (op_alt
[opno
].memory_ok
)
16922 rtx op
= recog_data
.operand
[opno
];
16924 if (CONSTANT_P (op
))
16927 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16928 recog_data
.operand_mode
[opno
], op
);
16930 else if (MEM_P (op
)
16931 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16932 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16936 rtx cop
= avoid_constant_pool_reference (op
);
16938 /* Casting the address of something to a mode narrower
16939 than a word can cause avoid_constant_pool_reference()
16940 to return the pool reference itself. That's no good to
16941 us here. Lets just hope that we can use the
16942 constant pool value directly. */
16944 cop
= get_pool_constant (XEXP (op
, 0));
16946 push_minipool_fix (insn
, address
,
16947 recog_data
.operand_loc
[opno
],
16948 recog_data
.operand_mode
[opno
], cop
);
16958 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16959 and unions in the context of ARMv8-M Security Extensions. It is used as a
16960 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16961 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16962 or four masks, depending on whether it is being computed for a
16963 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16964 respectively. The tree for the type of the argument or a field within an
16965 argument is passed in ARG_TYPE, the current register this argument or field
16966 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16967 argument or field starts at is passed in STARTING_BIT and the last used bit
16968 is kept in LAST_USED_BIT which is also updated accordingly. */
16970 static unsigned HOST_WIDE_INT
16971 comp_not_to_clear_mask_str_un (tree arg_type
, int * regno
,
16972 uint32_t * padding_bits_to_clear
,
16973 unsigned starting_bit
, int * last_used_bit
)
16976 unsigned HOST_WIDE_INT not_to_clear_reg_mask
= 0;
16978 if (TREE_CODE (arg_type
) == RECORD_TYPE
)
16980 unsigned current_bit
= starting_bit
;
16982 long int offset
, size
;
16985 field
= TYPE_FIELDS (arg_type
);
16988 /* The offset within a structure is always an offset from
16989 the start of that structure. Make sure we take that into the
16990 calculation of the register based offset that we use here. */
16991 offset
= starting_bit
;
16992 offset
+= TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field
), 0);
16995 /* This is the actual size of the field, for bitfields this is the
16996 bitfield width and not the container size. */
16997 size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
16999 if (*last_used_bit
!= offset
)
17001 if (offset
< *last_used_bit
)
17003 /* This field's offset is before the 'last_used_bit', that
17004 means this field goes on the next register. So we need to
17005 pad the rest of the current register and increase the
17006 register number. */
17008 mask
= ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit
);
17011 padding_bits_to_clear
[*regno
] |= mask
;
17012 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
17017 /* Otherwise we pad the bits between the last field's end and
17018 the start of the new field. */
17021 mask
= ((uint32_t)-1) >> (32 - offset
);
17022 mask
-= ((uint32_t) 1 << *last_used_bit
) - 1;
17023 padding_bits_to_clear
[*regno
] |= mask
;
17025 current_bit
= offset
;
17028 /* Calculate further padding bits for inner structs/unions too. */
17029 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field
)))
17031 *last_used_bit
= current_bit
;
17032 not_to_clear_reg_mask
17033 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field
), regno
,
17034 padding_bits_to_clear
, offset
,
17039 /* Update 'current_bit' with this field's size. If the
17040 'current_bit' lies in a subsequent register, update 'regno' and
17041 reset 'current_bit' to point to the current bit in that new
17043 current_bit
+= size
;
17044 while (current_bit
>= 32)
17047 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
17050 *last_used_bit
= current_bit
;
17053 field
= TREE_CHAIN (field
);
17055 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
17057 else if (TREE_CODE (arg_type
) == UNION_TYPE
)
17059 tree field
, field_t
;
17060 int i
, regno_t
, field_size
;
17064 uint32_t padding_bits_to_clear_res
[NUM_ARG_REGS
]
17065 = {-1, -1, -1, -1};
17067 /* To compute the padding bits in a union we only consider bits as
17068 padding bits if they are always either a padding bit or fall outside a
17069 fields size for all fields in the union. */
17070 field
= TYPE_FIELDS (arg_type
);
17073 uint32_t padding_bits_to_clear_t
[NUM_ARG_REGS
]
17074 = {0U, 0U, 0U, 0U};
17075 int last_used_bit_t
= *last_used_bit
;
17077 field_t
= TREE_TYPE (field
);
17079 /* If the field's type is either a record or a union make sure to
17080 compute their padding bits too. */
17081 if (RECORD_OR_UNION_TYPE_P (field_t
))
17082 not_to_clear_reg_mask
17083 |= comp_not_to_clear_mask_str_un (field_t
, ®no_t
,
17084 &padding_bits_to_clear_t
[0],
17085 starting_bit
, &last_used_bit_t
);
17088 field_size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
17089 regno_t
= (field_size
/ 32) + *regno
;
17090 last_used_bit_t
= (starting_bit
+ field_size
) % 32;
17093 for (i
= *regno
; i
< regno_t
; i
++)
17095 /* For all but the last register used by this field only keep the
17096 padding bits that were padding bits in this field. */
17097 padding_bits_to_clear_res
[i
] &= padding_bits_to_clear_t
[i
];
17100 /* For the last register, keep all padding bits that were padding
17101 bits in this field and any padding bits that are still valid
17102 as padding bits but fall outside of this field's size. */
17103 mask
= (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t
)) + 1;
17104 padding_bits_to_clear_res
[regno_t
]
17105 &= padding_bits_to_clear_t
[regno_t
] | mask
;
17107 /* Update the maximum size of the fields in terms of registers used
17108 ('max_reg') and the 'last_used_bit' in said register. */
17109 if (max_reg
< regno_t
)
17112 max_bit
= last_used_bit_t
;
17114 else if (max_reg
== regno_t
&& max_bit
< last_used_bit_t
)
17115 max_bit
= last_used_bit_t
;
17117 field
= TREE_CHAIN (field
);
17120 /* Update the current padding_bits_to_clear using the intersection of the
17121 padding bits of all the fields. */
17122 for (i
=*regno
; i
< max_reg
; i
++)
17123 padding_bits_to_clear
[i
] |= padding_bits_to_clear_res
[i
];
17125 /* Do not keep trailing padding bits, we do not know yet whether this
17126 is the end of the argument. */
17127 mask
= ((uint32_t) 1 << max_bit
) - 1;
17128 padding_bits_to_clear
[max_reg
]
17129 |= padding_bits_to_clear_res
[max_reg
] & mask
;
17132 *last_used_bit
= max_bit
;
17135 /* This function should only be used for structs and unions. */
17136 gcc_unreachable ();
17138 return not_to_clear_reg_mask
;
17141 /* In the context of ARMv8-M Security Extensions, this function is used for both
17142 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17143 registers are used when returning or passing arguments, which is then
17144 returned as a mask. It will also compute a mask to indicate padding/unused
17145 bits for each of these registers, and passes this through the
17146 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17147 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17148 the starting register used to pass this argument or return value is passed
17149 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17150 for struct and union types. */
17152 static unsigned HOST_WIDE_INT
17153 compute_not_to_clear_mask (tree arg_type
, rtx arg_rtx
, int regno
,
17154 uint32_t * padding_bits_to_clear
)
17157 int last_used_bit
= 0;
17158 unsigned HOST_WIDE_INT not_to_clear_mask
;
17160 if (RECORD_OR_UNION_TYPE_P (arg_type
))
17163 = comp_not_to_clear_mask_str_un (arg_type
, ®no
,
17164 padding_bits_to_clear
, 0,
17168 /* If the 'last_used_bit' is not zero, that means we are still using a
17169 part of the last 'regno'. In such cases we must clear the trailing
17170 bits. Otherwise we are not using regno and we should mark it as to
17172 if (last_used_bit
!= 0)
17173 padding_bits_to_clear
[regno
]
17174 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit
) + 1;
17176 not_to_clear_mask
&= ~(HOST_WIDE_INT_1U
<< regno
);
17180 not_to_clear_mask
= 0;
17181 /* We are not dealing with structs nor unions. So these arguments may be
17182 passed in floating point registers too. In some cases a BLKmode is
17183 used when returning or passing arguments in multiple VFP registers. */
17184 if (GET_MODE (arg_rtx
) == BLKmode
)
17189 /* This should really only occur when dealing with the hard-float
17191 gcc_assert (TARGET_HARD_FLOAT_ABI
);
17193 for (i
= 0; i
< XVECLEN (arg_rtx
, 0); i
++)
17195 reg
= XEXP (XVECEXP (arg_rtx
, 0, i
), 0);
17196 gcc_assert (REG_P (reg
));
17198 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (reg
);
17200 /* If we are dealing with DF mode, make sure we don't
17201 clear either of the registers it addresses. */
17202 arg_regs
= ARM_NUM_REGS (GET_MODE (reg
));
17205 unsigned HOST_WIDE_INT mask
;
17206 mask
= HOST_WIDE_INT_1U
<< (REGNO (reg
) + arg_regs
);
17207 mask
-= HOST_WIDE_INT_1U
<< REGNO (reg
);
17208 not_to_clear_mask
|= mask
;
17214 /* Otherwise we can rely on the MODE to determine how many registers
17215 are being used by this argument. */
17216 int arg_regs
= ARM_NUM_REGS (GET_MODE (arg_rtx
));
17217 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
17220 unsigned HOST_WIDE_INT
17221 mask
= HOST_WIDE_INT_1U
<< (REGNO (arg_rtx
) + arg_regs
);
17222 mask
-= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
17223 not_to_clear_mask
|= mask
;
17228 return not_to_clear_mask
;
17231 /* Clear registers secret before doing a cmse_nonsecure_call or returning from
17232 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
17233 are to be fully cleared, using the value in register CLEARING_REG if more
17234 efficient. The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
17235 the bits that needs to be cleared in caller-saved core registers, with
17236 SCRATCH_REG used as a scratch register for that clearing.
17238 NOTE: one of three following assertions must hold:
17239 - SCRATCH_REG is a low register
17240 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17241 in TO_CLEAR_BITMAP)
17242 - CLEARING_REG is a low register. */
17245 cmse_clear_registers (sbitmap to_clear_bitmap
, uint32_t *padding_bits_to_clear
,
17246 int padding_bits_len
, rtx scratch_reg
, rtx clearing_reg
)
17248 bool saved_clearing
= false;
17249 rtx saved_clearing_reg
= NULL_RTX
;
17250 int i
, regno
, clearing_regno
, minregno
= R0_REGNUM
, maxregno
= minregno
- 1;
17252 gcc_assert (arm_arch_cmse
);
17254 if (!bitmap_empty_p (to_clear_bitmap
))
17256 minregno
= bitmap_first_set_bit (to_clear_bitmap
);
17257 maxregno
= bitmap_last_set_bit (to_clear_bitmap
);
17259 clearing_regno
= REGNO (clearing_reg
);
17261 /* Clear padding bits. */
17262 gcc_assert (padding_bits_len
<= NUM_ARG_REGS
);
17263 for (i
= 0, regno
= R0_REGNUM
; i
< padding_bits_len
; i
++, regno
++)
17266 rtx rtx16
, dest
, cleared_reg
= gen_rtx_REG (SImode
, regno
);
17268 if (padding_bits_to_clear
[i
] == 0)
17271 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17272 CLEARING_REG as scratch. */
17274 && REGNO (scratch_reg
) > LAST_LO_REGNUM
)
17276 /* clearing_reg is not to be cleared, copy its value into scratch_reg
17277 such that we can use clearing_reg to clear the unused bits in the
17279 if ((clearing_regno
> maxregno
17280 || !bitmap_bit_p (to_clear_bitmap
, clearing_regno
))
17281 && !saved_clearing
)
17283 gcc_assert (clearing_regno
<= LAST_LO_REGNUM
);
17284 emit_move_insn (scratch_reg
, clearing_reg
);
17285 saved_clearing
= true;
17286 saved_clearing_reg
= scratch_reg
;
17288 scratch_reg
= clearing_reg
;
17291 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17292 mask
= (~padding_bits_to_clear
[i
]) & 0xFFFF;
17293 emit_move_insn (scratch_reg
, gen_int_mode (mask
, SImode
));
17295 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17296 mask
= (~padding_bits_to_clear
[i
]) >> 16;
17297 rtx16
= gen_int_mode (16, SImode
);
17298 dest
= gen_rtx_ZERO_EXTRACT (SImode
, scratch_reg
, rtx16
, rtx16
);
17300 emit_insn (gen_rtx_SET (dest
, gen_int_mode (mask
, SImode
)));
17302 emit_insn (gen_andsi3 (cleared_reg
, cleared_reg
, scratch_reg
));
17304 if (saved_clearing
)
17305 emit_move_insn (clearing_reg
, saved_clearing_reg
);
17308 /* Clear full registers. */
17310 /* If not marked for clearing, clearing_reg already does not contain
17312 if (clearing_regno
<= maxregno
17313 && bitmap_bit_p (to_clear_bitmap
, clearing_regno
))
17315 emit_move_insn (clearing_reg
, const0_rtx
);
17316 emit_use (clearing_reg
);
17317 bitmap_clear_bit (to_clear_bitmap
, clearing_regno
);
17320 for (regno
= minregno
; regno
<= maxregno
; regno
++)
17322 if (!bitmap_bit_p (to_clear_bitmap
, regno
))
17325 if (IS_VFP_REGNUM (regno
))
17327 /* If regno is an even vfp register and its successor is also to
17328 be cleared, use vmov. */
17329 if (TARGET_VFP_DOUBLE
17330 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
17331 && bitmap_bit_p (to_clear_bitmap
, regno
+ 1))
17333 emit_move_insn (gen_rtx_REG (DFmode
, regno
),
17334 CONST1_RTX (DFmode
));
17335 emit_use (gen_rtx_REG (DFmode
, regno
));
17340 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
17341 CONST1_RTX (SFmode
));
17342 emit_use (gen_rtx_REG (SFmode
, regno
));
17347 emit_move_insn (gen_rtx_REG (SImode
, regno
), clearing_reg
);
17348 emit_use (gen_rtx_REG (SImode
, regno
));
17353 /* Clears caller saved registers not used to pass arguments before a
17354 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
17355 registers is done in __gnu_cmse_nonsecure_call libcall.
17356 See libgcc/config/arm/cmse_nonsecure_call.S. */
17359 cmse_nonsecure_call_clear_caller_saved (void)
17363 FOR_EACH_BB_FN (bb
, cfun
)
17367 FOR_BB_INSNS (bb
, insn
)
17369 unsigned address_regnum
, regno
, maxregno
=
17370 TARGET_HARD_FLOAT_ABI
? D7_VFP_REGNUM
: NUM_ARG_REGS
- 1;
17371 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
17373 rtx pat
, call
, unspec
, clearing_reg
, ip_reg
, shift
;
17375 CUMULATIVE_ARGS args_so_far_v
;
17376 cumulative_args_t args_so_far
;
17377 tree arg_type
, fntype
;
17378 bool first_param
= true;
17379 function_args_iterator args_iter
;
17380 uint32_t padding_bits_to_clear
[4] = {0U, 0U, 0U, 0U};
17382 if (!NONDEBUG_INSN_P (insn
))
17385 if (!CALL_P (insn
))
17388 pat
= PATTERN (insn
);
17389 gcc_assert (GET_CODE (pat
) == PARALLEL
&& XVECLEN (pat
, 0) > 0);
17390 call
= XVECEXP (pat
, 0, 0);
17392 /* Get the real call RTX if the insn sets a value, ie. returns. */
17393 if (GET_CODE (call
) == SET
)
17394 call
= SET_SRC (call
);
17396 /* Check if it is a cmse_nonsecure_call. */
17397 unspec
= XEXP (call
, 0);
17398 if (GET_CODE (unspec
) != UNSPEC
17399 || XINT (unspec
, 1) != UNSPEC_NONSECURE_MEM
)
17402 /* Determine the caller-saved registers we need to clear. */
17403 bitmap_clear (to_clear_bitmap
);
17404 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
, NUM_ARG_REGS
);
17406 /* Only look at the caller-saved floating point registers in case of
17407 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17408 lazy store and loads which clear both caller- and callee-saved
17410 if (TARGET_HARD_FLOAT_ABI
)
17412 auto_sbitmap
float_bitmap (maxregno
+ 1);
17414 bitmap_clear (float_bitmap
);
17415 bitmap_set_range (float_bitmap
, FIRST_VFP_REGNUM
,
17416 D7_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1);
17417 bitmap_ior (to_clear_bitmap
, to_clear_bitmap
, float_bitmap
);
17420 /* Make sure the register used to hold the function address is not
17422 address
= RTVEC_ELT (XVEC (unspec
, 0), 0);
17423 gcc_assert (MEM_P (address
));
17424 gcc_assert (REG_P (XEXP (address
, 0)));
17425 address_regnum
= REGNO (XEXP (address
, 0));
17426 if (address_regnum
< R0_REGNUM
+ NUM_ARG_REGS
)
17427 bitmap_clear_bit (to_clear_bitmap
, address_regnum
);
17429 /* Set basic block of call insn so that df rescan is performed on
17430 insns inserted here. */
17431 set_block_for_insn (insn
, bb
);
17432 df_set_flags (DF_DEFER_INSN_RESCAN
);
17435 /* Make sure the scheduler doesn't schedule other insns beyond
17437 emit_insn (gen_blockage ());
17439 /* Walk through all arguments and clear registers appropriately.
17441 fntype
= TREE_TYPE (MEM_EXPR (address
));
17442 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
,
17444 args_so_far
= pack_cumulative_args (&args_so_far_v
);
17445 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
17448 uint64_t to_clear_args_mask
;
17449 machine_mode arg_mode
= TYPE_MODE (arg_type
);
17451 if (VOID_TYPE_P (arg_type
))
17455 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
,
17458 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
,
17460 gcc_assert (REG_P (arg_rtx
));
17462 = compute_not_to_clear_mask (arg_type
, arg_rtx
,
17464 &padding_bits_to_clear
[0]);
17465 if (to_clear_args_mask
)
17467 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
17469 if (to_clear_args_mask
& (1ULL << regno
))
17470 bitmap_clear_bit (to_clear_bitmap
, regno
);
17474 first_param
= false;
17477 /* We use right shift and left shift to clear the LSB of the address
17478 we jump to instead of using bic, to avoid having to use an extra
17479 register on Thumb-1. */
17480 clearing_reg
= XEXP (address
, 0);
17481 shift
= gen_rtx_LSHIFTRT (SImode
, clearing_reg
, const1_rtx
);
17482 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
17483 shift
= gen_rtx_ASHIFT (SImode
, clearing_reg
, const1_rtx
);
17484 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
17486 /* Clear caller-saved registers that leak before doing a non-secure
17488 ip_reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
17489 cmse_clear_registers (to_clear_bitmap
, padding_bits_to_clear
,
17490 NUM_ARG_REGS
, ip_reg
, clearing_reg
);
17492 seq
= get_insns ();
17494 emit_insn_before (seq
, insn
);
17499 /* Rewrite move insn into subtract of 0 if the condition codes will
17500 be useful in next conditional jump insn. */
17503 thumb1_reorg (void)
17507 FOR_EACH_BB_FN (bb
, cfun
)
17510 rtx cmp
, op0
, op1
, set
= NULL
;
17511 rtx_insn
*prev
, *insn
= BB_END (bb
);
17512 bool insn_clobbered
= false;
17514 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17515 insn
= PREV_INSN (insn
);
17517 /* Find the last cbranchsi4_insn in basic block BB. */
17518 if (insn
== BB_HEAD (bb
)
17519 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17522 /* Get the register with which we are comparing. */
17523 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
17524 op0
= XEXP (cmp
, 0);
17525 op1
= XEXP (cmp
, 1);
17527 /* Check that comparison is against ZERO. */
17528 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
17531 /* Find the first flag setting insn before INSN in basic block BB. */
17532 gcc_assert (insn
!= BB_HEAD (bb
));
17533 for (prev
= PREV_INSN (insn
);
17535 && prev
!= BB_HEAD (bb
)
17537 || DEBUG_INSN_P (prev
)
17538 || ((set
= single_set (prev
)) != NULL
17539 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17540 prev
= PREV_INSN (prev
))
17542 if (reg_set_p (op0
, prev
))
17543 insn_clobbered
= true;
17546 /* Skip if op0 is clobbered by insn other than prev. */
17547 if (insn_clobbered
)
17553 dest
= SET_DEST (set
);
17554 src
= SET_SRC (set
);
17555 if (!low_register_operand (dest
, SImode
)
17556 || !low_register_operand (src
, SImode
))
17559 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17560 in INSN. Both src and dest of the move insn are checked. */
17561 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17563 dest
= copy_rtx (dest
);
17564 src
= copy_rtx (src
);
17565 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17566 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
17567 INSN_CODE (prev
) = -1;
17568 /* Set test register in INSN to dest. */
17569 XEXP (cmp
, 0) = copy_rtx (dest
);
17570 INSN_CODE (insn
) = -1;
17575 /* Convert instructions to their cc-clobbering variant if possible, since
17576 that allows us to use smaller encodings. */
17579 thumb2_reorg (void)
17584 INIT_REG_SET (&live
);
17586 /* We are freeing block_for_insn in the toplev to keep compatibility
17587 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17588 compute_bb_for_insn ();
17591 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17593 FOR_EACH_BB_FN (bb
, cfun
)
17595 if ((current_tune
->disparage_flag_setting_t16_encodings
17596 == tune_params::DISPARAGE_FLAGS_ALL
)
17597 && optimize_bb_for_speed_p (bb
))
17601 Convert_Action action
= SKIP
;
17602 Convert_Action action_for_partial_flag_setting
17603 = ((current_tune
->disparage_flag_setting_t16_encodings
17604 != tune_params::DISPARAGE_FLAGS_NEITHER
)
17605 && optimize_bb_for_speed_p (bb
))
17608 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17609 df_simulate_initialize_backwards (bb
, &live
);
17610 FOR_BB_INSNS_REVERSE (bb
, insn
)
17612 if (NONJUMP_INSN_P (insn
)
17613 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17614 && GET_CODE (PATTERN (insn
)) == SET
)
17617 rtx pat
= PATTERN (insn
);
17618 rtx dst
= XEXP (pat
, 0);
17619 rtx src
= XEXP (pat
, 1);
17620 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17622 if (UNARY_P (src
) || BINARY_P (src
))
17623 op0
= XEXP (src
, 0);
17625 if (BINARY_P (src
))
17626 op1
= XEXP (src
, 1);
17628 if (low_register_operand (dst
, SImode
))
17630 switch (GET_CODE (src
))
17633 /* Adding two registers and storing the result
17634 in the first source is already a 16-bit
17636 if (rtx_equal_p (dst
, op0
)
17637 && register_operand (op1
, SImode
))
17640 if (low_register_operand (op0
, SImode
))
17642 /* ADDS <Rd>,<Rn>,<Rm> */
17643 if (low_register_operand (op1
, SImode
))
17645 /* ADDS <Rdn>,#<imm8> */
17646 /* SUBS <Rdn>,#<imm8> */
17647 else if (rtx_equal_p (dst
, op0
)
17648 && CONST_INT_P (op1
)
17649 && IN_RANGE (INTVAL (op1
), -255, 255))
17651 /* ADDS <Rd>,<Rn>,#<imm3> */
17652 /* SUBS <Rd>,<Rn>,#<imm3> */
17653 else if (CONST_INT_P (op1
)
17654 && IN_RANGE (INTVAL (op1
), -7, 7))
17657 /* ADCS <Rd>, <Rn> */
17658 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17659 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17660 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17662 && COMPARISON_P (op1
)
17663 && cc_register (XEXP (op1
, 0), VOIDmode
)
17664 && maybe_get_arm_condition_code (op1
) == ARM_CS
17665 && XEXP (op1
, 1) == const0_rtx
)
17670 /* RSBS <Rd>,<Rn>,#0
17671 Not handled here: see NEG below. */
17672 /* SUBS <Rd>,<Rn>,#<imm3>
17674 Not handled here: see PLUS above. */
17675 /* SUBS <Rd>,<Rn>,<Rm> */
17676 if (low_register_operand (op0
, SImode
)
17677 && low_register_operand (op1
, SImode
))
17682 /* MULS <Rdm>,<Rn>,<Rdm>
17683 As an exception to the rule, this is only used
17684 when optimizing for size since MULS is slow on all
17685 known implementations. We do not even want to use
17686 MULS in cold code, if optimizing for speed, so we
17687 test the global flag here. */
17688 if (!optimize_size
)
17690 /* Fall through. */
17694 /* ANDS <Rdn>,<Rm> */
17695 if (rtx_equal_p (dst
, op0
)
17696 && low_register_operand (op1
, SImode
))
17697 action
= action_for_partial_flag_setting
;
17698 else if (rtx_equal_p (dst
, op1
)
17699 && low_register_operand (op0
, SImode
))
17700 action
= action_for_partial_flag_setting
== SKIP
17701 ? SKIP
: SWAP_CONV
;
17707 /* ASRS <Rdn>,<Rm> */
17708 /* LSRS <Rdn>,<Rm> */
17709 /* LSLS <Rdn>,<Rm> */
17710 if (rtx_equal_p (dst
, op0
)
17711 && low_register_operand (op1
, SImode
))
17712 action
= action_for_partial_flag_setting
;
17713 /* ASRS <Rd>,<Rm>,#<imm5> */
17714 /* LSRS <Rd>,<Rm>,#<imm5> */
17715 /* LSLS <Rd>,<Rm>,#<imm5> */
17716 else if (low_register_operand (op0
, SImode
)
17717 && CONST_INT_P (op1
)
17718 && IN_RANGE (INTVAL (op1
), 0, 31))
17719 action
= action_for_partial_flag_setting
;
17723 /* RORS <Rdn>,<Rm> */
17724 if (rtx_equal_p (dst
, op0
)
17725 && low_register_operand (op1
, SImode
))
17726 action
= action_for_partial_flag_setting
;
17730 /* MVNS <Rd>,<Rm> */
17731 if (low_register_operand (op0
, SImode
))
17732 action
= action_for_partial_flag_setting
;
17736 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17737 if (low_register_operand (op0
, SImode
))
17742 /* MOVS <Rd>,#<imm8> */
17743 if (CONST_INT_P (src
)
17744 && IN_RANGE (INTVAL (src
), 0, 255))
17745 action
= action_for_partial_flag_setting
;
17749 /* MOVS and MOV<c> with registers have different
17750 encodings, so are not relevant here. */
17758 if (action
!= SKIP
)
17760 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17761 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17764 if (action
== SWAP_CONV
)
17766 src
= copy_rtx (src
);
17767 XEXP (src
, 0) = op1
;
17768 XEXP (src
, 1) = op0
;
17769 pat
= gen_rtx_SET (dst
, src
);
17770 vec
= gen_rtvec (2, pat
, clobber
);
17772 else /* action == CONV */
17773 vec
= gen_rtvec (2, pat
, clobber
);
17775 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17776 INSN_CODE (insn
) = -1;
17780 if (NONDEBUG_INSN_P (insn
))
17781 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17785 CLEAR_REG_SET (&live
);
17788 /* Gcc puts the pool in the wrong place for ARM, since we can only
17789 load addresses a limited distance around the pc. We do some
17790 special munging to move the constant pool values to the correct
17791 point in the code. */
17796 HOST_WIDE_INT address
= 0;
17800 cmse_nonsecure_call_clear_caller_saved ();
17802 /* We cannot run the Thumb passes for thunks because there is no CFG. */
17803 if (cfun
->is_thunk
)
17805 else if (TARGET_THUMB1
)
17807 else if (TARGET_THUMB2
)
17810 /* Ensure all insns that must be split have been split at this point.
17811 Otherwise, the pool placement code below may compute incorrect
17812 insn lengths. Note that when optimizing, all insns have already
17813 been split at this point. */
17815 split_all_insns_noflow ();
17817 /* Make sure we do not attempt to create a literal pool even though it should
17818 no longer be necessary to create any. */
17819 if (arm_disable_literal_pool
)
17822 minipool_fix_head
= minipool_fix_tail
= NULL
;
17824 /* The first insn must always be a note, or the code below won't
17825 scan it properly. */
17826 insn
= get_insns ();
17827 gcc_assert (NOTE_P (insn
));
17830 /* Scan all the insns and record the operands that will need fixing. */
17831 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17833 if (BARRIER_P (insn
))
17834 push_minipool_barrier (insn
, address
);
17835 else if (INSN_P (insn
))
17837 rtx_jump_table_data
*table
;
17839 note_invalid_constants (insn
, address
, true);
17840 address
+= get_attr_length (insn
);
17842 /* If the insn is a vector jump, add the size of the table
17843 and skip the table. */
17844 if (tablejump_p (insn
, NULL
, &table
))
17846 address
+= get_jump_table_size (table
);
17850 else if (LABEL_P (insn
))
17851 /* Add the worst-case padding due to alignment. We don't add
17852 the _current_ padding because the minipool insertions
17853 themselves might change it. */
17854 address
+= get_label_padding (insn
);
17857 fix
= minipool_fix_head
;
17859 /* Now scan the fixups and perform the required changes. */
17864 Mfix
* last_added_fix
;
17865 Mfix
* last_barrier
= NULL
;
17868 /* Skip any further barriers before the next fix. */
17869 while (fix
&& BARRIER_P (fix
->insn
))
17872 /* No more fixes. */
17876 last_added_fix
= NULL
;
17878 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17880 if (BARRIER_P (ftmp
->insn
))
17882 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17885 last_barrier
= ftmp
;
17887 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17890 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17893 /* If we found a barrier, drop back to that; any fixes that we
17894 could have reached but come after the barrier will now go in
17895 the next mini-pool. */
17896 if (last_barrier
!= NULL
)
17898 /* Reduce the refcount for those fixes that won't go into this
17900 for (fdel
= last_barrier
->next
;
17901 fdel
&& fdel
!= ftmp
;
17904 fdel
->minipool
->refcount
--;
17905 fdel
->minipool
= NULL
;
17908 ftmp
= last_barrier
;
17912 /* ftmp is first fix that we can't fit into this pool and
17913 there no natural barriers that we could use. Insert a
17914 new barrier in the code somewhere between the previous
17915 fix and this one, and arrange to jump around it. */
17916 HOST_WIDE_INT max_address
;
17918 /* The last item on the list of fixes must be a barrier, so
17919 we can never run off the end of the list of fixes without
17920 last_barrier being set. */
17923 max_address
= minipool_vector_head
->max_address
;
17924 /* Check that there isn't another fix that is in range that
17925 we couldn't fit into this pool because the pool was
17926 already too large: we need to put the pool before such an
17927 instruction. The pool itself may come just after the
17928 fix because create_fix_barrier also allows space for a
17929 jump instruction. */
17930 if (ftmp
->address
< max_address
)
17931 max_address
= ftmp
->address
+ 1;
17933 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17936 assign_minipool_offsets (last_barrier
);
17940 if (!BARRIER_P (ftmp
->insn
)
17941 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17948 /* Scan over the fixes we have identified for this pool, fixing them
17949 up and adding the constants to the pool itself. */
17950 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17951 this_fix
= this_fix
->next
)
17952 if (!BARRIER_P (this_fix
->insn
))
17955 = plus_constant (Pmode
,
17956 gen_rtx_LABEL_REF (VOIDmode
,
17957 minipool_vector_label
),
17958 this_fix
->minipool
->offset
);
17959 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17962 dump_minipool (last_barrier
->insn
);
17966 /* From now on we must synthesize any constants that we can't handle
17967 directly. This can happen if the RTL gets split during final
17968 instruction generation. */
17969 cfun
->machine
->after_arm_reorg
= 1;
17971 /* Free the minipool memory. */
17972 obstack_free (&minipool_obstack
, minipool_startobj
);
17975 /* Routines to output assembly language. */
17977 /* Return string representation of passed in real value. */
17978 static const char *
17979 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17981 if (!fp_consts_inited
)
17984 gcc_assert (real_equal (r
, &value_fp0
));
17988 /* OPERANDS[0] is the entire list of insns that constitute pop,
17989 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17990 is in the list, UPDATE is true iff the list contains explicit
17991 update of base register. */
17993 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17999 const char *conditional
;
18000 int num_saves
= XVECLEN (operands
[0], 0);
18001 unsigned int regno
;
18002 unsigned int regno_base
= REGNO (operands
[1]);
18003 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
18006 offset
+= update
? 1 : 0;
18007 offset
+= return_pc
? 1 : 0;
18009 /* Is the base register in the list? */
18010 for (i
= offset
; i
< num_saves
; i
++)
18012 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
18013 /* If SP is in the list, then the base register must be SP. */
18014 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
18015 /* If base register is in the list, there must be no explicit update. */
18016 if (regno
== regno_base
)
18017 gcc_assert (!update
);
18020 conditional
= reverse
? "%?%D0" : "%?%d0";
18021 /* Can't use POP if returning from an interrupt. */
18022 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
18023 sprintf (pattern
, "pop%s\t{", conditional
);
18026 /* Output ldmfd when the base register is SP, otherwise output ldmia.
18027 It's just a convention, their semantics are identical. */
18028 if (regno_base
== SP_REGNUM
)
18029 sprintf (pattern
, "ldmfd%s\t", conditional
);
18031 sprintf (pattern
, "ldmia%s\t", conditional
);
18033 sprintf (pattern
, "ldm%s\t", conditional
);
18035 strcat (pattern
, reg_names
[regno_base
]);
18037 strcat (pattern
, "!, {");
18039 strcat (pattern
, ", {");
18042 /* Output the first destination register. */
18044 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
18046 /* Output the rest of the destination registers. */
18047 for (i
= offset
+ 1; i
< num_saves
; i
++)
18049 strcat (pattern
, ", ");
18051 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
18054 strcat (pattern
, "}");
18056 if (interrupt_p
&& return_pc
)
18057 strcat (pattern
, "^");
18059 output_asm_insn (pattern
, &cond
);
18063 /* Output the assembly for a store multiple. */
18066 vfp_output_vstmd (rtx
* operands
)
18072 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
18073 ? XEXP (operands
[0], 0)
18074 : XEXP (XEXP (operands
[0], 0), 0);
18075 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
18078 strcpy (pattern
, "vpush%?.64\t{%P1");
18080 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
18082 p
= strlen (pattern
);
18084 gcc_assert (REG_P (operands
[1]));
18086 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
18087 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
18089 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
18091 strcpy (&pattern
[p
], "}");
18093 output_asm_insn (pattern
, operands
);
18098 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
18099 number of bytes pushed. */
18102 vfp_emit_fstmd (int base_reg
, int count
)
18109 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
18110 register pairs are stored by a store multiple insn. We avoid this
18111 by pushing an extra pair. */
18112 if (count
== 2 && !arm_arch6
)
18114 if (base_reg
== LAST_VFP_REGNUM
- 3)
18119 /* FSTMD may not store more than 16 doubleword registers at once. Split
18120 larger stores into multiple parts (up to a maximum of two, in
18125 /* NOTE: base_reg is an internal register number, so each D register
18127 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
18128 saved
+= vfp_emit_fstmd (base_reg
, 16);
18132 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
18133 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
18135 reg
= gen_rtx_REG (DFmode
, base_reg
);
18138 XVECEXP (par
, 0, 0)
18139 = gen_rtx_SET (gen_frame_mem
18141 gen_rtx_PRE_MODIFY (Pmode
,
18144 (Pmode
, stack_pointer_rtx
,
18147 gen_rtx_UNSPEC (BLKmode
,
18148 gen_rtvec (1, reg
),
18149 UNSPEC_PUSH_MULT
));
18151 tmp
= gen_rtx_SET (stack_pointer_rtx
,
18152 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
18153 RTX_FRAME_RELATED_P (tmp
) = 1;
18154 XVECEXP (dwarf
, 0, 0) = tmp
;
18156 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
18157 RTX_FRAME_RELATED_P (tmp
) = 1;
18158 XVECEXP (dwarf
, 0, 1) = tmp
;
18160 for (i
= 1; i
< count
; i
++)
18162 reg
= gen_rtx_REG (DFmode
, base_reg
);
18164 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
18166 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
18167 plus_constant (Pmode
,
18171 RTX_FRAME_RELATED_P (tmp
) = 1;
18172 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
18175 par
= emit_insn (par
);
18176 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
18177 RTX_FRAME_RELATED_P (par
) = 1;
18182 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
18183 has the cmse_nonsecure_call attribute and returns false otherwise. */
18186 detect_cmse_nonsecure_call (tree addr
)
18191 tree fntype
= TREE_TYPE (addr
);
18192 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_call",
18193 TYPE_ATTRIBUTES (fntype
)))
18199 /* Emit a call instruction with pattern PAT. ADDR is the address of
18200 the call target. */
18203 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
18207 insn
= emit_call_insn (pat
);
18209 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18210 If the call might use such an entry, add a use of the PIC register
18211 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18212 if (TARGET_VXWORKS_RTP
18215 && GET_CODE (addr
) == SYMBOL_REF
18216 && (SYMBOL_REF_DECL (addr
)
18217 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
18218 : !SYMBOL_REF_LOCAL_P (addr
)))
18220 require_pic_register (NULL_RTX
, false /*compute_now*/);
18221 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
18224 if (TARGET_AAPCS_BASED
)
18226 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18227 linker. We need to add an IP clobber to allow setting
18228 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18229 is not needed since it's a fixed register. */
18230 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
18231 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
18235 /* Output a 'call' insn. */
18237 output_call (rtx
*operands
)
18239 gcc_assert (!arm_arch5t
); /* Patterns should call blx <reg> directly. */
18241 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18242 if (REGNO (operands
[0]) == LR_REGNUM
)
18244 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
18245 output_asm_insn ("mov%?\t%0, %|lr", operands
);
18248 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
18250 if (TARGET_INTERWORK
|| arm_arch4t
)
18251 output_asm_insn ("bx%?\t%0", operands
);
18253 output_asm_insn ("mov%?\t%|pc, %0", operands
);
18258 /* Output a move from arm registers to arm registers of a long double
18259 OPERANDS[0] is the destination.
18260 OPERANDS[1] is the source. */
18262 output_mov_long_double_arm_from_arm (rtx
*operands
)
18264 /* We have to be careful here because the two might overlap. */
18265 int dest_start
= REGNO (operands
[0]);
18266 int src_start
= REGNO (operands
[1]);
18270 if (dest_start
< src_start
)
18272 for (i
= 0; i
< 3; i
++)
18274 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
18275 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
18276 output_asm_insn ("mov%?\t%0, %1", ops
);
18281 for (i
= 2; i
>= 0; i
--)
18283 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
18284 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
18285 output_asm_insn ("mov%?\t%0, %1", ops
);
18293 arm_emit_movpair (rtx dest
, rtx src
)
18295 /* If the src is an immediate, simplify it. */
18296 if (CONST_INT_P (src
))
18298 HOST_WIDE_INT val
= INTVAL (src
);
18299 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
18300 if ((val
>> 16) & 0x0000ffff)
18302 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
18304 GEN_INT ((val
>> 16) & 0x0000ffff));
18305 rtx_insn
*insn
= get_last_insn ();
18306 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
18310 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
18311 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
18312 rtx_insn
*insn
= get_last_insn ();
18313 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
18316 /* Output a move between double words. It must be REG<-MEM
18319 output_move_double (rtx
*operands
, bool emit
, int *count
)
18321 enum rtx_code code0
= GET_CODE (operands
[0]);
18322 enum rtx_code code1
= GET_CODE (operands
[1]);
18327 /* The only case when this might happen is when
18328 you are looking at the length of a DImode instruction
18329 that has an invalid constant in it. */
18330 if (code0
== REG
&& code1
!= MEM
)
18332 gcc_assert (!emit
);
18339 unsigned int reg0
= REGNO (operands
[0]);
18341 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
18343 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
18345 switch (GET_CODE (XEXP (operands
[1], 0)))
18352 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
18353 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
18355 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18360 gcc_assert (TARGET_LDRD
);
18362 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
18369 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
18371 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
18379 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
18381 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
18386 gcc_assert (TARGET_LDRD
);
18388 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
18393 /* Autoicrement addressing modes should never have overlapping
18394 base and destination registers, and overlapping index registers
18395 are already prohibited, so this doesn't need to worry about
18397 otherops
[0] = operands
[0];
18398 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
18399 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
18401 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
18403 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
18405 /* Registers overlap so split out the increment. */
18408 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18409 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
18416 /* Use a single insn if we can.
18417 FIXME: IWMMXT allows offsets larger than ldrd can
18418 handle, fix these up with a pair of ldr. */
18420 || !CONST_INT_P (otherops
[2])
18421 || (INTVAL (otherops
[2]) > -256
18422 && INTVAL (otherops
[2]) < 256))
18425 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
18431 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18432 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18442 /* Use a single insn if we can.
18443 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18444 fix these up with a pair of ldr. */
18446 || !CONST_INT_P (otherops
[2])
18447 || (INTVAL (otherops
[2]) > -256
18448 && INTVAL (otherops
[2]) < 256))
18451 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
18457 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18458 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18468 /* We might be able to use ldrd %0, %1 here. However the range is
18469 different to ldr/adr, and it is broken on some ARMv7-M
18470 implementations. */
18471 /* Use the second register of the pair to avoid problematic
18473 otherops
[1] = operands
[1];
18475 output_asm_insn ("adr%?\t%0, %1", otherops
);
18476 operands
[1] = otherops
[0];
18480 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18482 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
18489 /* ??? This needs checking for thumb2. */
18491 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18492 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18494 otherops
[0] = operands
[0];
18495 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18496 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18498 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18500 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18502 switch ((int) INTVAL (otherops
[2]))
18506 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
18512 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
18518 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
18522 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18523 operands
[1] = otherops
[0];
18525 && (REG_P (otherops
[2])
18527 || (CONST_INT_P (otherops
[2])
18528 && INTVAL (otherops
[2]) > -256
18529 && INTVAL (otherops
[2]) < 256)))
18531 if (reg_overlap_mentioned_p (operands
[0],
18534 /* Swap base and index registers over to
18535 avoid a conflict. */
18536 std::swap (otherops
[1], otherops
[2]);
18538 /* If both registers conflict, it will usually
18539 have been fixed by a splitter. */
18540 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18541 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18545 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18546 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18553 otherops
[0] = operands
[0];
18555 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
18560 if (CONST_INT_P (otherops
[2]))
18564 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18565 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18567 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18573 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18579 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18586 return "ldrd%?\t%0, [%1]";
18588 return "ldmia%?\t%1, %M0";
18592 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18593 /* Take care of overlapping base/data reg. */
18594 if (reg_mentioned_p (operands
[0], operands
[1]))
18598 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18599 output_asm_insn ("ldr%?\t%0, %1", operands
);
18609 output_asm_insn ("ldr%?\t%0, %1", operands
);
18610 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18620 /* Constraints should ensure this. */
18621 gcc_assert (code0
== MEM
&& code1
== REG
);
18622 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18623 || (TARGET_ARM
&& TARGET_LDRD
));
18625 /* For TARGET_ARM the first source register of an STRD
18626 must be even. This is usually the case for double-word
18627 values but user assembly constraints can force an odd
18628 starting register. */
18629 bool allow_strd
= TARGET_LDRD
18630 && !(TARGET_ARM
&& (REGNO (operands
[1]) & 1) == 1);
18631 switch (GET_CODE (XEXP (operands
[0], 0)))
18637 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
18639 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18644 gcc_assert (allow_strd
);
18646 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
18653 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
18655 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
18663 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
18665 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
18670 gcc_assert (allow_strd
);
18672 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
18677 otherops
[0] = operands
[1];
18678 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18679 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18681 /* IWMMXT allows offsets larger than strd can handle,
18682 fix these up with a pair of str. */
18684 && CONST_INT_P (otherops
[2])
18685 && (INTVAL(otherops
[2]) <= -256
18686 || INTVAL(otherops
[2]) >= 256))
18688 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18692 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18693 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18702 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18703 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18709 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18712 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
18717 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
18722 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18723 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18725 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18729 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
18736 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
18743 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
18748 && (REG_P (otherops
[2])
18750 || (CONST_INT_P (otherops
[2])
18751 && INTVAL (otherops
[2]) > -256
18752 && INTVAL (otherops
[2]) < 256)))
18754 otherops
[0] = operands
[1];
18755 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18757 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
18763 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18764 otherops
[1] = operands
[1];
18767 output_asm_insn ("str%?\t%1, %0", operands
);
18768 output_asm_insn ("str%?\t%H1, %0", otherops
);
18778 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18779 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18782 output_move_quad (rtx
*operands
)
18784 if (REG_P (operands
[0]))
18786 /* Load, or reg->reg move. */
18788 if (MEM_P (operands
[1]))
18790 switch (GET_CODE (XEXP (operands
[1], 0)))
18793 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18798 output_asm_insn ("adr%?\t%0, %1", operands
);
18799 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
18803 gcc_unreachable ();
18811 gcc_assert (REG_P (operands
[1]));
18813 dest
= REGNO (operands
[0]);
18814 src
= REGNO (operands
[1]);
18816 /* This seems pretty dumb, but hopefully GCC won't try to do it
18819 for (i
= 0; i
< 4; i
++)
18821 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18822 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18823 output_asm_insn ("mov%?\t%0, %1", ops
);
18826 for (i
= 3; i
>= 0; i
--)
18828 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18829 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18830 output_asm_insn ("mov%?\t%0, %1", ops
);
18836 gcc_assert (MEM_P (operands
[0]));
18837 gcc_assert (REG_P (operands
[1]));
18838 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18840 switch (GET_CODE (XEXP (operands
[0], 0)))
18843 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18847 gcc_unreachable ();
18854 /* Output a VFP load or store instruction. */
18857 output_move_vfp (rtx
*operands
)
18859 rtx reg
, mem
, addr
, ops
[2];
18860 int load
= REG_P (operands
[0]);
18861 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18862 int sp
= (!TARGET_VFP_FP16INST
18863 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
18864 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18869 reg
= operands
[!load
];
18870 mem
= operands
[load
];
18872 mode
= GET_MODE (reg
);
18874 gcc_assert (REG_P (reg
));
18875 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18876 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
18882 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18883 gcc_assert (MEM_P (mem
));
18885 addr
= XEXP (mem
, 0);
18887 switch (GET_CODE (addr
))
18890 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18891 ops
[0] = XEXP (addr
, 0);
18896 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18897 ops
[0] = XEXP (addr
, 0);
18902 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18908 sprintf (buff
, templ
,
18909 load
? "ld" : "st",
18910 dp
? "64" : sp
? "32" : "16",
18912 integer_p
? "\t%@ int" : "");
18913 output_asm_insn (buff
, ops
);
18918 /* Output a Neon double-word or quad-word load or store, or a load
18919 or store for larger structure modes.
18921 WARNING: The ordering of elements is weird in big-endian mode,
18922 because the EABI requires that vectors stored in memory appear
18923 as though they were stored by a VSTM, as required by the EABI.
18924 GCC RTL defines element ordering based on in-memory order.
18925 This can be different from the architectural ordering of elements
18926 within a NEON register. The intrinsics defined in arm_neon.h use the
18927 NEON register element ordering, not the GCC RTL element ordering.
18929 For example, the in-memory ordering of a big-endian a quadword
18930 vector with 16-bit elements when stored from register pair {d0,d1}
18931 will be (lowest address first, d0[N] is NEON register element N):
18933 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18935 When necessary, quadword registers (dN, dN+1) are moved to ARM
18936 registers from rN in the order:
18938 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18940 So that STM/LDM can be used on vectors in ARM registers, and the
18941 same memory layout will result as if VSTM/VLDM were used.
18943 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18944 possible, which allows use of appropriate alignment tags.
18945 Note that the choice of "64" is independent of the actual vector
18946 element size; this size simply ensures that the behavior is
18947 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18949 Due to limitations of those instructions, use of VST1.64/VLD1.64
18950 is not possible if:
18951 - the address contains PRE_DEC, or
18952 - the mode refers to more than 4 double-word registers
18954 In those cases, it would be possible to replace VSTM/VLDM by a
18955 sequence of instructions; this is not currently implemented since
18956 this is not certain to actually improve performance. */
18959 output_move_neon (rtx
*operands
)
18961 rtx reg
, mem
, addr
, ops
[2];
18962 int regno
, nregs
, load
= REG_P (operands
[0]);
18967 reg
= operands
[!load
];
18968 mem
= operands
[load
];
18970 mode
= GET_MODE (reg
);
18972 gcc_assert (REG_P (reg
));
18973 regno
= REGNO (reg
);
18974 nregs
= REG_NREGS (reg
) / 2;
18975 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18976 || NEON_REGNO_OK_FOR_QUAD (regno
));
18977 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18978 || VALID_NEON_QREG_MODE (mode
)
18979 || VALID_NEON_STRUCT_MODE (mode
));
18980 gcc_assert (MEM_P (mem
));
18982 addr
= XEXP (mem
, 0);
18984 /* Strip off const from addresses like (const (plus (...))). */
18985 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18986 addr
= XEXP (addr
, 0);
18988 switch (GET_CODE (addr
))
18991 /* We have to use vldm / vstm for too-large modes. */
18994 templ
= "v%smia%%?\t%%0!, %%h1";
18995 ops
[0] = XEXP (addr
, 0);
18999 templ
= "v%s1.64\t%%h1, %%A0";
19006 /* We have to use vldm / vstm in this case, since there is no
19007 pre-decrement form of the vld1 / vst1 instructions. */
19008 templ
= "v%smdb%%?\t%%0!, %%h1";
19009 ops
[0] = XEXP (addr
, 0);
19014 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
19015 gcc_unreachable ();
19018 /* We have to use vldm / vstm for too-large modes. */
19022 templ
= "v%smia%%?\t%%m0, %%h1";
19024 templ
= "v%s1.64\t%%h1, %%A0";
19030 /* Fall through. */
19036 for (i
= 0; i
< nregs
; i
++)
19038 /* We're only using DImode here because it's a convenient size. */
19039 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
19040 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
19041 if (reg_overlap_mentioned_p (ops
[0], mem
))
19043 gcc_assert (overlap
== -1);
19048 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
19049 output_asm_insn (buff
, ops
);
19054 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
19055 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
19056 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
19057 output_asm_insn (buff
, ops
);
19064 gcc_unreachable ();
19067 sprintf (buff
, templ
, load
? "ld" : "st");
19068 output_asm_insn (buff
, ops
);
19073 /* Compute and return the length of neon_mov<mode>, where <mode> is
19074 one of VSTRUCT modes: EI, OI, CI or XI. */
19076 arm_attr_length_move_neon (rtx_insn
*insn
)
19078 rtx reg
, mem
, addr
;
19082 extract_insn_cached (insn
);
19084 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
19086 mode
= GET_MODE (recog_data
.operand
[0]);
19097 gcc_unreachable ();
19101 load
= REG_P (recog_data
.operand
[0]);
19102 reg
= recog_data
.operand
[!load
];
19103 mem
= recog_data
.operand
[load
];
19105 gcc_assert (MEM_P (mem
));
19107 addr
= XEXP (mem
, 0);
19109 /* Strip off const from addresses like (const (plus (...))). */
19110 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
19111 addr
= XEXP (addr
, 0);
19113 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
19115 int insns
= REG_NREGS (reg
) / 2;
19122 /* Return nonzero if the offset in the address is an immediate. Otherwise,
19126 arm_address_offset_is_imm (rtx_insn
*insn
)
19130 extract_insn_cached (insn
);
19132 if (REG_P (recog_data
.operand
[0]))
19135 mem
= recog_data
.operand
[0];
19137 gcc_assert (MEM_P (mem
));
19139 addr
= XEXP (mem
, 0);
19142 || (GET_CODE (addr
) == PLUS
19143 && REG_P (XEXP (addr
, 0))
19144 && CONST_INT_P (XEXP (addr
, 1))))
19150 /* Output an ADD r, s, #n where n may be too big for one instruction.
19151 If adding zero to one register, output nothing. */
19153 output_add_immediate (rtx
*operands
)
19155 HOST_WIDE_INT n
= INTVAL (operands
[2]);
19157 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
19160 output_multi_immediate (operands
,
19161 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19164 output_multi_immediate (operands
,
19165 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19172 /* Output a multiple immediate operation.
19173 OPERANDS is the vector of operands referred to in the output patterns.
19174 INSTR1 is the output pattern to use for the first constant.
19175 INSTR2 is the output pattern to use for subsequent constants.
19176 IMMED_OP is the index of the constant slot in OPERANDS.
19177 N is the constant value. */
19178 static const char *
19179 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
19180 int immed_op
, HOST_WIDE_INT n
)
19182 #if HOST_BITS_PER_WIDE_INT > 32
19188 /* Quick and easy output. */
19189 operands
[immed_op
] = const0_rtx
;
19190 output_asm_insn (instr1
, operands
);
19195 const char * instr
= instr1
;
19197 /* Note that n is never zero here (which would give no output). */
19198 for (i
= 0; i
< 32; i
+= 2)
19202 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
19203 output_asm_insn (instr
, operands
);
19213 /* Return the name of a shifter operation. */
19214 static const char *
19215 arm_shift_nmem(enum rtx_code code
)
19220 return ARM_LSL_NAME
;
19236 /* Return the appropriate ARM instruction for the operation code.
19237 The returned result should not be overwritten. OP is the rtx of the
19238 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19241 arithmetic_instr (rtx op
, int shift_first_arg
)
19243 switch (GET_CODE (op
))
19249 return shift_first_arg
? "rsb" : "sub";
19264 return arm_shift_nmem(GET_CODE(op
));
19267 gcc_unreachable ();
19271 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19272 for the operation code. The returned result should not be overwritten.
19273 OP is the rtx code of the shift.
19274 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
19276 static const char *
19277 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
19280 enum rtx_code code
= GET_CODE (op
);
19285 if (!CONST_INT_P (XEXP (op
, 1)))
19287 output_operand_lossage ("invalid shift operand");
19292 *amountp
= 32 - INTVAL (XEXP (op
, 1));
19300 mnem
= arm_shift_nmem(code
);
19301 if (CONST_INT_P (XEXP (op
, 1)))
19303 *amountp
= INTVAL (XEXP (op
, 1));
19305 else if (REG_P (XEXP (op
, 1)))
19312 output_operand_lossage ("invalid shift operand");
19318 /* We never have to worry about the amount being other than a
19319 power of 2, since this case can never be reloaded from a reg. */
19320 if (!CONST_INT_P (XEXP (op
, 1)))
19322 output_operand_lossage ("invalid shift operand");
19326 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
19328 /* Amount must be a power of two. */
19329 if (*amountp
& (*amountp
- 1))
19331 output_operand_lossage ("invalid shift operand");
19335 *amountp
= exact_log2 (*amountp
);
19336 gcc_assert (IN_RANGE (*amountp
, 0, 31));
19337 return ARM_LSL_NAME
;
19340 output_operand_lossage ("invalid shift operand");
19344 /* This is not 100% correct, but follows from the desire to merge
19345 multiplication by a power of 2 with the recognizer for a
19346 shift. >=32 is not a valid shift for "lsl", so we must try and
19347 output a shift that produces the correct arithmetical result.
19348 Using lsr #32 is identical except for the fact that the carry bit
19349 is not set correctly if we set the flags; but we never use the
19350 carry bit from such an operation, so we can ignore that. */
19351 if (code
== ROTATERT
)
19352 /* Rotate is just modulo 32. */
19354 else if (*amountp
!= (*amountp
& 31))
19356 if (code
== ASHIFT
)
19361 /* Shifts of 0 are no-ops. */
19368 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19369 because /bin/as is horribly restrictive. The judgement about
19370 whether or not each character is 'printable' (and can be output as
19371 is) or not (and must be printed with an octal escape) must be made
19372 with reference to the *host* character set -- the situation is
19373 similar to that discussed in the comments above pp_c_char in
19374 c-pretty-print.c. */
19376 #define MAX_ASCII_LEN 51
19379 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
19382 int len_so_far
= 0;
19384 fputs ("\t.ascii\t\"", stream
);
19386 for (i
= 0; i
< len
; i
++)
19390 if (len_so_far
>= MAX_ASCII_LEN
)
19392 fputs ("\"\n\t.ascii\t\"", stream
);
19398 if (c
== '\\' || c
== '\"')
19400 putc ('\\', stream
);
19408 fprintf (stream
, "\\%03o", c
);
19413 fputs ("\"\n", stream
);
19417 /* Compute the register save mask for registers 0 through 12
19418 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19420 static unsigned long
19421 arm_compute_save_reg0_reg12_mask (void)
19423 unsigned long func_type
= arm_current_func_type ();
19424 unsigned long save_reg_mask
= 0;
19427 if (IS_INTERRUPT (func_type
))
19429 unsigned int max_reg
;
19430 /* Interrupt functions must not corrupt any registers,
19431 even call clobbered ones. If this is a leaf function
19432 we can just examine the registers used by the RTL, but
19433 otherwise we have to assume that whatever function is
19434 called might clobber anything, and so we have to save
19435 all the call-clobbered registers as well. */
19436 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
19437 /* FIQ handlers have registers r8 - r12 banked, so
19438 we only need to check r0 - r7, Normal ISRs only
19439 bank r14 and r15, so we must check up to r12.
19440 r13 is the stack pointer which is always preserved,
19441 so we do not need to consider it here. */
19446 for (reg
= 0; reg
<= max_reg
; reg
++)
19447 if (df_regs_ever_live_p (reg
)
19448 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19449 save_reg_mask
|= (1 << reg
);
19451 /* Also save the pic base register if necessary. */
19453 && !TARGET_SINGLE_PIC_BASE
19454 && arm_pic_register
!= INVALID_REGNUM
19455 && crtl
->uses_pic_offset_table
)
19456 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19458 else if (IS_VOLATILE(func_type
))
19460 /* For noreturn functions we historically omitted register saves
19461 altogether. However this really messes up debugging. As a
19462 compromise save just the frame pointers. Combined with the link
19463 register saved elsewhere this should be sufficient to get
19465 if (frame_pointer_needed
)
19466 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19467 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19468 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19469 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19470 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19474 /* In the normal case we only need to save those registers
19475 which are call saved and which are used by this function. */
19476 for (reg
= 0; reg
<= 11; reg
++)
19477 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19478 save_reg_mask
|= (1 << reg
);
19480 /* Handle the frame pointer as a special case. */
19481 if (frame_pointer_needed
)
19482 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19484 /* If we aren't loading the PIC register,
19485 don't stack it even though it may be live. */
19487 && !TARGET_SINGLE_PIC_BASE
19488 && arm_pic_register
!= INVALID_REGNUM
19489 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19490 || crtl
->uses_pic_offset_table
))
19491 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19493 /* The prologue will copy SP into R0, so save it. */
19494 if (IS_STACKALIGN (func_type
))
19495 save_reg_mask
|= 1;
19498 /* Save registers so the exception handler can modify them. */
19499 if (crtl
->calls_eh_return
)
19505 reg
= EH_RETURN_DATA_REGNO (i
);
19506 if (reg
== INVALID_REGNUM
)
19508 save_reg_mask
|= 1 << reg
;
19512 return save_reg_mask
;
19515 /* Return true if r3 is live at the start of the function. */
19518 arm_r3_live_at_start_p (void)
19520 /* Just look at cfg info, which is still close enough to correct at this
19521 point. This gives false positives for broken functions that might use
19522 uninitialized data that happens to be allocated in r3, but who cares? */
19523 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19526 /* Compute the number of bytes used to store the static chain register on the
19527 stack, above the stack frame. We need to know this accurately to get the
19528 alignment of the rest of the stack frame correct. */
19531 arm_compute_static_chain_stack_bytes (void)
19533 /* Once the value is updated from the init value of -1, do not
19535 if (cfun
->machine
->static_chain_stack_bytes
!= -1)
19536 return cfun
->machine
->static_chain_stack_bytes
;
19538 /* See the defining assertion in arm_expand_prologue. */
19539 if (IS_NESTED (arm_current_func_type ())
19540 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19541 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
19542 || flag_stack_clash_protection
)
19543 && !df_regs_ever_live_p (LR_REGNUM
)))
19544 && arm_r3_live_at_start_p ()
19545 && crtl
->args
.pretend_args_size
== 0)
19551 /* Compute a bit mask of which core registers need to be
19552 saved on the stack for the current function.
19553 This is used by arm_compute_frame_layout, which may add extra registers. */
19555 static unsigned long
19556 arm_compute_save_core_reg_mask (void)
19558 unsigned int save_reg_mask
= 0;
19559 unsigned long func_type
= arm_current_func_type ();
19562 if (IS_NAKED (func_type
))
19563 /* This should never really happen. */
19566 /* If we are creating a stack frame, then we must save the frame pointer,
19567 IP (which will hold the old stack pointer), LR and the PC. */
19568 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19570 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19573 | (1 << PC_REGNUM
);
19575 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19577 /* Decide if we need to save the link register.
19578 Interrupt routines have their own banked link register,
19579 so they never need to save it.
19580 Otherwise if we do not use the link register we do not need to save
19581 it. If we are pushing other registers onto the stack however, we
19582 can save an instruction in the epilogue by pushing the link register
19583 now and then popping it back into the PC. This incurs extra memory
19584 accesses though, so we only do it when optimizing for size, and only
19585 if we know that we will not need a fancy return sequence. */
19586 if (df_regs_ever_live_p (LR_REGNUM
)
19589 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19590 && !crtl
->tail_call_emit
19591 && !crtl
->calls_eh_return
))
19592 save_reg_mask
|= 1 << LR_REGNUM
;
19594 if (cfun
->machine
->lr_save_eliminated
)
19595 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19597 if (TARGET_REALLY_IWMMXT
19598 && ((bit_count (save_reg_mask
)
19599 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19600 arm_compute_static_chain_stack_bytes())
19603 /* The total number of registers that are going to be pushed
19604 onto the stack is odd. We need to ensure that the stack
19605 is 64-bit aligned before we start to save iWMMXt registers,
19606 and also before we start to create locals. (A local variable
19607 might be a double or long long which we will load/store using
19608 an iWMMXt instruction). Therefore we need to push another
19609 ARM register, so that the stack will be 64-bit aligned. We
19610 try to avoid using the arg registers (r0 -r3) as they might be
19611 used to pass values in a tail call. */
19612 for (reg
= 4; reg
<= 12; reg
++)
19613 if ((save_reg_mask
& (1 << reg
)) == 0)
19617 save_reg_mask
|= (1 << reg
);
19620 cfun
->machine
->sibcall_blocked
= 1;
19621 save_reg_mask
|= (1 << 3);
19625 /* We may need to push an additional register for use initializing the
19626 PIC base register. */
19627 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19628 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19630 reg
= thumb_find_work_register (1 << 4);
19631 if (!call_used_regs
[reg
])
19632 save_reg_mask
|= (1 << reg
);
19635 return save_reg_mask
;
19638 /* Compute a bit mask of which core registers need to be
19639 saved on the stack for the current function. */
19640 static unsigned long
19641 thumb1_compute_save_core_reg_mask (void)
19643 unsigned long mask
;
19647 for (reg
= 0; reg
< 12; reg
++)
19648 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19651 /* Handle the frame pointer as a special case. */
19652 if (frame_pointer_needed
)
19653 mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19656 && !TARGET_SINGLE_PIC_BASE
19657 && arm_pic_register
!= INVALID_REGNUM
19658 && crtl
->uses_pic_offset_table
)
19659 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19661 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19662 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19663 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19665 /* LR will also be pushed if any lo regs are pushed. */
19666 if (mask
& 0xff || thumb_force_lr_save ())
19667 mask
|= (1 << LR_REGNUM
);
19669 bool call_clobbered_scratch
19670 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
19671 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
19673 /* Make sure we have a low work register if we need one. We will
19674 need one if we are going to push a high register, but we are not
19675 currently intending to push a low register. However if both the
19676 prologue and epilogue have a spare call-clobbered low register,
19677 then we won't need to find an additional work register. It does
19678 not need to be the same register in the prologue and
19680 if ((mask
& 0xff) == 0
19681 && !call_clobbered_scratch
19682 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19684 /* Use thumb_find_work_register to choose which register
19685 we will use. If the register is live then we will
19686 have to push it. Use LAST_LO_REGNUM as our fallback
19687 choice for the register to select. */
19688 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19689 /* Make sure the register returned by thumb_find_work_register is
19690 not part of the return value. */
19691 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19692 reg
= LAST_LO_REGNUM
;
19694 if (callee_saved_reg_p (reg
))
19698 /* The 504 below is 8 bytes less than 512 because there are two possible
19699 alignment words. We can't tell here if they will be present or not so we
19700 have to play it safe and assume that they are. */
19701 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19702 ROUND_UP_WORD (get_frame_size ()) +
19703 crtl
->outgoing_args_size
) >= 504)
19705 /* This is the same as the code in thumb1_expand_prologue() which
19706 determines which register to use for stack decrement. */
19707 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19708 if (mask
& (1 << reg
))
19711 if (reg
> LAST_LO_REGNUM
)
19713 /* Make sure we have a register available for stack decrement. */
19714 mask
|= 1 << LAST_LO_REGNUM
;
19722 /* Return the number of bytes required to save VFP registers. */
19724 arm_get_vfp_saved_size (void)
19726 unsigned int regno
;
19731 /* Space for saved VFP registers. */
19732 if (TARGET_HARD_FLOAT
)
19735 for (regno
= FIRST_VFP_REGNUM
;
19736 regno
< LAST_VFP_REGNUM
;
19739 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19740 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19744 /* Workaround ARM10 VFPr1 bug. */
19745 if (count
== 2 && !arm_arch6
)
19747 saved
+= count
* 8;
19756 if (count
== 2 && !arm_arch6
)
19758 saved
+= count
* 8;
19765 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19766 everything bar the final return instruction. If simple_return is true,
19767 then do not output epilogue, because it has already been emitted in RTL.
19769 Note: do not forget to update length attribute of corresponding insn pattern
19770 when changing assembly output (eg. length attribute of
19771 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19772 register clearing sequences). */
19774 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19775 bool simple_return
)
19777 char conditional
[10];
19780 unsigned long live_regs_mask
;
19781 unsigned long func_type
;
19782 arm_stack_offsets
*offsets
;
19784 func_type
= arm_current_func_type ();
19786 if (IS_NAKED (func_type
))
19789 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19791 /* If this function was declared non-returning, and we have
19792 found a tail call, then we have to trust that the called
19793 function won't return. */
19798 /* Otherwise, trap an attempted return by aborting. */
19800 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19802 assemble_external_libcall (ops
[1]);
19803 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19809 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19811 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19813 cfun
->machine
->return_used_this_function
= 1;
19815 offsets
= arm_get_frame_offsets ();
19816 live_regs_mask
= offsets
->saved_regs_mask
;
19818 if (!simple_return
&& live_regs_mask
)
19820 const char * return_reg
;
19822 /* If we do not have any special requirements for function exit
19823 (e.g. interworking) then we can load the return address
19824 directly into the PC. Otherwise we must load it into LR. */
19826 && !IS_CMSE_ENTRY (func_type
)
19827 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19828 return_reg
= reg_names
[PC_REGNUM
];
19830 return_reg
= reg_names
[LR_REGNUM
];
19832 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19834 /* There are three possible reasons for the IP register
19835 being saved. 1) a stack frame was created, in which case
19836 IP contains the old stack pointer, or 2) an ISR routine
19837 corrupted it, or 3) it was saved to align the stack on
19838 iWMMXt. In case 1, restore IP into SP, otherwise just
19840 if (frame_pointer_needed
)
19842 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19843 live_regs_mask
|= (1 << SP_REGNUM
);
19846 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19849 /* On some ARM architectures it is faster to use LDR rather than
19850 LDM to load a single register. On other architectures, the
19851 cost is the same. In 26 bit mode, or for exception handlers,
19852 we have to use LDM to load the PC so that the CPSR is also
19854 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19855 if (live_regs_mask
== (1U << reg
))
19858 if (reg
<= LAST_ARM_REGNUM
19859 && (reg
!= LR_REGNUM
19861 || ! IS_INTERRUPT (func_type
)))
19863 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19864 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19871 /* Generate the load multiple instruction to restore the
19872 registers. Note we can get here, even if
19873 frame_pointer_needed is true, but only if sp already
19874 points to the base of the saved core registers. */
19875 if (live_regs_mask
& (1 << SP_REGNUM
))
19877 unsigned HOST_WIDE_INT stack_adjust
;
19879 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19880 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19882 if (stack_adjust
&& arm_arch5t
&& TARGET_ARM
)
19883 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19886 /* If we can't use ldmib (SA110 bug),
19887 then try to pop r3 instead. */
19889 live_regs_mask
|= 1 << 3;
19891 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19894 /* For interrupt returns we have to use an LDM rather than
19895 a POP so that we can use the exception return variant. */
19896 else if (IS_INTERRUPT (func_type
))
19897 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
19899 sprintf (instr
, "pop%s\t{", conditional
);
19901 p
= instr
+ strlen (instr
);
19903 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19904 if (live_regs_mask
& (1 << reg
))
19906 int l
= strlen (reg_names
[reg
]);
19912 memcpy (p
, ", ", 2);
19916 memcpy (p
, "%|", 2);
19917 memcpy (p
+ 2, reg_names
[reg
], l
);
19921 if (live_regs_mask
& (1 << LR_REGNUM
))
19923 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19924 /* If returning from an interrupt, restore the CPSR. */
19925 if (IS_INTERRUPT (func_type
))
19932 output_asm_insn (instr
, & operand
);
19934 /* See if we need to generate an extra instruction to
19935 perform the actual function return. */
19937 && func_type
!= ARM_FT_INTERWORKED
19938 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19940 /* The return has already been handled
19941 by loading the LR into the PC. */
19948 switch ((int) ARM_FUNC_TYPE (func_type
))
19952 /* ??? This is wrong for unified assembly syntax. */
19953 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19956 case ARM_FT_INTERWORKED
:
19957 gcc_assert (arm_arch5t
|| arm_arch4t
);
19958 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19961 case ARM_FT_EXCEPTION
:
19962 /* ??? This is wrong for unified assembly syntax. */
19963 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19967 if (IS_CMSE_ENTRY (func_type
))
19969 /* Check if we have to clear the 'GE bits' which is only used if
19970 parallel add and subtraction instructions are available. */
19971 if (TARGET_INT_SIMD
)
19972 snprintf (instr
, sizeof (instr
),
19973 "msr%s\tAPSR_nzcvqg, %%|lr", conditional
);
19975 snprintf (instr
, sizeof (instr
),
19976 "msr%s\tAPSR_nzcvq, %%|lr", conditional
);
19978 output_asm_insn (instr
, & operand
);
19979 if (TARGET_HARD_FLOAT
)
19981 /* Clear the cumulative exception-status bits (0-4,7) and the
19982 condition code bits (28-31) of the FPSCR. We need to
19983 remember to clear the first scratch register used (IP) and
19984 save and restore the second (r4). */
19985 snprintf (instr
, sizeof (instr
), "push\t{%%|r4}");
19986 output_asm_insn (instr
, & operand
);
19987 snprintf (instr
, sizeof (instr
), "vmrs\t%%|ip, fpscr");
19988 output_asm_insn (instr
, & operand
);
19989 snprintf (instr
, sizeof (instr
), "movw\t%%|r4, #65376");
19990 output_asm_insn (instr
, & operand
);
19991 snprintf (instr
, sizeof (instr
), "movt\t%%|r4, #4095");
19992 output_asm_insn (instr
, & operand
);
19993 snprintf (instr
, sizeof (instr
), "and\t%%|ip, %%|r4");
19994 output_asm_insn (instr
, & operand
);
19995 snprintf (instr
, sizeof (instr
), "vmsr\tfpscr, %%|ip");
19996 output_asm_insn (instr
, & operand
);
19997 snprintf (instr
, sizeof (instr
), "pop\t{%%|r4}");
19998 output_asm_insn (instr
, & operand
);
19999 snprintf (instr
, sizeof (instr
), "mov\t%%|ip, %%|lr");
20000 output_asm_insn (instr
, & operand
);
20002 snprintf (instr
, sizeof (instr
), "bxns\t%%|lr");
20004 /* Use bx if it's available. */
20005 else if (arm_arch5t
|| arm_arch4t
)
20006 sprintf (instr
, "bx%s\t%%|lr", conditional
);
20008 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
20012 output_asm_insn (instr
, & operand
);
20018 /* Output in FILE asm statements needed to declare the NAME of the function
20019 defined by its DECL node. */
20022 arm_asm_declare_function_name (FILE *file
, const char *name
, tree decl
)
20024 size_t cmse_name_len
;
20025 char *cmse_name
= 0;
20026 char cmse_prefix
[] = "__acle_se_";
20028 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
20029 extra function label for each function with the 'cmse_nonsecure_entry'
20030 attribute. This extra function label should be prepended with
20031 '__acle_se_', telling the linker that it needs to create secure gateway
20032 veneers for this function. */
20033 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_entry",
20034 DECL_ATTRIBUTES (decl
)))
20036 cmse_name_len
= sizeof (cmse_prefix
) + strlen (name
);
20037 cmse_name
= XALLOCAVEC (char, cmse_name_len
);
20038 snprintf (cmse_name
, cmse_name_len
, "%s%s", cmse_prefix
, name
);
20039 targetm
.asm_out
.globalize_label (file
, cmse_name
);
20041 ARM_DECLARE_FUNCTION_NAME (file
, cmse_name
, decl
);
20042 ASM_OUTPUT_TYPE_DIRECTIVE (file
, cmse_name
, "function");
20045 ARM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
20046 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
20047 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
20048 ASM_OUTPUT_LABEL (file
, name
);
20051 ASM_OUTPUT_LABEL (file
, cmse_name
);
20053 ARM_OUTPUT_FN_UNWIND (file
, TRUE
);
20056 /* Write the function name into the code section, directly preceding
20057 the function prologue.
20059 Code will be output similar to this:
20061 .ascii "arm_poke_function_name", 0
20064 .word 0xff000000 + (t1 - t0)
20065 arm_poke_function_name
20067 stmfd sp!, {fp, ip, lr, pc}
20070 When performing a stack backtrace, code can inspect the value
20071 of 'pc' stored at 'fp' + 0. If the trace function then looks
20072 at location pc - 12 and the top 8 bits are set, then we know
20073 that there is a function name embedded immediately preceding this
20074 location and has length ((pc[-3]) & 0xff000000).
20076 We assume that pc is declared as a pointer to an unsigned long.
20078 It is of no benefit to output the function name if we are assembling
20079 a leaf function. These function types will not contain a stack
20080 backtrace structure, therefore it is not possible to determine the
20083 arm_poke_function_name (FILE *stream
, const char *name
)
20085 unsigned long alignlength
;
20086 unsigned long length
;
20089 length
= strlen (name
) + 1;
20090 alignlength
= ROUND_UP_WORD (length
);
20092 ASM_OUTPUT_ASCII (stream
, name
, length
);
20093 ASM_OUTPUT_ALIGN (stream
, 2);
20094 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
20095 assemble_aligned_integer (UNITS_PER_WORD
, x
);
20098 /* Place some comments into the assembler stream
20099 describing the current function. */
20101 arm_output_function_prologue (FILE *f
)
20103 unsigned long func_type
;
20105 /* Sanity check. */
20106 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
20108 func_type
= arm_current_func_type ();
20110 switch ((int) ARM_FUNC_TYPE (func_type
))
20113 case ARM_FT_NORMAL
:
20115 case ARM_FT_INTERWORKED
:
20116 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
20119 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
20122 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
20124 case ARM_FT_EXCEPTION
:
20125 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
20129 if (IS_NAKED (func_type
))
20130 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
20132 if (IS_VOLATILE (func_type
))
20133 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
20135 if (IS_NESTED (func_type
))
20136 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
20137 if (IS_STACKALIGN (func_type
))
20138 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
20139 if (IS_CMSE_ENTRY (func_type
))
20140 asm_fprintf (f
, "\t%@ Non-secure entry function: called from non-secure code.\n");
20142 asm_fprintf (f
, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
20143 (HOST_WIDE_INT
) crtl
->args
.size
,
20144 crtl
->args
.pretend_args_size
,
20145 (HOST_WIDE_INT
) get_frame_size ());
20147 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20148 frame_pointer_needed
,
20149 cfun
->machine
->uses_anonymous_args
);
20151 if (cfun
->machine
->lr_save_eliminated
)
20152 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
20154 if (crtl
->calls_eh_return
)
20155 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
20160 arm_output_function_epilogue (FILE *)
20162 arm_stack_offsets
*offsets
;
20168 /* Emit any call-via-reg trampolines that are needed for v4t support
20169 of call_reg and call_value_reg type insns. */
20170 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
20172 rtx label
= cfun
->machine
->call_via
[regno
];
20176 switch_to_section (function_section (current_function_decl
));
20177 targetm
.asm_out
.internal_label (asm_out_file
, "L",
20178 CODE_LABEL_NUMBER (label
));
20179 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
20183 /* ??? Probably not safe to set this here, since it assumes that a
20184 function will be emitted as assembly immediately after we generate
20185 RTL for it. This does not happen for inline functions. */
20186 cfun
->machine
->return_used_this_function
= 0;
20188 else /* TARGET_32BIT */
20190 /* We need to take into account any stack-frame rounding. */
20191 offsets
= arm_get_frame_offsets ();
20193 gcc_assert (!use_return_insn (FALSE
, NULL
)
20194 || (cfun
->machine
->return_used_this_function
!= 0)
20195 || offsets
->saved_regs
== offsets
->outgoing_args
20196 || frame_pointer_needed
);
20200 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20201 STR and STRD. If an even number of registers are being pushed, one
20202 or more STRD patterns are created for each register pair. If an
20203 odd number of registers are pushed, emit an initial STR followed by
20204 as many STRD instructions as are needed. This works best when the
20205 stack is initially 64-bit aligned (the normal case), since it
20206 ensures that each STRD is also 64-bit aligned. */
20208 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
20213 rtx par
= NULL_RTX
;
20214 rtx dwarf
= NULL_RTX
;
20218 num_regs
= bit_count (saved_regs_mask
);
20220 /* Must be at least one register to save, and can't save SP or PC. */
20221 gcc_assert (num_regs
> 0 && num_regs
<= 14);
20222 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20223 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
20225 /* Create sequence for DWARF info. All the frame-related data for
20226 debugging is held in this wrapper. */
20227 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20229 /* Describe the stack adjustment. */
20230 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20231 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20232 RTX_FRAME_RELATED_P (tmp
) = 1;
20233 XVECEXP (dwarf
, 0, 0) = tmp
;
20235 /* Find the first register. */
20236 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
20241 /* If there's an odd number of registers to push. Start off by
20242 pushing a single register. This ensures that subsequent strd
20243 operations are dword aligned (assuming that SP was originally
20244 64-bit aligned). */
20245 if ((num_regs
& 1) != 0)
20247 rtx reg
, mem
, insn
;
20249 reg
= gen_rtx_REG (SImode
, regno
);
20251 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
20252 stack_pointer_rtx
));
20254 mem
= gen_frame_mem (Pmode
,
20256 (Pmode
, stack_pointer_rtx
,
20257 plus_constant (Pmode
, stack_pointer_rtx
,
20260 tmp
= gen_rtx_SET (mem
, reg
);
20261 RTX_FRAME_RELATED_P (tmp
) = 1;
20262 insn
= emit_insn (tmp
);
20263 RTX_FRAME_RELATED_P (insn
) = 1;
20264 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20265 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
20266 RTX_FRAME_RELATED_P (tmp
) = 1;
20269 XVECEXP (dwarf
, 0, i
) = tmp
;
20273 while (i
< num_regs
)
20274 if (saved_regs_mask
& (1 << regno
))
20276 rtx reg1
, reg2
, mem1
, mem2
;
20277 rtx tmp0
, tmp1
, tmp2
;
20280 /* Find the register to pair with this one. */
20281 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
20285 reg1
= gen_rtx_REG (SImode
, regno
);
20286 reg2
= gen_rtx_REG (SImode
, regno2
);
20293 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20296 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20298 -4 * (num_regs
- 1)));
20299 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
20300 plus_constant (Pmode
, stack_pointer_rtx
,
20302 tmp1
= gen_rtx_SET (mem1
, reg1
);
20303 tmp2
= gen_rtx_SET (mem2
, reg2
);
20304 RTX_FRAME_RELATED_P (tmp0
) = 1;
20305 RTX_FRAME_RELATED_P (tmp1
) = 1;
20306 RTX_FRAME_RELATED_P (tmp2
) = 1;
20307 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
20308 XVECEXP (par
, 0, 0) = tmp0
;
20309 XVECEXP (par
, 0, 1) = tmp1
;
20310 XVECEXP (par
, 0, 2) = tmp2
;
20311 insn
= emit_insn (par
);
20312 RTX_FRAME_RELATED_P (insn
) = 1;
20313 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20317 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20320 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20323 tmp1
= gen_rtx_SET (mem1
, reg1
);
20324 tmp2
= gen_rtx_SET (mem2
, reg2
);
20325 RTX_FRAME_RELATED_P (tmp1
) = 1;
20326 RTX_FRAME_RELATED_P (tmp2
) = 1;
20327 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20328 XVECEXP (par
, 0, 0) = tmp1
;
20329 XVECEXP (par
, 0, 1) = tmp2
;
20333 /* Create unwind information. This is an approximation. */
20334 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
20335 plus_constant (Pmode
,
20339 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
20340 plus_constant (Pmode
,
20345 RTX_FRAME_RELATED_P (tmp1
) = 1;
20346 RTX_FRAME_RELATED_P (tmp2
) = 1;
20347 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
20348 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
20350 regno
= regno2
+ 1;
20358 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20359 whenever possible, otherwise it emits single-word stores. The first store
20360 also allocates stack space for all saved registers, using writeback with
20361 post-addressing mode. All other stores use offset addressing. If no STRD
20362 can be emitted, this function emits a sequence of single-word stores,
20363 and not an STM as before, because single-word stores provide more freedom
20364 scheduling and can be turned into an STM by peephole optimizations. */
20366 arm_emit_strd_push (unsigned long saved_regs_mask
)
20369 int i
, j
, dwarf_index
= 0;
20371 rtx dwarf
= NULL_RTX
;
20372 rtx insn
= NULL_RTX
;
20375 /* TODO: A more efficient code can be emitted by changing the
20376 layout, e.g., first push all pairs that can use STRD to keep the
20377 stack aligned, and then push all other registers. */
20378 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20379 if (saved_regs_mask
& (1 << i
))
20382 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20383 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
20384 gcc_assert (num_regs
> 0);
20386 /* Create sequence for DWARF info. */
20387 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20389 /* For dwarf info, we generate explicit stack update. */
20390 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20391 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20392 RTX_FRAME_RELATED_P (tmp
) = 1;
20393 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20395 /* Save registers. */
20396 offset
= - 4 * num_regs
;
20398 while (j
<= LAST_ARM_REGNUM
)
20399 if (saved_regs_mask
& (1 << j
))
20402 && (saved_regs_mask
& (1 << (j
+ 1))))
20404 /* Current register and previous register form register pair for
20405 which STRD can be generated. */
20408 /* Allocate stack space for all saved registers. */
20409 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20410 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20411 mem
= gen_frame_mem (DImode
, tmp
);
20414 else if (offset
> 0)
20415 mem
= gen_frame_mem (DImode
,
20416 plus_constant (Pmode
,
20420 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20422 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
20423 RTX_FRAME_RELATED_P (tmp
) = 1;
20424 tmp
= emit_insn (tmp
);
20426 /* Record the first store insn. */
20427 if (dwarf_index
== 1)
20430 /* Generate dwarf info. */
20431 mem
= gen_frame_mem (SImode
,
20432 plus_constant (Pmode
,
20435 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20436 RTX_FRAME_RELATED_P (tmp
) = 1;
20437 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20439 mem
= gen_frame_mem (SImode
,
20440 plus_constant (Pmode
,
20443 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
20444 RTX_FRAME_RELATED_P (tmp
) = 1;
20445 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20452 /* Emit a single word store. */
20455 /* Allocate stack space for all saved registers. */
20456 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20457 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20458 mem
= gen_frame_mem (SImode
, tmp
);
20461 else if (offset
> 0)
20462 mem
= gen_frame_mem (SImode
,
20463 plus_constant (Pmode
,
20467 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20469 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20470 RTX_FRAME_RELATED_P (tmp
) = 1;
20471 tmp
= emit_insn (tmp
);
20473 /* Record the first store insn. */
20474 if (dwarf_index
== 1)
20477 /* Generate dwarf info. */
20478 mem
= gen_frame_mem (SImode
,
20479 plus_constant(Pmode
,
20482 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20483 RTX_FRAME_RELATED_P (tmp
) = 1;
20484 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20493 /* Attach dwarf info to the first insn we generate. */
20494 gcc_assert (insn
!= NULL_RTX
);
20495 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20496 RTX_FRAME_RELATED_P (insn
) = 1;
20499 /* Generate and emit an insn that we will recognize as a push_multi.
20500 Unfortunately, since this insn does not reflect very well the actual
20501 semantics of the operation, we need to annotate the insn for the benefit
20502 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20503 MASK for registers that should be annotated for DWARF2 frame unwind
20506 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20509 int num_dwarf_regs
= 0;
20513 int dwarf_par_index
;
20516 /* We don't record the PC in the dwarf frame information. */
20517 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20519 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20521 if (mask
& (1 << i
))
20523 if (dwarf_regs_mask
& (1 << i
))
20527 gcc_assert (num_regs
&& num_regs
<= 16);
20528 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20530 /* For the body of the insn we are going to generate an UNSPEC in
20531 parallel with several USEs. This allows the insn to be recognized
20532 by the push_multi pattern in the arm.md file.
20534 The body of the insn looks something like this:
20537 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20538 (const_int:SI <num>)))
20539 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20545 For the frame note however, we try to be more explicit and actually
20546 show each register being stored into the stack frame, plus a (single)
20547 decrement of the stack pointer. We do it this way in order to be
20548 friendly to the stack unwinding code, which only wants to see a single
20549 stack decrement per instruction. The RTL we generate for the note looks
20550 something like this:
20553 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20554 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20555 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20556 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20560 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20561 instead we'd have a parallel expression detailing all
20562 the stores to the various memory addresses so that debug
20563 information is more up-to-date. Remember however while writing
20564 this to take care of the constraints with the push instruction.
20566 Note also that this has to be taken care of for the VFP registers.
20568 For more see PR43399. */
20570 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20571 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20572 dwarf_par_index
= 1;
20574 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20576 if (mask
& (1 << i
))
20578 reg
= gen_rtx_REG (SImode
, i
);
20580 XVECEXP (par
, 0, 0)
20581 = gen_rtx_SET (gen_frame_mem
20583 gen_rtx_PRE_MODIFY (Pmode
,
20586 (Pmode
, stack_pointer_rtx
,
20589 gen_rtx_UNSPEC (BLKmode
,
20590 gen_rtvec (1, reg
),
20591 UNSPEC_PUSH_MULT
));
20593 if (dwarf_regs_mask
& (1 << i
))
20595 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
20597 RTX_FRAME_RELATED_P (tmp
) = 1;
20598 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20605 for (j
= 1, i
++; j
< num_regs
; i
++)
20607 if (mask
& (1 << i
))
20609 reg
= gen_rtx_REG (SImode
, i
);
20611 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20613 if (dwarf_regs_mask
& (1 << i
))
20616 = gen_rtx_SET (gen_frame_mem
20618 plus_constant (Pmode
, stack_pointer_rtx
,
20621 RTX_FRAME_RELATED_P (tmp
) = 1;
20622 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20629 par
= emit_insn (par
);
20631 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20632 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20633 RTX_FRAME_RELATED_P (tmp
) = 1;
20634 XVECEXP (dwarf
, 0, 0) = tmp
;
20636 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
20641 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20642 SIZE is the offset to be adjusted.
20643 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20645 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20649 RTX_FRAME_RELATED_P (insn
) = 1;
20650 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
20651 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20654 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20655 SAVED_REGS_MASK shows which registers need to be restored.
20657 Unfortunately, since this insn does not reflect very well the actual
20658 semantics of the operation, we need to annotate the insn for the benefit
20659 of DWARF2 frame unwind information. */
20661 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20666 rtx dwarf
= NULL_RTX
;
20668 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20672 offset_adj
= return_in_pc
? 1 : 0;
20673 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20674 if (saved_regs_mask
& (1 << i
))
20677 gcc_assert (num_regs
&& num_regs
<= 16);
20679 /* If SP is in reglist, then we don't emit SP update insn. */
20680 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20682 /* The parallel needs to hold num_regs SETs
20683 and one SET for the stack update. */
20684 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20687 XVECEXP (par
, 0, 0) = ret_rtx
;
20691 /* Increment the stack pointer, based on there being
20692 num_regs 4-byte registers to restore. */
20693 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20694 plus_constant (Pmode
,
20697 RTX_FRAME_RELATED_P (tmp
) = 1;
20698 XVECEXP (par
, 0, offset_adj
) = tmp
;
20701 /* Now restore every reg, which may include PC. */
20702 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20703 if (saved_regs_mask
& (1 << i
))
20705 reg
= gen_rtx_REG (SImode
, i
);
20706 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20708 /* Emit single load with writeback. */
20709 tmp
= gen_frame_mem (SImode
,
20710 gen_rtx_POST_INC (Pmode
,
20711 stack_pointer_rtx
));
20712 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
20713 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20717 tmp
= gen_rtx_SET (reg
,
20720 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20721 RTX_FRAME_RELATED_P (tmp
) = 1;
20722 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20724 /* We need to maintain a sequence for DWARF info too. As dwarf info
20725 should not have PC, skip PC. */
20726 if (i
!= PC_REGNUM
)
20727 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20733 par
= emit_jump_insn (par
);
20735 par
= emit_insn (par
);
20737 REG_NOTES (par
) = dwarf
;
20739 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20740 stack_pointer_rtx
, stack_pointer_rtx
);
20743 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20744 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20746 Unfortunately, since this insn does not reflect very well the actual
20747 semantics of the operation, we need to annotate the insn for the benefit
20748 of DWARF2 frame unwind information. */
20750 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20754 rtx dwarf
= NULL_RTX
;
20757 gcc_assert (num_regs
&& num_regs
<= 32);
20759 /* Workaround ARM10 VFPr1 bug. */
20760 if (num_regs
== 2 && !arm_arch6
)
20762 if (first_reg
== 15)
20768 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20769 there could be up to 32 D-registers to restore.
20770 If there are more than 16 D-registers, make two recursive calls,
20771 each of which emits one pop_multi instruction. */
20774 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20775 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20779 /* The parallel needs to hold num_regs SETs
20780 and one SET for the stack update. */
20781 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20783 /* Increment the stack pointer, based on there being
20784 num_regs 8-byte registers to restore. */
20785 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20786 RTX_FRAME_RELATED_P (tmp
) = 1;
20787 XVECEXP (par
, 0, 0) = tmp
;
20789 /* Now show every reg that will be restored, using a SET for each. */
20790 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20792 reg
= gen_rtx_REG (DFmode
, i
);
20794 tmp
= gen_rtx_SET (reg
,
20797 plus_constant (Pmode
, base_reg
, 8 * j
)));
20798 RTX_FRAME_RELATED_P (tmp
) = 1;
20799 XVECEXP (par
, 0, j
+ 1) = tmp
;
20801 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20806 par
= emit_insn (par
);
20807 REG_NOTES (par
) = dwarf
;
20809 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20810 if (REGNO (base_reg
) == IP_REGNUM
)
20812 RTX_FRAME_RELATED_P (par
) = 1;
20813 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20816 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20817 base_reg
, base_reg
);
20820 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20821 number of registers are being popped, multiple LDRD patterns are created for
20822 all register pairs. If odd number of registers are popped, last register is
20823 loaded by using LDR pattern. */
20825 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20829 rtx par
= NULL_RTX
;
20830 rtx dwarf
= NULL_RTX
;
20831 rtx tmp
, reg
, tmp1
;
20832 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20834 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20835 if (saved_regs_mask
& (1 << i
))
20838 gcc_assert (num_regs
&& num_regs
<= 16);
20840 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20841 to be popped. So, if num_regs is even, now it will become odd,
20842 and we can generate pop with PC. If num_regs is odd, it will be
20843 even now, and ldr with return can be generated for PC. */
20847 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20849 /* Var j iterates over all the registers to gather all the registers in
20850 saved_regs_mask. Var i gives index of saved registers in stack frame.
20851 A PARALLEL RTX of register-pair is created here, so that pattern for
20852 LDRD can be matched. As PC is always last register to be popped, and
20853 we have already decremented num_regs if PC, we don't have to worry
20854 about PC in this loop. */
20855 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20856 if (saved_regs_mask
& (1 << j
))
20858 /* Create RTX for memory load. */
20859 reg
= gen_rtx_REG (SImode
, j
);
20860 tmp
= gen_rtx_SET (reg
,
20861 gen_frame_mem (SImode
,
20862 plus_constant (Pmode
,
20863 stack_pointer_rtx
, 4 * i
)));
20864 RTX_FRAME_RELATED_P (tmp
) = 1;
20868 /* When saved-register index (i) is even, the RTX to be emitted is
20869 yet to be created. Hence create it first. The LDRD pattern we
20870 are generating is :
20871 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20872 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20873 where target registers need not be consecutive. */
20874 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20878 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20879 added as 0th element and if i is odd, reg_i is added as 1st element
20880 of LDRD pattern shown above. */
20881 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20882 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20886 /* When saved-register index (i) is odd, RTXs for both the registers
20887 to be loaded are generated in above given LDRD pattern, and the
20888 pattern can be emitted now. */
20889 par
= emit_insn (par
);
20890 REG_NOTES (par
) = dwarf
;
20891 RTX_FRAME_RELATED_P (par
) = 1;
20897 /* If the number of registers pushed is odd AND return_in_pc is false OR
20898 number of registers are even AND return_in_pc is true, last register is
20899 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20900 then LDR with post increment. */
20902 /* Increment the stack pointer, based on there being
20903 num_regs 4-byte registers to restore. */
20904 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20905 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20906 RTX_FRAME_RELATED_P (tmp
) = 1;
20907 tmp
= emit_insn (tmp
);
20910 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20911 stack_pointer_rtx
, stack_pointer_rtx
);
20916 if (((num_regs
% 2) == 1 && !return_in_pc
)
20917 || ((num_regs
% 2) == 0 && return_in_pc
))
20919 /* Scan for the single register to be popped. Skip until the saved
20920 register is found. */
20921 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20923 /* Gen LDR with post increment here. */
20924 tmp1
= gen_rtx_MEM (SImode
,
20925 gen_rtx_POST_INC (SImode
,
20926 stack_pointer_rtx
));
20927 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20929 reg
= gen_rtx_REG (SImode
, j
);
20930 tmp
= gen_rtx_SET (reg
, tmp1
);
20931 RTX_FRAME_RELATED_P (tmp
) = 1;
20932 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20936 /* If return_in_pc, j must be PC_REGNUM. */
20937 gcc_assert (j
== PC_REGNUM
);
20938 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20939 XVECEXP (par
, 0, 0) = ret_rtx
;
20940 XVECEXP (par
, 0, 1) = tmp
;
20941 par
= emit_jump_insn (par
);
20945 par
= emit_insn (tmp
);
20946 REG_NOTES (par
) = dwarf
;
20947 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20948 stack_pointer_rtx
, stack_pointer_rtx
);
20952 else if ((num_regs
% 2) == 1 && return_in_pc
)
20954 /* There are 2 registers to be popped. So, generate the pattern
20955 pop_multiple_with_stack_update_and_return to pop in PC. */
20956 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20962 /* LDRD in ARM mode needs consecutive registers as operands. This function
20963 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20964 offset addressing and then generates one separate stack udpate. This provides
20965 more scheduling freedom, compared to writeback on every load. However,
20966 if the function returns using load into PC directly
20967 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20968 before the last load. TODO: Add a peephole optimization to recognize
20969 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20970 peephole optimization to merge the load at stack-offset zero
20971 with the stack update instruction using load with writeback
20972 in post-index addressing mode. */
20974 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20978 rtx par
= NULL_RTX
;
20979 rtx dwarf
= NULL_RTX
;
20982 /* Restore saved registers. */
20983 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20985 while (j
<= LAST_ARM_REGNUM
)
20986 if (saved_regs_mask
& (1 << j
))
20989 && (saved_regs_mask
& (1 << (j
+ 1)))
20990 && (j
+ 1) != PC_REGNUM
)
20992 /* Current register and next register form register pair for which
20993 LDRD can be generated. PC is always the last register popped, and
20994 we handle it separately. */
20996 mem
= gen_frame_mem (DImode
,
20997 plus_constant (Pmode
,
21001 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
21003 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
21004 tmp
= emit_insn (tmp
);
21005 RTX_FRAME_RELATED_P (tmp
) = 1;
21007 /* Generate dwarf info. */
21009 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
21010 gen_rtx_REG (SImode
, j
),
21012 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
21013 gen_rtx_REG (SImode
, j
+ 1),
21016 REG_NOTES (tmp
) = dwarf
;
21021 else if (j
!= PC_REGNUM
)
21023 /* Emit a single word load. */
21025 mem
= gen_frame_mem (SImode
,
21026 plus_constant (Pmode
,
21030 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
21032 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
21033 tmp
= emit_insn (tmp
);
21034 RTX_FRAME_RELATED_P (tmp
) = 1;
21036 /* Generate dwarf info. */
21037 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
21038 gen_rtx_REG (SImode
, j
),
21044 else /* j == PC_REGNUM */
21050 /* Update the stack. */
21053 tmp
= gen_rtx_SET (stack_pointer_rtx
,
21054 plus_constant (Pmode
,
21057 tmp
= emit_insn (tmp
);
21058 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
21059 stack_pointer_rtx
, stack_pointer_rtx
);
21063 if (saved_regs_mask
& (1 << PC_REGNUM
))
21065 /* Only PC is to be popped. */
21066 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
21067 XVECEXP (par
, 0, 0) = ret_rtx
;
21068 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
21069 gen_frame_mem (SImode
,
21070 gen_rtx_POST_INC (SImode
,
21071 stack_pointer_rtx
)));
21072 RTX_FRAME_RELATED_P (tmp
) = 1;
21073 XVECEXP (par
, 0, 1) = tmp
;
21074 par
= emit_jump_insn (par
);
21076 /* Generate dwarf info. */
21077 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
21078 gen_rtx_REG (SImode
, PC_REGNUM
),
21080 REG_NOTES (par
) = dwarf
;
21081 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
21082 stack_pointer_rtx
, stack_pointer_rtx
);
21086 /* Calculate the size of the return value that is passed in registers. */
21088 arm_size_return_regs (void)
21092 if (crtl
->return_rtx
!= 0)
21093 mode
= GET_MODE (crtl
->return_rtx
);
21095 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
21097 return GET_MODE_SIZE (mode
);
21100 /* Return true if the current function needs to save/restore LR. */
21102 thumb_force_lr_save (void)
21104 return !cfun
->machine
->lr_save_eliminated
21106 || thumb_far_jump_used_p ()
21107 || df_regs_ever_live_p (LR_REGNUM
));
21110 /* We do not know if r3 will be available because
21111 we do have an indirect tailcall happening in this
21112 particular case. */
21114 is_indirect_tailcall_p (rtx call
)
21116 rtx pat
= PATTERN (call
);
21118 /* Indirect tail call. */
21119 pat
= XVECEXP (pat
, 0, 0);
21120 if (GET_CODE (pat
) == SET
)
21121 pat
= SET_SRC (pat
);
21123 pat
= XEXP (XEXP (pat
, 0), 0);
21124 return REG_P (pat
);
21127 /* Return true if r3 is used by any of the tail call insns in the
21128 current function. */
21130 any_sibcall_could_use_r3 (void)
21135 if (!crtl
->tail_call_emit
)
21137 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
21138 if (e
->flags
& EDGE_SIBCALL
)
21140 rtx_insn
*call
= BB_END (e
->src
);
21141 if (!CALL_P (call
))
21142 call
= prev_nonnote_nondebug_insn (call
);
21143 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
21144 if (find_regno_fusage (call
, USE
, 3)
21145 || is_indirect_tailcall_p (call
))
21152 /* Compute the distance from register FROM to register TO.
21153 These can be the arg pointer (26), the soft frame pointer (25),
21154 the stack pointer (13) or the hard frame pointer (11).
21155 In thumb mode r7 is used as the soft frame pointer, if needed.
21156 Typical stack layout looks like this:
21158 old stack pointer -> | |
21161 | | saved arguments for
21162 | | vararg functions
21165 hard FP & arg pointer -> | | \
21173 soft frame pointer -> | | /
21178 locals base pointer -> | | /
21183 current stack pointer -> | | /
21186 For a given function some or all of these stack components
21187 may not be needed, giving rise to the possibility of
21188 eliminating some of the registers.
21190 The values returned by this function must reflect the behavior
21191 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21193 The sign of the number returned reflects the direction of stack
21194 growth, so the values are positive for all eliminations except
21195 from the soft frame pointer to the hard frame pointer.
21197 SFP may point just inside the local variables block to ensure correct
21201 /* Return cached stack offsets. */
21203 static arm_stack_offsets
*
21204 arm_get_frame_offsets (void)
21206 struct arm_stack_offsets
*offsets
;
21208 offsets
= &cfun
->machine
->stack_offsets
;
21214 /* Calculate stack offsets. These are used to calculate register elimination
21215 offsets and in prologue/epilogue code. Also calculates which registers
21216 should be saved. */
21219 arm_compute_frame_layout (void)
21221 struct arm_stack_offsets
*offsets
;
21222 unsigned long func_type
;
21225 HOST_WIDE_INT frame_size
;
21228 offsets
= &cfun
->machine
->stack_offsets
;
21230 /* Initially this is the size of the local variables. It will translated
21231 into an offset once we have determined the size of preceding data. */
21232 frame_size
= ROUND_UP_WORD (get_frame_size ());
21234 /* Space for variadic functions. */
21235 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
21237 /* In Thumb mode this is incorrect, but never used. */
21239 = (offsets
->saved_args
21240 + arm_compute_static_chain_stack_bytes ()
21241 + (frame_pointer_needed
? 4 : 0));
21245 unsigned int regno
;
21247 offsets
->saved_regs_mask
= arm_compute_save_core_reg_mask ();
21248 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
21249 saved
= core_saved
;
21251 /* We know that SP will be doubleword aligned on entry, and we must
21252 preserve that condition at any subroutine call. We also require the
21253 soft frame pointer to be doubleword aligned. */
21255 if (TARGET_REALLY_IWMMXT
)
21257 /* Check for the call-saved iWMMXt registers. */
21258 for (regno
= FIRST_IWMMXT_REGNUM
;
21259 regno
<= LAST_IWMMXT_REGNUM
;
21261 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
21265 func_type
= arm_current_func_type ();
21266 /* Space for saved VFP registers. */
21267 if (! IS_VOLATILE (func_type
)
21268 && TARGET_HARD_FLOAT
)
21269 saved
+= arm_get_vfp_saved_size ();
21271 else /* TARGET_THUMB1 */
21273 offsets
->saved_regs_mask
= thumb1_compute_save_core_reg_mask ();
21274 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
21275 saved
= core_saved
;
21276 if (TARGET_BACKTRACE
)
21280 /* Saved registers include the stack frame. */
21281 offsets
->saved_regs
21282 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
21283 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
21285 /* A leaf function does not need any stack alignment if it has nothing
21287 if (crtl
->is_leaf
&& frame_size
== 0
21288 /* However if it calls alloca(), we have a dynamically allocated
21289 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21290 && ! cfun
->calls_alloca
)
21292 offsets
->outgoing_args
= offsets
->soft_frame
;
21293 offsets
->locals_base
= offsets
->soft_frame
;
21297 /* Ensure SFP has the correct alignment. */
21298 if (ARM_DOUBLEWORD_ALIGN
21299 && (offsets
->soft_frame
& 7))
21301 offsets
->soft_frame
+= 4;
21302 /* Try to align stack by pushing an extra reg. Don't bother doing this
21303 when there is a stack frame as the alignment will be rolled into
21304 the normal stack adjustment. */
21305 if (frame_size
+ crtl
->outgoing_args_size
== 0)
21309 /* Register r3 is caller-saved. Normally it does not need to be
21310 saved on entry by the prologue. However if we choose to save
21311 it for padding then we may confuse the compiler into thinking
21312 a prologue sequence is required when in fact it is not. This
21313 will occur when shrink-wrapping if r3 is used as a scratch
21314 register and there are no other callee-saved writes.
21316 This situation can be avoided when other callee-saved registers
21317 are available and r3 is not mandatory if we choose a callee-saved
21318 register for padding. */
21319 bool prefer_callee_reg_p
= false;
21321 /* If it is safe to use r3, then do so. This sometimes
21322 generates better code on Thumb-2 by avoiding the need to
21323 use 32-bit push/pop instructions. */
21324 if (! any_sibcall_could_use_r3 ()
21325 && arm_size_return_regs () <= 12
21326 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
21328 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
21331 if (!TARGET_THUMB2
)
21332 prefer_callee_reg_p
= true;
21335 || prefer_callee_reg_p
)
21337 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
21339 /* Avoid fixed registers; they may be changed at
21340 arbitrary times so it's unsafe to restore them
21341 during the epilogue. */
21343 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
21353 offsets
->saved_regs
+= 4;
21354 offsets
->saved_regs_mask
|= (1 << reg
);
21359 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
21360 offsets
->outgoing_args
= (offsets
->locals_base
21361 + crtl
->outgoing_args_size
);
21363 if (ARM_DOUBLEWORD_ALIGN
)
21365 /* Ensure SP remains doubleword aligned. */
21366 if (offsets
->outgoing_args
& 7)
21367 offsets
->outgoing_args
+= 4;
21368 gcc_assert (!(offsets
->outgoing_args
& 7));
21373 /* Calculate the relative offsets for the different stack pointers. Positive
21374 offsets are in the direction of stack growth. */
21377 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
21379 arm_stack_offsets
*offsets
;
21381 offsets
= arm_get_frame_offsets ();
21383 /* OK, now we have enough information to compute the distances.
21384 There must be an entry in these switch tables for each pair
21385 of registers in ELIMINABLE_REGS, even if some of the entries
21386 seem to be redundant or useless. */
21389 case ARG_POINTER_REGNUM
:
21392 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21395 case FRAME_POINTER_REGNUM
:
21396 /* This is the reverse of the soft frame pointer
21397 to hard frame pointer elimination below. */
21398 return offsets
->soft_frame
- offsets
->saved_args
;
21400 case ARM_HARD_FRAME_POINTER_REGNUM
:
21401 /* This is only non-zero in the case where the static chain register
21402 is stored above the frame. */
21403 return offsets
->frame
- offsets
->saved_args
- 4;
21405 case STACK_POINTER_REGNUM
:
21406 /* If nothing has been pushed on the stack at all
21407 then this will return -4. This *is* correct! */
21408 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
21411 gcc_unreachable ();
21413 gcc_unreachable ();
21415 case FRAME_POINTER_REGNUM
:
21418 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21421 case ARM_HARD_FRAME_POINTER_REGNUM
:
21422 /* The hard frame pointer points to the top entry in the
21423 stack frame. The soft frame pointer to the bottom entry
21424 in the stack frame. If there is no stack frame at all,
21425 then they are identical. */
21427 return offsets
->frame
- offsets
->soft_frame
;
21429 case STACK_POINTER_REGNUM
:
21430 return offsets
->outgoing_args
- offsets
->soft_frame
;
21433 gcc_unreachable ();
21435 gcc_unreachable ();
21438 /* You cannot eliminate from the stack pointer.
21439 In theory you could eliminate from the hard frame
21440 pointer to the stack pointer, but this will never
21441 happen, since if a stack frame is not needed the
21442 hard frame pointer will never be used. */
21443 gcc_unreachable ();
21447 /* Given FROM and TO register numbers, say whether this elimination is
21448 allowed. Frame pointer elimination is automatically handled.
21450 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21451 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21452 pointer, we must eliminate FRAME_POINTER_REGNUM into
21453 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21454 ARG_POINTER_REGNUM. */
21457 arm_can_eliminate (const int from
, const int to
)
21459 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
21460 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
21461 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
21462 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
21466 /* Emit RTL to save coprocessor registers on function entry. Returns the
21467 number of bytes pushed. */
21470 arm_save_coproc_regs(void)
21472 int saved_size
= 0;
21474 unsigned start_reg
;
21477 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
21478 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
21480 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21481 insn
= gen_rtx_MEM (V2SImode
, insn
);
21482 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
21483 RTX_FRAME_RELATED_P (insn
) = 1;
21487 if (TARGET_HARD_FLOAT
)
21489 start_reg
= FIRST_VFP_REGNUM
;
21491 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
21493 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
21494 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
21496 if (start_reg
!= reg
)
21497 saved_size
+= vfp_emit_fstmd (start_reg
,
21498 (reg
- start_reg
) / 2);
21499 start_reg
= reg
+ 2;
21502 if (start_reg
!= reg
)
21503 saved_size
+= vfp_emit_fstmd (start_reg
,
21504 (reg
- start_reg
) / 2);
21510 /* Set the Thumb frame pointer from the stack pointer. */
21513 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21515 HOST_WIDE_INT amount
;
21518 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21520 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21521 stack_pointer_rtx
, GEN_INT (amount
)));
21524 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21525 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21526 expects the first two operands to be the same. */
21529 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21531 hard_frame_pointer_rtx
));
21535 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21536 hard_frame_pointer_rtx
,
21537 stack_pointer_rtx
));
21539 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
21540 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21541 RTX_FRAME_RELATED_P (dwarf
) = 1;
21542 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21545 RTX_FRAME_RELATED_P (insn
) = 1;
21548 struct scratch_reg
{
21553 /* Return a short-lived scratch register for use as a 2nd scratch register on
21554 function entry after the registers are saved in the prologue. This register
21555 must be released by means of release_scratch_register_on_entry. IP is not
21556 considered since it is always used as the 1st scratch register if available.
21558 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21559 mask of live registers. */
21562 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
21563 unsigned long live_regs
)
21569 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
21575 for (i
= 4; i
< 11; i
++)
21576 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
21584 /* If IP is used as the 1st scratch register for a nested function,
21585 then either r3 wasn't available or is used to preserve IP. */
21586 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
21588 regno
= (regno1
== 3 ? 2 : 3);
21590 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
21595 sr
->reg
= gen_rtx_REG (SImode
, regno
);
21598 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21599 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
21600 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21601 plus_constant (Pmode
, stack_pointer_rtx
, -4));
21602 RTX_FRAME_RELATED_P (insn
) = 1;
21603 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21607 /* Release a scratch register obtained from the preceding function. */
21610 release_scratch_register_on_entry (struct scratch_reg
*sr
)
21614 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
21615 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
21616 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21617 plus_constant (Pmode
, stack_pointer_rtx
, 4));
21618 RTX_FRAME_RELATED_P (insn
) = 1;
21619 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21623 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21625 #if PROBE_INTERVAL > 4096
21626 #error Cannot use indexed addressing mode for stack probing
21629 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21630 inclusive. These are offsets from the current stack pointer. REGNO1
21631 is the index number of the 1st scratch register and LIVE_REGS is the
21632 mask of live registers. */
21635 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
21636 unsigned int regno1
, unsigned long live_regs
)
21638 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
21640 /* See if we have a constant small number of probes to generate. If so,
21641 that's the easy case. */
21642 if (size
<= PROBE_INTERVAL
)
21644 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21645 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21646 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
21649 /* The run-time loop is made up of 10 insns in the generic case while the
21650 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21651 else if (size
<= 5 * PROBE_INTERVAL
)
21653 HOST_WIDE_INT i
, rem
;
21655 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21656 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21657 emit_stack_probe (reg1
);
21659 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21660 it exceeds SIZE. If only two probes are needed, this will not
21661 generate any code. Then probe at FIRST + SIZE. */
21662 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
21664 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21665 emit_stack_probe (reg1
);
21668 rem
= size
- (i
- PROBE_INTERVAL
);
21669 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21671 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21672 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
21675 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
21678 /* Otherwise, do the same as above, but in a loop. Note that we must be
21679 extra careful with variables wrapping around because we might be at
21680 the very top (or the very bottom) of the address space and we have
21681 to be able to handle this case properly; in particular, we use an
21682 equality test for the loop condition. */
21685 HOST_WIDE_INT rounded_size
;
21686 struct scratch_reg sr
;
21688 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
21690 emit_move_insn (reg1
, GEN_INT (first
));
21693 /* Step 1: round SIZE to the previous multiple of the interval. */
21695 rounded_size
= size
& -PROBE_INTERVAL
;
21696 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
21699 /* Step 2: compute initial and final value of the loop counter. */
21701 /* TEST_ADDR = SP + FIRST. */
21702 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21704 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21705 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
21708 /* Step 3: the loop
21712 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21715 while (TEST_ADDR != LAST_ADDR)
21717 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21718 until it is equal to ROUNDED_SIZE. */
21720 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
21723 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21724 that SIZE is equal to ROUNDED_SIZE. */
21726 if (size
!= rounded_size
)
21728 HOST_WIDE_INT rem
= size
- rounded_size
;
21730 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21732 emit_set_insn (sr
.reg
,
21733 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
21734 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
21735 PROBE_INTERVAL
- rem
));
21738 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
21741 release_scratch_register_on_entry (&sr
);
21744 /* Make sure nothing is scheduled before we are done. */
21745 emit_insn (gen_blockage ());
21748 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21749 absolute addresses. */
21752 output_probe_stack_range (rtx reg1
, rtx reg2
)
21754 static int labelno
= 0;
21758 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
21761 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
21763 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21765 xops
[1] = GEN_INT (PROBE_INTERVAL
);
21766 output_asm_insn ("sub\t%0, %0, %1", xops
);
21768 /* Probe at TEST_ADDR. */
21769 output_asm_insn ("str\tr0, [%0, #0]", xops
);
21771 /* Test if TEST_ADDR == LAST_ADDR. */
21773 output_asm_insn ("cmp\t%0, %1", xops
);
21776 fputs ("\tbne\t", asm_out_file
);
21777 assemble_name_raw (asm_out_file
, loop_lab
);
21778 fputc ('\n', asm_out_file
);
21783 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21786 arm_expand_prologue (void)
21791 unsigned long live_regs_mask
;
21792 unsigned long func_type
;
21794 int saved_pretend_args
= 0;
21795 int saved_regs
= 0;
21796 unsigned HOST_WIDE_INT args_to_push
;
21797 HOST_WIDE_INT size
;
21798 arm_stack_offsets
*offsets
;
21801 func_type
= arm_current_func_type ();
21803 /* Naked functions don't have prologues. */
21804 if (IS_NAKED (func_type
))
21806 if (flag_stack_usage_info
)
21807 current_function_static_stack_size
= 0;
21811 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21812 args_to_push
= crtl
->args
.pretend_args_size
;
21814 /* Compute which register we will have to save onto the stack. */
21815 offsets
= arm_get_frame_offsets ();
21816 live_regs_mask
= offsets
->saved_regs_mask
;
21818 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21820 if (IS_STACKALIGN (func_type
))
21824 /* Handle a word-aligned stack pointer. We generate the following:
21829 <save and restore r0 in normal prologue/epilogue>
21833 The unwinder doesn't need to know about the stack realignment.
21834 Just tell it we saved SP in r0. */
21835 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21837 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
21838 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
21840 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21841 RTX_FRAME_RELATED_P (insn
) = 1;
21842 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21844 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21846 /* ??? The CFA changes here, which may cause GDB to conclude that it
21847 has entered a different function. That said, the unwind info is
21848 correct, individually, before and after this instruction because
21849 we've described the save of SP, which will override the default
21850 handling of SP as restoring from the CFA. */
21851 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21854 /* Let's compute the static_chain_stack_bytes required and store it. Right
21855 now the value must be -1 as stored by arm_init_machine_status (). */
21856 cfun
->machine
->static_chain_stack_bytes
21857 = arm_compute_static_chain_stack_bytes ();
21859 /* The static chain register is the same as the IP register. If it is
21860 clobbered when creating the frame, we need to save and restore it. */
21861 clobber_ip
= IS_NESTED (func_type
)
21862 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21863 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21864 || flag_stack_clash_protection
)
21865 && !df_regs_ever_live_p (LR_REGNUM
)
21866 && arm_r3_live_at_start_p ()));
21868 /* Find somewhere to store IP whilst the frame is being created.
21869 We try the following places in order:
21871 1. The last argument register r3 if it is available.
21872 2. A slot on the stack above the frame if there are no
21873 arguments to push onto the stack.
21874 3. Register r3 again, after pushing the argument registers
21875 onto the stack, if this is a varargs function.
21876 4. The last slot on the stack created for the arguments to
21877 push, if this isn't a varargs function.
21879 Note - we only need to tell the dwarf2 backend about the SP
21880 adjustment in the second variant; the static chain register
21881 doesn't need to be unwound, as it doesn't contain a value
21882 inherited from the caller. */
21885 if (!arm_r3_live_at_start_p ())
21886 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21887 else if (args_to_push
== 0)
21891 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21894 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21895 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21898 /* Just tell the dwarf backend that we adjusted SP. */
21899 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21900 plus_constant (Pmode
, stack_pointer_rtx
,
21902 RTX_FRAME_RELATED_P (insn
) = 1;
21903 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21907 /* Store the args on the stack. */
21908 if (cfun
->machine
->uses_anonymous_args
)
21910 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21911 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21912 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21913 saved_pretend_args
= 1;
21919 if (args_to_push
== 4)
21920 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21922 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21923 plus_constant (Pmode
,
21927 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21929 /* Just tell the dwarf backend that we adjusted SP. */
21930 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21931 plus_constant (Pmode
, stack_pointer_rtx
,
21933 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21936 RTX_FRAME_RELATED_P (insn
) = 1;
21937 fp_offset
= args_to_push
;
21942 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21944 if (IS_INTERRUPT (func_type
))
21946 /* Interrupt functions must not corrupt any registers.
21947 Creating a frame pointer however, corrupts the IP
21948 register, so we must push it first. */
21949 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21951 /* Do not set RTX_FRAME_RELATED_P on this insn.
21952 The dwarf stack unwinding code only wants to see one
21953 stack decrement per function, and this is not it. If
21954 this instruction is labeled as being part of the frame
21955 creation sequence then dwarf2out_frame_debug_expr will
21956 die when it encounters the assignment of IP to FP
21957 later on, since the use of SP here establishes SP as
21958 the CFA register and not IP.
21960 Anyway this instruction is not really part of the stack
21961 frame creation although it is part of the prologue. */
21964 insn
= emit_set_insn (ip_rtx
,
21965 plus_constant (Pmode
, stack_pointer_rtx
,
21967 RTX_FRAME_RELATED_P (insn
) = 1;
21972 /* Push the argument registers, or reserve space for them. */
21973 if (cfun
->machine
->uses_anonymous_args
)
21974 insn
= emit_multi_reg_push
21975 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21976 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21979 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21980 GEN_INT (- args_to_push
)));
21981 RTX_FRAME_RELATED_P (insn
) = 1;
21984 /* If this is an interrupt service routine, and the link register
21985 is going to be pushed, and we're not generating extra
21986 push of IP (needed when frame is needed and frame layout if apcs),
21987 subtracting four from LR now will mean that the function return
21988 can be done with a single instruction. */
21989 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21990 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21991 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21994 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21996 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21999 if (live_regs_mask
)
22001 unsigned long dwarf_regs_mask
= live_regs_mask
;
22003 saved_regs
+= bit_count (live_regs_mask
) * 4;
22004 if (optimize_size
&& !frame_pointer_needed
22005 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
22007 /* If no coprocessor registers are being pushed and we don't have
22008 to worry about a frame pointer then push extra registers to
22009 create the stack frame. This is done in a way that does not
22010 alter the frame layout, so is independent of the epilogue. */
22014 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
22016 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
22017 if (frame
&& n
* 4 >= frame
)
22020 live_regs_mask
|= (1 << n
) - 1;
22021 saved_regs
+= frame
;
22026 && current_tune
->prefer_ldrd_strd
22027 && !optimize_function_for_size_p (cfun
))
22029 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
22031 thumb2_emit_strd_push (live_regs_mask
);
22032 else if (TARGET_ARM
22033 && !TARGET_APCS_FRAME
22034 && !IS_INTERRUPT (func_type
))
22035 arm_emit_strd_push (live_regs_mask
);
22038 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
22039 RTX_FRAME_RELATED_P (insn
) = 1;
22044 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
22045 RTX_FRAME_RELATED_P (insn
) = 1;
22049 if (! IS_VOLATILE (func_type
))
22050 saved_regs
+= arm_save_coproc_regs ();
22052 if (frame_pointer_needed
&& TARGET_ARM
)
22054 /* Create the new frame pointer. */
22055 if (TARGET_APCS_FRAME
)
22057 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
22058 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
22059 RTX_FRAME_RELATED_P (insn
) = 1;
22063 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
22064 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
22065 stack_pointer_rtx
, insn
));
22066 RTX_FRAME_RELATED_P (insn
) = 1;
22070 size
= offsets
->outgoing_args
- offsets
->saved_args
;
22071 if (flag_stack_usage_info
)
22072 current_function_static_stack_size
= size
;
22074 /* If this isn't an interrupt service routine and we have a frame, then do
22075 stack checking. We use IP as the first scratch register, except for the
22076 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
22077 if (!IS_INTERRUPT (func_type
)
22078 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
22079 || flag_stack_clash_protection
))
22081 unsigned int regno
;
22083 if (!IS_NESTED (func_type
) || clobber_ip
)
22085 else if (df_regs_ever_live_p (LR_REGNUM
))
22090 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
22092 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
22093 arm_emit_probe_stack_range (get_stack_check_protect (),
22094 size
- get_stack_check_protect (),
22095 regno
, live_regs_mask
);
22098 arm_emit_probe_stack_range (get_stack_check_protect (), size
,
22099 regno
, live_regs_mask
);
22102 /* Recover the static chain register. */
22105 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
22106 insn
= gen_rtx_REG (SImode
, 3);
22109 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
22110 insn
= gen_frame_mem (SImode
, insn
);
22112 emit_set_insn (ip_rtx
, insn
);
22113 emit_insn (gen_force_register_use (ip_rtx
));
22116 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
22118 /* This add can produce multiple insns for a large constant, so we
22119 need to get tricky. */
22120 rtx_insn
*last
= get_last_insn ();
22122 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
22123 - offsets
->outgoing_args
);
22125 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
22129 last
= last
? NEXT_INSN (last
) : get_insns ();
22130 RTX_FRAME_RELATED_P (last
) = 1;
22132 while (last
!= insn
);
22134 /* If the frame pointer is needed, emit a special barrier that
22135 will prevent the scheduler from moving stores to the frame
22136 before the stack adjustment. */
22137 if (frame_pointer_needed
)
22138 emit_insn (gen_stack_tie (stack_pointer_rtx
,
22139 hard_frame_pointer_rtx
));
22143 if (frame_pointer_needed
&& TARGET_THUMB2
)
22144 thumb_set_frame_pointer (offsets
);
22146 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
22148 unsigned long mask
;
22150 mask
= live_regs_mask
;
22151 mask
&= THUMB2_WORK_REGS
;
22152 if (!IS_NESTED (func_type
))
22153 mask
|= (1 << IP_REGNUM
);
22154 arm_load_pic_register (mask
, NULL_RTX
);
22157 /* If we are profiling, make sure no instructions are scheduled before
22158 the call to mcount. Similarly if the user has requested no
22159 scheduling in the prolog. Similarly if we want non-call exceptions
22160 using the EABI unwinder, to prevent faulting instructions from being
22161 swapped with a stack adjustment. */
22162 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
22163 || (arm_except_unwind_info (&global_options
) == UI_TARGET
22164 && cfun
->can_throw_non_call_exceptions
))
22165 emit_insn (gen_blockage ());
22167 /* If the link register is being kept alive, with the return address in it,
22168 then make sure that it does not get reused by the ce2 pass. */
22169 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
22170 cfun
->machine
->lr_save_eliminated
= 1;
22173 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22175 arm_print_condition (FILE *stream
)
22177 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
22179 /* Branch conversion is not implemented for Thumb-2. */
22182 output_operand_lossage ("predicated Thumb instruction");
22185 if (current_insn_predicate
!= NULL
)
22187 output_operand_lossage
22188 ("predicated instruction in conditional sequence");
22192 fputs (arm_condition_codes
[arm_current_cc
], stream
);
22194 else if (current_insn_predicate
)
22196 enum arm_cond_code code
;
22200 output_operand_lossage ("predicated Thumb instruction");
22204 code
= get_arm_condition_code (current_insn_predicate
);
22205 fputs (arm_condition_codes
[code
], stream
);
22210 /* Globally reserved letters: acln
22211 Puncutation letters currently used: @_|?().!#
22212 Lower case letters currently used: bcdefhimpqtvwxyz
22213 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22214 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22216 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22218 If CODE is 'd', then the X is a condition operand and the instruction
22219 should only be executed if the condition is true.
22220 if CODE is 'D', then the X is a condition operand and the instruction
22221 should only be executed if the condition is false: however, if the mode
22222 of the comparison is CCFPEmode, then always execute the instruction -- we
22223 do this because in these circumstances !GE does not necessarily imply LT;
22224 in these cases the instruction pattern will take care to make sure that
22225 an instruction containing %d will follow, thereby undoing the effects of
22226 doing this instruction unconditionally.
22227 If CODE is 'N' then X is a floating point operand that must be negated
22229 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22230 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
22232 arm_print_operand (FILE *stream
, rtx x
, int code
)
22237 fputs (ASM_COMMENT_START
, stream
);
22241 fputs (user_label_prefix
, stream
);
22245 fputs (REGISTER_PREFIX
, stream
);
22249 arm_print_condition (stream
);
22253 /* The current condition code for a condition code setting instruction.
22254 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22255 fputc('s', stream
);
22256 arm_print_condition (stream
);
22260 /* If the instruction is conditionally executed then print
22261 the current condition code, otherwise print 's'. */
22262 gcc_assert (TARGET_THUMB2
);
22263 if (current_insn_predicate
)
22264 arm_print_condition (stream
);
22266 fputc('s', stream
);
22269 /* %# is a "break" sequence. It doesn't output anything, but is used to
22270 separate e.g. operand numbers from following text, if that text consists
22271 of further digits which we don't want to be part of the operand
22279 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
22280 fprintf (stream
, "%s", fp_const_from_val (&r
));
22284 /* An integer or symbol address without a preceding # sign. */
22286 switch (GET_CODE (x
))
22289 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
22293 output_addr_const (stream
, x
);
22297 if (GET_CODE (XEXP (x
, 0)) == PLUS
22298 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
22300 output_addr_const (stream
, x
);
22303 /* Fall through. */
22306 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22310 /* An integer that we want to print in HEX. */
22312 switch (GET_CODE (x
))
22315 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
22319 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22324 if (CONST_INT_P (x
))
22327 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
22328 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
22332 putc ('~', stream
);
22333 output_addr_const (stream
, x
);
22338 /* Print the log2 of a CONST_INT. */
22342 if (!CONST_INT_P (x
)
22343 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
22344 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22346 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
22351 /* The low 16 bits of an immediate constant. */
22352 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
22356 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
22360 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
22368 shift
= shift_op (x
, &val
);
22372 fprintf (stream
, ", %s ", shift
);
22374 arm_print_operand (stream
, XEXP (x
, 1), 0);
22376 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
22381 /* An explanation of the 'Q', 'R' and 'H' register operands:
22383 In a pair of registers containing a DI or DF value the 'Q'
22384 operand returns the register number of the register containing
22385 the least significant part of the value. The 'R' operand returns
22386 the register number of the register containing the most
22387 significant part of the value.
22389 The 'H' operand returns the higher of the two register numbers.
22390 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22391 same as the 'Q' operand, since the most significant part of the
22392 value is held in the lower number register. The reverse is true
22393 on systems where WORDS_BIG_ENDIAN is false.
22395 The purpose of these operands is to distinguish between cases
22396 where the endian-ness of the values is important (for example
22397 when they are added together), and cases where the endian-ness
22398 is irrelevant, but the order of register operations is important.
22399 For example when loading a value from memory into a register
22400 pair, the endian-ness does not matter. Provided that the value
22401 from the lower memory address is put into the lower numbered
22402 register, and the value from the higher address is put into the
22403 higher numbered register, the load will work regardless of whether
22404 the value being loaded is big-wordian or little-wordian. The
22405 order of the two register loads can matter however, if the address
22406 of the memory location is actually held in one of the registers
22407 being overwritten by the load.
22409 The 'Q' and 'R' constraints are also available for 64-bit
22412 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22414 rtx part
= gen_lowpart (SImode
, x
);
22415 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22419 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22421 output_operand_lossage ("invalid operand for code '%c'", code
);
22425 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
22429 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22431 machine_mode mode
= GET_MODE (x
);
22434 if (mode
== VOIDmode
)
22436 part
= gen_highpart_mode (SImode
, mode
, x
);
22437 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22441 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22443 output_operand_lossage ("invalid operand for code '%c'", code
);
22447 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
22451 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22453 output_operand_lossage ("invalid operand for code '%c'", code
);
22457 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
22461 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22463 output_operand_lossage ("invalid operand for code '%c'", code
);
22467 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
22471 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22473 output_operand_lossage ("invalid operand for code '%c'", code
);
22477 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
22481 asm_fprintf (stream
, "%r",
22482 REG_P (XEXP (x
, 0))
22483 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
22487 asm_fprintf (stream
, "{%r-%r}",
22489 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
22492 /* Like 'M', but writing doubleword vector registers, for use by Neon
22496 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
22497 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
22499 asm_fprintf (stream
, "{d%d}", regno
);
22501 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
22506 /* CONST_TRUE_RTX means always -- that's the default. */
22507 if (x
== const_true_rtx
)
22510 if (!COMPARISON_P (x
))
22512 output_operand_lossage ("invalid operand for code '%c'", code
);
22516 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
22521 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22522 want to do that. */
22523 if (x
== const_true_rtx
)
22525 output_operand_lossage ("instruction never executed");
22528 if (!COMPARISON_P (x
))
22530 output_operand_lossage ("invalid operand for code '%c'", code
);
22534 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
22535 (get_arm_condition_code (x
))],
22545 /* Former Maverick support, removed after GCC-4.7. */
22546 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
22551 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
22552 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
22553 /* Bad value for wCG register number. */
22555 output_operand_lossage ("invalid operand for code '%c'", code
);
22560 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
22563 /* Print an iWMMXt control register name. */
22565 if (!CONST_INT_P (x
)
22567 || INTVAL (x
) >= 16)
22568 /* Bad value for wC register number. */
22570 output_operand_lossage ("invalid operand for code '%c'", code
);
22576 static const char * wc_reg_names
[16] =
22578 "wCID", "wCon", "wCSSF", "wCASF",
22579 "wC4", "wC5", "wC6", "wC7",
22580 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22581 "wC12", "wC13", "wC14", "wC15"
22584 fputs (wc_reg_names
[INTVAL (x
)], stream
);
22588 /* Print the high single-precision register of a VFP double-precision
22592 machine_mode mode
= GET_MODE (x
);
22595 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
22597 output_operand_lossage ("invalid operand for code '%c'", code
);
22602 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
22604 output_operand_lossage ("invalid operand for code '%c'", code
);
22608 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
22612 /* Print a VFP/Neon double precision or quad precision register name. */
22616 machine_mode mode
= GET_MODE (x
);
22617 int is_quad
= (code
== 'q');
22620 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
22622 output_operand_lossage ("invalid operand for code '%c'", code
);
22627 || !IS_VFP_REGNUM (REGNO (x
)))
22629 output_operand_lossage ("invalid operand for code '%c'", code
);
22634 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
22635 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
22637 output_operand_lossage ("invalid operand for code '%c'", code
);
22641 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
22642 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
22646 /* These two codes print the low/high doubleword register of a Neon quad
22647 register, respectively. For pair-structure types, can also print
22648 low/high quadword registers. */
22652 machine_mode mode
= GET_MODE (x
);
22655 if ((GET_MODE_SIZE (mode
) != 16
22656 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
22658 output_operand_lossage ("invalid operand for code '%c'", code
);
22663 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
22665 output_operand_lossage ("invalid operand for code '%c'", code
);
22669 if (GET_MODE_SIZE (mode
) == 16)
22670 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
22671 + (code
== 'f' ? 1 : 0));
22673 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
22674 + (code
== 'f' ? 1 : 0));
22678 /* Print a VFPv3 floating-point constant, represented as an integer
22682 int index
= vfp3_const_double_index (x
);
22683 gcc_assert (index
!= -1);
22684 fprintf (stream
, "%d", index
);
22688 /* Print bits representing opcode features for Neon.
22690 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22691 and polynomials as unsigned.
22693 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22695 Bit 2 is 1 for rounding functions, 0 otherwise. */
22697 /* Identify the type as 's', 'u', 'p' or 'f'. */
22700 HOST_WIDE_INT bits
= INTVAL (x
);
22701 fputc ("uspf"[bits
& 3], stream
);
22705 /* Likewise, but signed and unsigned integers are both 'i'. */
22708 HOST_WIDE_INT bits
= INTVAL (x
);
22709 fputc ("iipf"[bits
& 3], stream
);
22713 /* As for 'T', but emit 'u' instead of 'p'. */
22716 HOST_WIDE_INT bits
= INTVAL (x
);
22717 fputc ("usuf"[bits
& 3], stream
);
22721 /* Bit 2: rounding (vs none). */
22724 HOST_WIDE_INT bits
= INTVAL (x
);
22725 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
22729 /* Memory operand for vld1/vst1 instruction. */
22733 bool postinc
= FALSE
;
22734 rtx postinc_reg
= NULL
;
22735 unsigned align
, memsize
, align_bits
;
22737 gcc_assert (MEM_P (x
));
22738 addr
= XEXP (x
, 0);
22739 if (GET_CODE (addr
) == POST_INC
)
22742 addr
= XEXP (addr
, 0);
22744 if (GET_CODE (addr
) == POST_MODIFY
)
22746 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22747 addr
= XEXP (addr
, 0);
22749 asm_fprintf (stream
, "[%r", REGNO (addr
));
22751 /* We know the alignment of this access, so we can emit a hint in the
22752 instruction (for some alignments) as an aid to the memory subsystem
22754 align
= MEM_ALIGN (x
) >> 3;
22755 memsize
= MEM_SIZE (x
);
22757 /* Only certain alignment specifiers are supported by the hardware. */
22758 if (memsize
== 32 && (align
% 32) == 0)
22760 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22762 else if (memsize
>= 8 && (align
% 8) == 0)
22767 if (align_bits
!= 0)
22768 asm_fprintf (stream
, ":%d", align_bits
);
22770 asm_fprintf (stream
, "]");
22773 fputs("!", stream
);
22775 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
22783 gcc_assert (MEM_P (x
));
22784 addr
= XEXP (x
, 0);
22785 gcc_assert (REG_P (addr
));
22786 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22790 /* Translate an S register number into a D register number and element index. */
22793 machine_mode mode
= GET_MODE (x
);
22796 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22798 output_operand_lossage ("invalid operand for code '%c'", code
);
22803 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22805 output_operand_lossage ("invalid operand for code '%c'", code
);
22809 regno
= regno
- FIRST_VFP_REGNUM
;
22810 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22815 gcc_assert (CONST_DOUBLE_P (x
));
22817 result
= vfp3_const_double_for_fract_bits (x
);
22819 result
= vfp3_const_double_for_bits (x
);
22820 fprintf (stream
, "#%d", result
);
22823 /* Register specifier for vld1.16/vst1.16. Translate the S register
22824 number into a D register number and element index. */
22827 machine_mode mode
= GET_MODE (x
);
22830 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22832 output_operand_lossage ("invalid operand for code '%c'", code
);
22837 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22839 output_operand_lossage ("invalid operand for code '%c'", code
);
22843 regno
= regno
- FIRST_VFP_REGNUM
;
22844 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
22851 output_operand_lossage ("missing operand");
22855 switch (GET_CODE (x
))
22858 asm_fprintf (stream
, "%r", REGNO (x
));
22862 output_address (GET_MODE (x
), XEXP (x
, 0));
22868 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22869 sizeof (fpstr
), 0, 1);
22870 fprintf (stream
, "#%s", fpstr
);
22875 gcc_assert (GET_CODE (x
) != NEG
);
22876 fputc ('#', stream
);
22877 if (GET_CODE (x
) == HIGH
)
22879 fputs (":lower16:", stream
);
22883 output_addr_const (stream
, x
);
22889 /* Target hook for printing a memory address. */
22891 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
22895 int is_minus
= GET_CODE (x
) == MINUS
;
22898 asm_fprintf (stream
, "[%r]", REGNO (x
));
22899 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22901 rtx base
= XEXP (x
, 0);
22902 rtx index
= XEXP (x
, 1);
22903 HOST_WIDE_INT offset
= 0;
22905 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
22907 /* Ensure that BASE is a register. */
22908 /* (one of them must be). */
22909 /* Also ensure the SP is not used as in index register. */
22910 std::swap (base
, index
);
22912 switch (GET_CODE (index
))
22915 offset
= INTVAL (index
);
22918 asm_fprintf (stream
, "[%r, #%wd]",
22919 REGNO (base
), offset
);
22923 asm_fprintf (stream
, "[%r, %s%r]",
22924 REGNO (base
), is_minus
? "-" : "",
22934 asm_fprintf (stream
, "[%r, %s%r",
22935 REGNO (base
), is_minus
? "-" : "",
22936 REGNO (XEXP (index
, 0)));
22937 arm_print_operand (stream
, index
, 'S');
22938 fputs ("]", stream
);
22943 gcc_unreachable ();
22946 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22947 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22949 gcc_assert (REG_P (XEXP (x
, 0)));
22951 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22952 asm_fprintf (stream
, "[%r, #%s%d]!",
22953 REGNO (XEXP (x
, 0)),
22954 GET_CODE (x
) == PRE_DEC
? "-" : "",
22955 GET_MODE_SIZE (mode
));
22957 asm_fprintf (stream
, "[%r], #%s%d",
22958 REGNO (XEXP (x
, 0)),
22959 GET_CODE (x
) == POST_DEC
? "-" : "",
22960 GET_MODE_SIZE (mode
));
22962 else if (GET_CODE (x
) == PRE_MODIFY
)
22964 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22965 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22966 asm_fprintf (stream
, "#%wd]!",
22967 INTVAL (XEXP (XEXP (x
, 1), 1)));
22969 asm_fprintf (stream
, "%r]!",
22970 REGNO (XEXP (XEXP (x
, 1), 1)));
22972 else if (GET_CODE (x
) == POST_MODIFY
)
22974 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22975 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22976 asm_fprintf (stream
, "#%wd",
22977 INTVAL (XEXP (XEXP (x
, 1), 1)));
22979 asm_fprintf (stream
, "%r",
22980 REGNO (XEXP (XEXP (x
, 1), 1)));
22982 else output_addr_const (stream
, x
);
22987 asm_fprintf (stream
, "[%r]", REGNO (x
));
22988 else if (GET_CODE (x
) == POST_INC
)
22989 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22990 else if (GET_CODE (x
) == PLUS
)
22992 gcc_assert (REG_P (XEXP (x
, 0)));
22993 if (CONST_INT_P (XEXP (x
, 1)))
22994 asm_fprintf (stream
, "[%r, #%wd]",
22995 REGNO (XEXP (x
, 0)),
22996 INTVAL (XEXP (x
, 1)));
22998 asm_fprintf (stream
, "[%r, %r]",
22999 REGNO (XEXP (x
, 0)),
23000 REGNO (XEXP (x
, 1)));
23003 output_addr_const (stream
, x
);
23007 /* Target hook for indicating whether a punctuation character for
23008 TARGET_PRINT_OPERAND is valid. */
/* Returns true for the punctuation codes arm_print_operand accepts:
   '@', '|', '.', '(', ')' and '#' unconditionally; '?' only for
   32-bit targets; '!' only for Thumb-2; '_' only for Thumb.  */
23010 arm_print_operand_punct_valid_p (unsigned char code
)
23012 return (code
== '@' || code
== '|' || code
== '.'
23013 || code
== '(' || code
== ')' || code
== '#'
23014 || (TARGET_32BIT
&& (code
== '?'))
23015 || (TARGET_THUMB2
&& (code
== '!'))
23016 || (TARGET_THUMB
&& (code
== '_')));
23019 /* Target hook for assembling integer objects. The ARM version needs to
23020 handle word-sized values specially. */
/* Emits X as a SIZE-byte integer.  Word-sized aligned values get a
   .word directive with optional (GOT)/(GOTOFF) relocation suffixes for
   PIC; supported vector constants are emitted element by element;
   everything else falls back to default_assemble_integer.
   NOTE(review): damaged extraction -- the element-emission callee
   names and some braces/returns were dropped; verify upstream.  */
23022 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
23026 if (size
== UNITS_PER_WORD
&& aligned_p
)
23028 fputs ("\t.word\t", asm_out_file
);
23029 output_addr_const (asm_out_file
, x
);
23031 /* Mark symbols as position independent. We only do this in the
23032 .text segment, not in the .data segment. */
23033 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
23034 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
23036 /* See legitimize_pic_address for an explanation of the
23037 TARGET_VXWORKS_RTP check. */
23038 /* References to weak symbols cannot be resolved locally:
23039 they may be overridden by a non-weak definition at link
23041 if (!arm_pic_data_is_text_relative
23042 || (GET_CODE (x
) == SYMBOL_REF
23043 && (!SYMBOL_REF_LOCAL_P (x
)
23044 || (SYMBOL_REF_DECL (x
)
23045 ? DECL_WEAK (SYMBOL_REF_DECL (x
)) : 0))))
23046 fputs ("(GOT)", asm_out_file
);
23048 fputs ("(GOTOFF)", asm_out_file
);
23050 fputc ('\n', asm_out_file
);
/* Not a plain word: try to handle vector constants element-wise.  */
23054 mode
= GET_MODE (x
);
23056 if (arm_vector_mode_supported_p (mode
))
23060 gcc_assert (GET_CODE (x
) == CONST_VECTOR
)
;
23062 units
= CONST_VECTOR_NUNITS (x
);
23063 size
= GET_MODE_UNIT_SIZE (mode
);
/* Integer vectors: emit each element as an integer.  The first
   element carries the full alignment, subsequent ones only the
   element alignment.  */
23065 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
23066 for (i
= 0; i
< units
; i
++)
23068 rtx elt
= CONST_VECTOR_ELT (x
, i
);
23070 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
/* Otherwise treat elements as floating-point values.  */
23073 for (i
= 0; i
< units
; i
++)
23075 rtx elt
= CONST_VECTOR_ELT (x
, i
);
23077 (*CONST_DOUBLE_REAL_VALUE (elt
),
23078 as_a
<scalar_float_mode
> (GET_MODE_INNER (mode
)),
23079 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
23085 return default_assemble_integer (x
, size
, aligned_p
);
/* Shared worker for arm_elf_asm_constructor/destructor.  Emits SYMBOL
   into .init_array (IS_CTOR) or .fini_array with PRIORITY, using the
   AAPCS (target1) relocation; non-AAPCS targets delegate to the
   default named-section handlers.  */
23089 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
23093 if (!TARGET_AAPCS_BASED
)
/* Non-AAPCS: fall back to the generic ctor/dtor emission.  */
23096 default_named_section_asm_out_constructor
23097 : default_named_section_asm_out_destructor
) (symbol
, priority
);
23101 /* Put these in the .init_array section, using a special relocation. */
23102 if (priority
!= DEFAULT_INIT_PRIORITY
)
/* Non-default priority: encode it in the section name as a
   zero-padded 5-digit suffix so the linker sorts sections.  */
23105 sprintf (buf
, "%s.%.5u",
23106 is_ctor
? ".init_array" : ".fini_array",
23108 s
= get_section (buf
, SECTION_WRITE
| SECTION_NOTYPE
, NULL_TREE
);
23115 switch_to_section (s
);
23116 assemble_align (POINTER_SIZE
);
23117 fputs ("\t.word\t", asm_out_file
);
23118 output_addr_const (asm_out_file
, symbol
);
/* (target1) is the AAPCS dynamic relocation for init/fini entries.  */
23119 fputs ("(target1)\n", asm_out_file
);
23122 /* Add a function to the list of static constructors. */
/* Thin wrapper: delegates to arm_elf_asm_cdtor with is_ctor=true.  */
23125 arm_elf_asm_constructor (rtx symbol
, int priority
)
23127 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
23130 /* Add a function to the list of static destructors. */
/* Thin wrapper: delegates to arm_elf_asm_cdtor with is_ctor=false.  */
23133 arm_elf_asm_destructor (rtx symbol
, int priority
)
23135 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
23138 /* A finite state machine takes care of noticing whether or not instructions
23139 can be conditionally executed, and thus decrease execution time and code
23140 size by deleting branch instructions. The fsm is controlled by
23141 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
23143 /* The state of the fsm controlling condition codes are:
23144 0: normal, do nothing special
23145 1: make ASM_OUTPUT_OPCODE not output this instruction
23146 2: make ASM_OUTPUT_OPCODE not output this instruction
23147 3: make instructions conditional
23148 4: make instructions conditional
23150 State transitions (state->state by whom under condition):
23151 0 -> 1 final_prescan_insn if the `target' is a label
23152 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23153 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23154 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23155 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23156 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23157 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23158 (the target insn is arm_target_insn).
23160 If the jump clobbers the conditions then we use states 2 and 4.
23162 A similar thing can be done with conditional return insns.
23164 XXX In case the `target' is an unconditional branch, this conditionalising
23165 of the instructions always reduces code size, but not always execution
23166 time. But then, I want to reduce the code size to somewhere near what
23167 /bin/cc produces. */
23169 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23170 instructions. When a COND_EXEC instruction is seen the subsequent
23171 instructions are scanned so that multiple conditional instructions can be
23172 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23173 specify the length and true/false mask for the IT block. These will be
23174 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
23176 /* Returns the index of the ARM condition code string in
23177 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23178 COMPARISON should be an rtx like `(eq (...) (...))'. */
/* Maps an RTL comparison to an arm_cond_code by switching on the CC
   mode of its first operand (recomputed via SELECT_CC_MODE when the
   operand mode is not MODE_CC) and then on the rtx comparison code.
   NOTE(review): damaged extraction -- the outer switch statement, the
   `dominance:' label targeted by the gotos below, and several case
   labels/braces were dropped; verify upstream before editing.  */
23181 maybe_get_arm_condition_code (rtx comparison
)
23183 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
23184 enum arm_cond_code code
;
23185 enum rtx_code comp_code
= GET_CODE (comparison
);
23187 if (GET_MODE_CLASS (mode
) != MODE_CC
)
23188 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
23189 XEXP (comparison
, 1));
/* Dominance CC modes: pick the base condition, then fall through to
   shared handling at the (not visible here) `dominance' label.  */
23193 case E_CC_DNEmode
: code
= ARM_NE
; goto dominance
;
23194 case E_CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
23195 case E_CC_DGEmode
: code
= ARM_GE
; goto dominance
;
23196 case E_CC_DGTmode
: code
= ARM_GT
; goto dominance
;
23197 case E_CC_DLEmode
: code
= ARM_LE
; goto dominance
;
23198 case E_CC_DLTmode
: code
= ARM_LT
; goto dominance
;
23199 case E_CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
23200 case E_CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
23201 case E_CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
23202 case E_CC_DLTUmode
: code
= ARM_CC
;
/* Shared dominance handling: EQ inverts the base condition, NE keeps
   it (the NE return line was lost in extraction).  */
23205 if (comp_code
== EQ
)
23206 return ARM_INVERSE_CONDITION_CODE (code
);
23207 if (comp_code
== NE
)
23211 case E_CC_NOOVmode
:
23214 case NE
: return ARM_NE
;
23215 case EQ
: return ARM_EQ
;
23216 case GE
: return ARM_PL
;
23217 case LT
: return ARM_MI
;
23218 default: return ARM_NV
;
23224 case NE
: return ARM_NE
;
23225 case EQ
: return ARM_EQ
;
23226 default: return ARM_NV
;
23232 case NE
: return ARM_MI
;
23233 case EQ
: return ARM_PL
;
23234 default: return ARM_NV
;
23239 /* We can handle all cases except UNEQ and LTGT. */
23242 case GE
: return ARM_GE
;
23243 case GT
: return ARM_GT
;
23244 case LE
: return ARM_LS
;
23245 case LT
: return ARM_MI
;
23246 case NE
: return ARM_NE
;
23247 case EQ
: return ARM_EQ
;
23248 case ORDERED
: return ARM_VC
;
23249 case UNORDERED
: return ARM_VS
;
23250 case UNLT
: return ARM_LT
;
23251 case UNLE
: return ARM_LE
;
23252 case UNGT
: return ARM_HI
;
23253 case UNGE
: return ARM_PL
;
23254 /* UNEQ and LTGT do not have a representation. */
23255 case UNEQ
: /* Fall through. */
23256 case LTGT
: /* Fall through. */
23257 default: return ARM_NV
;
/* Swapped-operand CC mode: signed/unsigned conditions are mirrored
   (GE maps to LE, GTU to CC, etc.).  */
23263 case NE
: return ARM_NE
;
23264 case EQ
: return ARM_EQ
;
23265 case GE
: return ARM_LE
;
23266 case GT
: return ARM_LT
;
23267 case LE
: return ARM_GE
;
23268 case LT
: return ARM_GT
;
23269 case GEU
: return ARM_LS
;
23270 case GTU
: return ARM_CC
;
23271 case LEU
: return ARM_CS
;
23272 case LTU
: return ARM_HI
;
23273 default: return ARM_NV
;
/* Carry-only CC mode: only carry-flag conditions are expressible.  */
23279 case LTU
: return ARM_CS
;
23280 case GEU
: return ARM_CC
;
23281 case NE
: return ARM_CS
;
23282 case EQ
: return ARM_CC
;
23283 default: return ARM_NV
;
23289 case NE
: return ARM_NE
;
23290 case EQ
: return ARM_EQ
;
23291 case GEU
: return ARM_CS
;
23292 case GTU
: return ARM_HI
;
23293 case LEU
: return ARM_LS
;
23294 case LTU
: return ARM_CC
;
23295 default: return ARM_NV
;
23301 case GE
: return ARM_GE
;
23302 case LT
: return ARM_LT
;
23303 case GEU
: return ARM_CS
;
23304 case LTU
: return ARM_CC
;
23305 default: return ARM_NV
;
/* Overflow-flag CC mode: NE/EQ test the V flag.  */
23311 case NE
: return ARM_VS
;
23312 case EQ
: return ARM_VC
;
23313 default: return ARM_NV
;
/* Full CC mode: all integer conditions map directly.  */
23319 case NE
: return ARM_NE
;
23320 case EQ
: return ARM_EQ
;
23321 case GE
: return ARM_GE
;
23322 case GT
: return ARM_GT
;
23323 case LE
: return ARM_LE
;
23324 case LT
: return ARM_LT
;
23325 case GEU
: return ARM_CS
;
23326 case GTU
: return ARM_HI
;
23327 case LEU
: return ARM_LS
;
23328 case LTU
: return ARM_CC
;
23329 default: return ARM_NV
;
23332 default: gcc_unreachable ();
23336 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
/* Asserting wrapper: callers must only pass comparisons that have a
   valid ARM condition code.  (The trailing `return code;' line was
   lost in extraction.)  */
23337 static enum arm_cond_code
23338 get_arm_condition_code (rtx comparison
)
23340 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
23341 gcc_assert (code
!= ARM_NV
);
23345 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23346 code registers when not targetting Thumb1. The VFP condition register
23347 only exists when generating hard-float code. */
/* Reports the fixed CC registers via *P1/*P2; *P2 is VFPCC_REGNUM only
   under hard-float, else INVALID_REGNUM.  NOTE(review): the lines
   setting *P1 (and the Thumb1 early-out) were lost in extraction.  */
23349 arm_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
23355 *p2
= TARGET_HARD_FLOAT
? VFPCC_REGNUM
: INVALID_REGNUM
;
23359 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
/* Scans forward from a COND_EXEC insn and accumulates as many
   subsequent COND_EXEC insns with the same (or inverse) condition as
   fit in one IT block, recording the result in the globals
   arm_current_cc, arm_condexec_count, arm_condexec_mask and
   arm_condexec_masklen which thumb2_asm_output_opcode consumes.
   NOTE(review): damaged extraction -- the scan loop header, some
   `continue'/`break' statements and braces were dropped.  */
23362 thumb2_final_prescan_insn (rtx_insn
*insn
)
23364 rtx_insn
*first_insn
= insn
;
23365 rtx body
= PATTERN (insn
);
23367 enum arm_cond_code code
;
23372 /* max_insns_skipped in the tune was already taken into account in the
23373 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23374 just emit the IT blocks as we can. It does not make sense to split
23376 max
= MAX_INSN_PER_IT_BLOCK
;
23378 /* Remove the previous insn from the count of insns to be output. */
23379 if (arm_condexec_count
)
23380 arm_condexec_count
--;
23382 /* Nothing to do if we are already inside a conditional block. */
23383 if (arm_condexec_count
)
23386 if (GET_CODE (body
) != COND_EXEC
)
23389 /* Conditional jumps are implemented directly. */
/* Start a new IT block from this insn's predicate.  */
23393 predicate
= COND_EXEC_TEST (body
);
23394 arm_current_cc
= get_arm_condition_code (predicate
);
23396 n
= get_attr_ce_count (insn
);
23397 arm_condexec_count
= 1;
23398 arm_condexec_mask
= (1 << n
) - 1;
23399 arm_condexec_masklen
= n
;
23400 /* See if subsequent instructions can be combined into the same block. */
23403 insn
= next_nonnote_insn (insn
);
23405 /* Jumping into the middle of an IT block is illegal, so a label or
23406 barrier terminates the block. */
23407 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
23410 body
= PATTERN (insn
);
23411 /* USE and CLOBBER aren't really insns, so just skip them. */
23412 if (GET_CODE (body
) == USE
23413 || GET_CODE (body
) == CLOBBER
)
23416 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23417 if (GET_CODE (body
) != COND_EXEC
)
23419 /* Maximum number of conditionally executed instructions in a block. */
23420 n
= get_attr_ce_count (insn
);
23421 if (arm_condexec_masklen
+ n
> max
)
/* Same condition extends the then-mask; the inverse condition is
   accepted too (else-lanes); any other condition ends the block.  */
23424 predicate
= COND_EXEC_TEST (body
);
23425 code
= get_arm_condition_code (predicate
);
23426 mask
= (1 << n
) - 1;
23427 if (arm_current_cc
== code
)
23428 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
23429 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
23432 arm_condexec_count
++;
23433 arm_condexec_masklen
+= n
;
23435 /* A jump must be the last instruction in a conditional block. */
23439 /* Restore recog_data (getting the attributes of other insns can
23440 destroy this array, but final.c assumes that it remains intact
23441 across this call). */
23442 extract_constrain_insn_cached (first_insn
);
/* The ARM-mode conditional-execution FSM driver (see the state-machine
   commentary above this function).  Looks at conditional branches,
   counts the insns they would skip, and -- when all skipped insns can
   be conditionalised -- arms the FSM (arm_ccfsm_state,
   arm_target_label/arm_target_insn, arm_current_cc) so that
   ASM_OUTPUT_OPCODE suppresses the branch and predicates the insns.
   NOTE(review): damaged extraction -- declarations of `reverse',
   `label', `insns_skipped', several case labels of the inner switch
   and many braces were dropped; verify against upstream arm.c.  */
23446 arm_final_prescan_insn (rtx_insn
*insn
)
23448 /* BODY will hold the body of INSN. */
23449 rtx body
= PATTERN (insn
);
23451 /* This will be 1 if trying to repeat the trick, and things need to be
23452 reversed if it appears to fail. */
23455 /* If we start with a return insn, we only succeed if we find another one. */
23456 int seeking_return
= 0;
23457 enum rtx_code return_code
= UNKNOWN
;
23459 /* START_INSN will hold the insn from where we start looking. This is the
23460 first insn after the following code_label if REVERSE is true. */
23461 rtx_insn
*start_insn
= insn
;
23463 /* If in state 4, check if the target branch is reached, in order to
23464 change back to state 0. */
23465 if (arm_ccfsm_state
== 4)
23467 if (insn
== arm_target_insn
)
23469 arm_target_insn
= NULL
;
23470 arm_ccfsm_state
= 0;
23475 /* If in state 3, it is possible to repeat the trick, if this insn is an
23476 unconditional branch to a label, and immediately following this branch
23477 is the previous target label which is only used once, and the label this
23478 branch jumps to is not too far off. */
23479 if (arm_ccfsm_state
== 3)
23481 if (simplejump_p (insn
))
23483 start_insn
= next_nonnote_insn (start_insn
);
23484 if (BARRIER_P (start_insn
))
23486 /* XXX Isn't this always a barrier? */
23487 start_insn
= next_nonnote_insn (start_insn
);
23489 if (LABEL_P (start_insn
)
23490 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23491 && LABEL_NUSES (start_insn
) == 1)
23496 else if (ANY_RETURN_P (body
))
23498 start_insn
= next_nonnote_insn (start_insn
);
23499 if (BARRIER_P (start_insn
))
23500 start_insn
= next_nonnote_insn (start_insn
);
23501 if (LABEL_P (start_insn
)
23502 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23503 && LABEL_NUSES (start_insn
) == 1)
23506 seeking_return
= 1;
23507 return_code
= GET_CODE (body
);
23516 gcc_assert (!arm_ccfsm_state
|| reverse
);
23517 if (!JUMP_P (insn
))
23520 /* This jump might be paralleled with a clobber of the condition codes
23521 the jump should always come first */
23522 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
23523 body
= XVECEXP (body
, 0, 0);
23526 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
23527 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
23530 int fail
= FALSE
, succeed
= FALSE
;
23531 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23532 int then_not_else
= TRUE
;
23533 rtx_insn
*this_insn
= start_insn
;
23536 /* Register the insn jumped to. */
23539 if (!seeking_return
)
23540 label
= XEXP (SET_SRC (body
), 0);
23542 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
23543 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
23544 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
23546 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
23547 then_not_else
= FALSE
;
23549 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
23551 seeking_return
= 1;
23552 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
23554 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
23556 seeking_return
= 1;
23557 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
23558 then_not_else
= FALSE
;
23561 gcc_unreachable ();
23563 /* See how many insns this branch skips, and what kind of insns. If all
23564 insns are okay, and the label or unconditional branch to the same
23565 label is not too far away, succeed. */
23566 for (insns_skipped
= 0;
23567 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
23571 this_insn
= next_nonnote_insn (this_insn
);
/* Classify each skipped insn; any insn that cannot be
   conditionalised sets `fail'.  */
23575 switch (GET_CODE (this_insn
))
23578 /* Succeed if it is the target label, otherwise fail since
23579 control falls in from somewhere else. */
23580 if (this_insn
== label
)
23582 arm_ccfsm_state
= 1;
23590 /* Succeed if the following insn is the target label.
23592 If return insns are used then the last insn in a function
23593 will be a barrier. */
23594 this_insn
= next_nonnote_insn (this_insn
);
23595 if (this_insn
&& this_insn
== label
)
23597 arm_ccfsm_state
= 1;
23605 /* The AAPCS says that conditional calls should not be
23606 used since they make interworking inefficient (the
23607 linker can't transform BL<cond> into BLX). That's
23608 only a problem if the machine has BLX. */
23615 /* Succeed if the following insn is the target label, or
23616 if the following two insns are a barrier and the
23618 this_insn
= next_nonnote_insn (this_insn
);
23619 if (this_insn
&& BARRIER_P (this_insn
))
23620 this_insn
= next_nonnote_insn (this_insn
);
23622 if (this_insn
&& this_insn
== label
23623 && insns_skipped
< max_insns_skipped
)
23625 arm_ccfsm_state
= 1;
23633 /* If this is an unconditional branch to the same label, succeed.
23634 If it is to another label, do nothing. If it is conditional,
23636 /* XXX Probably, the tests for SET and the PC are
23639 scanbody
= PATTERN (this_insn
);
23640 if (GET_CODE (scanbody
) == SET
23641 && GET_CODE (SET_DEST (scanbody
)) == PC
)
23643 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
23644 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
23646 arm_ccfsm_state
= 2;
23649 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
23652 /* Fail if a conditional return is undesirable (e.g. on a
23653 StrongARM), but still allow this if optimizing for size. */
23654 else if (GET_CODE (scanbody
) == return_code
23655 && !use_return_insn (TRUE
, NULL
)
23658 else if (GET_CODE (scanbody
) == return_code
)
23660 arm_ccfsm_state
= 2;
23663 else if (GET_CODE (scanbody
) == PARALLEL
)
23665 switch (get_attr_conds (this_insn
))
23675 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
23680 /* Instructions using or affecting the condition codes make it
23682 scanbody
= PATTERN (this_insn
);
23683 if (!(GET_CODE (scanbody
) == SET
23684 || GET_CODE (scanbody
) == PARALLEL
)
23685 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
/* Scan succeeded: arm the FSM with the target and condition.  */
23695 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
23696 arm_target_label
= CODE_LABEL_NUMBER (label
);
23699 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
23701 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
23703 this_insn
= next_nonnote_insn (this_insn
);
23704 gcc_assert (!this_insn
23705 || (!BARRIER_P (this_insn
)
23706 && !LABEL_P (this_insn
)));
23710 /* Oh, dear! we ran off the end.. give up. */
23711 extract_constrain_insn_cached (insn
);
23712 arm_ccfsm_state
= 0;
23713 arm_target_insn
= NULL
;
23716 arm_target_insn
= this_insn
;
23719 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23722 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
23724 if (reverse
|| then_not_else
)
23725 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
23728 /* Restore recog_data (getting the attributes of other insns can
23729 destroy this array, but final.c assumes that it remains intact
23730 across this call. */
23731 extract_constrain_insn_cached (insn
);
23735 /* Output IT instructions. */
/* If thumb2_final_prescan_insn armed an IT block (arm_condexec_mask
   nonzero), emits the "it..." prefix: one 't'/'e' lane letter per bit
   of the mask plus the block's condition, then clears the mask so the
   prefix is printed only once.  */
23737 thumb2_asm_output_opcode (FILE * stream
)
23742 if (arm_condexec_mask
)
/* Build the lane string: set bit -> 't' (then), clear -> 'e' (else). */
23744 for (n
= 0; n
< arm_condexec_masklen
; n
++)
23745 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
23747 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
23748 arm_condition_codes
[arm_current_cc
]);
23749 arm_condexec_mask
= 0;
23753 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23754 UNITS_PER_WORD bytes wide. */
/* Returns how many hard registers MODE occupies starting at REGNO.
   The general case is ARM_NUM_REGS (mode); the visible condition
   excludes core/FP/arg-pointer and VFP registers from some special
   handling whose body was lost in extraction -- verify upstream.  */
23755 static unsigned int
23756 arm_hard_regno_nregs (unsigned int regno
, machine_mode mode
)
23759 && regno
> PC_REGNUM
23760 && regno
!= FRAME_POINTER_REGNUM
23761 && regno
!= ARG_POINTER_REGNUM
23762 && !IS_VFP_REGNUM (regno
))
23765 return ARM_NUM_REGS (mode
);
23768 /* Implement TARGET_HARD_REGNO_MODE_OK. */
/* Decides whether hard register REGNO may hold a value of MODE:
   CC modes only in CC_REGNUM/VFPCC_REGNUM; VFP registers take
   SF/SI/HF/HI in single-precision regs, DF in double-precision regs,
   and Neon D/Q/structure modes subject to the NEON_REGNO_OK checks;
   iWMMXt registers take their own modes; core registers take most
   values with an even-pair restriction for ldrd-sized values.
   NOTE(review): damaged extraction -- some branch/return lines were
   dropped between the visible conditions; verify upstream.  */
23770 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
23772 if (GET_MODE_CLASS (mode
) == MODE_CC
)
23773 return (regno
== CC_REGNUM
23774 || (TARGET_HARD_FLOAT
23775 && regno
== VFPCC_REGNUM
));
23777 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
23781 /* For the Thumb we only allow values bigger than SImode in
23782 registers 0 - 6, so that there is always a second low
23783 register available to hold the upper part of the value.
23784 We probably we ought to ensure that the register is the
23785 start of an even numbered register pair. */
23786 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23788 if (TARGET_HARD_FLOAT
&& IS_VFP_REGNUM (regno
))
23790 if (mode
== SFmode
|| mode
== SImode
)
23791 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23793 if (mode
== DFmode
)
23794 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23796 if (mode
== HFmode
)
23797 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23799 /* VFP registers can hold HImode values. */
23800 if (mode
== HImode
)
23801 return VFP_REGNO_OK_FOR_SINGLE (regno
);
/* Neon vector and structure modes: D-reg modes need a valid double
   register, Q-reg and multi-register structure modes need suitably
   aligned register groups of 2/3/4/6/8 D registers.  */
23804 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23805 || (VALID_NEON_QREG_MODE (mode
)
23806 && NEON_REGNO_OK_FOR_QUAD (regno
))
23807 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23808 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23809 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23810 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23811 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23816 if (TARGET_REALLY_IWMMXT
)
23818 if (IS_IWMMXT_GR_REGNUM (regno
))
23819 return mode
== SImode
;
23821 if (IS_IWMMXT_REGNUM (regno
))
23822 return VALID_IWMMXT_REG_MODE (mode
);
23825 /* We allow almost any value to be stored in the general registers.
23826 Restrict doubleword quantities to even register pairs in ARM state
23827 so that we can use ldrd. Do not allow very large Neon structure
23828 opaque modes in general registers; they would use too many. */
23829 if (regno
<= LAST_ARM_REGNUM
)
23831 if (ARM_NUM_REGS (mode
) > 4)
23837 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23840 if (regno
== FRAME_POINTER_REGNUM
23841 || regno
== ARG_POINTER_REGNUM
)
23842 /* We only allow integers in the fake hard registers. */
23843 return GET_MODE_CLASS (mode
) == MODE_INT
;
23848 /* Implement TARGET_MODES_TIEABLE_P. */
/* Two modes are tieable when they share a mode class, or (per the
   visible condition) when both are Neon D-reg, Q-reg or structure
   modes -- so elements can be tied to their containing structure.
   NOTE(review): the return statements were lost in extraction.  */
23851 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23853 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23856 /* We specifically want to allow elements of "structure" modes to
23857 be tieable to the structure. This more general condition allows
23858 other rarer situations too. */
23860 && (VALID_NEON_DREG_MODE (mode1
)
23861 || VALID_NEON_QREG_MODE (mode1
)
23862 || VALID_NEON_STRUCT_MODE (mode1
))
23863 && (VALID_NEON_DREG_MODE (mode2
)
23864 || VALID_NEON_QREG_MODE (mode2
)
23865 || VALID_NEON_STRUCT_MODE (mode2
)))
23871 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23872 not used in arm mode. */
23875 arm_regno_class (int regno
)
23877 if (regno
== PC_REGNUM
)
23882 if (regno
== STACK_POINTER_REGNUM
)
23884 if (regno
== CC_REGNUM
)
23891 if (TARGET_THUMB2
&& regno
< 8)
23894 if ( regno
<= LAST_ARM_REGNUM
23895 || regno
== FRAME_POINTER_REGNUM
23896 || regno
== ARG_POINTER_REGNUM
)
23897 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23899 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23900 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23902 if (IS_VFP_REGNUM (regno
))
23904 if (regno
<= D7_VFP_REGNUM
)
23905 return VFP_D0_D7_REGS
;
23906 else if (regno
<= LAST_LO_VFP_REGNUM
)
23907 return VFP_LO_REGS
;
23909 return VFP_HI_REGS
;
23912 if (IS_IWMMXT_REGNUM (regno
))
23913 return IWMMXT_REGS
;
23915 if (IS_IWMMXT_GR_REGNUM (regno
))
23916 return IWMMXT_GR_REGS
;
23921 /* Handle a special case when computing the offset
23922 of an argument from the frame pointer. */
23924 arm_debugger_arg_offset (int value
, rtx addr
)
23928 /* We are only interested if dbxout_parms() failed to compute the offset. */
23932 /* We can only cope with the case where the address is held in a register. */
23936 /* If we are using the frame pointer to point at the argument, then
23937 an offset of 0 is correct. */
23938 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23941 /* If we are using the stack pointer to point at the
23942 argument, then an offset of 0 is correct. */
23943 /* ??? Check this is consistent with thumb2 frame layout. */
23944 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23945 && REGNO (addr
) == SP_REGNUM
)
23948 /* Oh dear. The argument is pointed to by a register rather
23949 than being held in a register, or being stored at a known
23950 offset from the frame pointer. Since GDB only understands
23951 those two kinds of argument we must translate the address
23952 held in the register into an offset from the frame pointer.
23953 We do this by searching through the insns for the function
23954 looking to see where this register gets its value. If the
23955 register is initialized from the frame pointer plus an offset
23956 then we are in luck and we can continue, otherwise we give up.
23958 This code is exercised by producing debugging information
23959 for a function with arguments like this:
23961 double func (double a, double b, int c, double d) {return d;}
23963 Without this code the stab for parameter 'd' will be set to
23964 an offset of 0 from the frame pointer, rather than 8. */
23966 /* The if() statement says:
23968 If the insn is a normal instruction
23969 and if the insn is setting the value in a register
23970 and if the register being set is the register holding the address of the argument
23971 and if the address is computing by an addition
23972 that involves adding to a register
23973 which is the frame pointer
23978 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23980 if ( NONJUMP_INSN_P (insn
)
23981 && GET_CODE (PATTERN (insn
)) == SET
23982 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23983 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23984 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23985 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23986 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23989 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23998 warning (0, "unable to compute real location of stacked parameter");
23999 value
= 8; /* XXX magic hack */
24005 /* Implement TARGET_PROMOTED_TYPE. */
24008 arm_promoted_type (const_tree t
)
24010 if (SCALAR_FLOAT_TYPE_P (t
)
24011 && TYPE_PRECISION (t
) == 16
24012 && TYPE_MAIN_VARIANT (t
) == arm_fp16_type_node
)
24013 return float_type_node
;
24017 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24018 This simply adds HFmode as a supported mode; even though we don't
24019 implement arithmetic on this type directly, it's supported by
24020 optabs conversions, much the way the double-word arithmetic is
24021 special-cased in the default hook. */
24024 arm_scalar_mode_supported_p (scalar_mode mode
)
24026 if (mode
== HFmode
)
24027 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
24028 else if (ALL_FIXED_POINT_MODE_P (mode
))
24031 return default_scalar_mode_supported_p (mode
);
24034 /* Set the value of FLT_EVAL_METHOD.
24035 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
24037 0: evaluate all operations and constants, whose semantic type has at
24038 most the range and precision of type float, to the range and
24039 precision of float; evaluate all other operations and constants to
24040 the range and precision of the semantic type;
24042 N, where _FloatN is a supported interchange floating type
24043 evaluate all operations and constants, whose semantic type has at
24044 most the range and precision of _FloatN type, to the range and
24045 precision of the _FloatN type; evaluate all other operations and
24046 constants to the range and precision of the semantic type;
24048 If we have the ARMv8.2-A extensions then we support _Float16 in native
24049 precision, so we should set this to 16. Otherwise, we support the type,
24050 but want to evaluate expressions in float precision, so set this to
24053 static enum flt_eval_method
24054 arm_excess_precision (enum excess_precision_type type
)
24058 case EXCESS_PRECISION_TYPE_FAST
:
24059 case EXCESS_PRECISION_TYPE_STANDARD
:
24060 /* We can calculate either in 16-bit range and precision or
24061 32-bit range and precision. Make that decision based on whether
24062 we have native support for the ARMv8.2-A 16-bit floating-point
24063 instructions or not. */
24064 return (TARGET_VFP_FP16INST
24065 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
24066 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
);
24067 case EXCESS_PRECISION_TYPE_IMPLICIT
:
24068 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
;
24070 gcc_unreachable ();
24072 return FLT_EVAL_METHOD_UNPREDICTABLE
;
24076 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
24077 _Float16 if we are using anything other than ieee format for 16-bit
24078 floating point. Otherwise, punt to the default implementation. */
24079 static opt_scalar_float_mode
24080 arm_floatn_mode (int n
, bool extended
)
24082 if (!extended
&& n
== 16)
24084 if (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
)
24086 return opt_scalar_float_mode ();
24089 return default_floatn_mode (n
, extended
);
24093 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24094 not to early-clobber SRC registers in the process.
24096 We assume that the operands described by SRC and DEST represent a
24097 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
24098 number of components into which the copy has been decomposed. */
24100 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
24104 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
24105 || REGNO (operands
[0]) < REGNO (operands
[1]))
24107 for (i
= 0; i
< count
; i
++)
24109 operands
[2 * i
] = dest
[i
];
24110 operands
[2 * i
+ 1] = src
[i
];
24115 for (i
= 0; i
< count
; i
++)
24117 operands
[2 * i
] = dest
[count
- i
- 1];
24118 operands
[2 * i
+ 1] = src
[count
- i
- 1];
24123 /* Split operands into moves from op[1] + op[2] into op[0]. */
24126 neon_split_vcombine (rtx operands
[3])
24128 unsigned int dest
= REGNO (operands
[0]);
24129 unsigned int src1
= REGNO (operands
[1]);
24130 unsigned int src2
= REGNO (operands
[2]);
24131 machine_mode halfmode
= GET_MODE (operands
[1]);
24132 unsigned int halfregs
= REG_NREGS (operands
[1]);
24133 rtx destlo
, desthi
;
24135 if (src1
== dest
&& src2
== dest
+ halfregs
)
24137 /* No-op move. Can't split to nothing; emit something. */
24138 emit_note (NOTE_INSN_DELETED
);
24142 /* Preserve register attributes for variable tracking. */
24143 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
24144 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
24145 GET_MODE_SIZE (halfmode
));
24147 /* Special case of reversed high/low parts. Use VSWP. */
24148 if (src2
== dest
&& src1
== dest
+ halfregs
)
24150 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
24151 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
24152 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
24156 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
24158 /* Try to avoid unnecessary moves if part of the result
24159 is in the right place already. */
24161 emit_move_insn (destlo
, operands
[1]);
24162 if (src2
!= dest
+ halfregs
)
24163 emit_move_insn (desthi
, operands
[2]);
24167 if (src2
!= dest
+ halfregs
)
24168 emit_move_insn (desthi
, operands
[2]);
24170 emit_move_insn (destlo
, operands
[1]);
24174 /* Return the number (counting from 0) of
24175 the least significant set bit in MASK. */
24178 number_of_first_bit_set (unsigned mask
)
24180 return ctz_hwi (mask
);
24183 /* Like emit_multi_reg_push, but allowing for a different set of
24184 registers to be described as saved. MASK is the set of registers
24185 to be saved; REAL_REGS is the set of registers to be described as
24186 saved. If REAL_REGS is 0, only describe the stack adjustment. */
24189 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
24191 unsigned long regno
;
24192 rtx par
[10], tmp
, reg
;
24196 /* Build the parallel of the registers actually being stored. */
24197 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
24199 regno
= ctz_hwi (mask
);
24200 reg
= gen_rtx_REG (SImode
, regno
);
24203 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
24205 tmp
= gen_rtx_USE (VOIDmode
, reg
);
24210 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
24211 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
24212 tmp
= gen_frame_mem (BLKmode
, tmp
);
24213 tmp
= gen_rtx_SET (tmp
, par
[0]);
24216 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
24217 insn
= emit_insn (tmp
);
24219 /* Always build the stack adjustment note for unwind info. */
24220 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
24221 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
24224 /* Build the parallel of the registers recorded as saved for unwind. */
24225 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
24227 regno
= ctz_hwi (real_regs
);
24228 reg
= gen_rtx_REG (SImode
, regno
);
24230 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
24231 tmp
= gen_frame_mem (SImode
, tmp
);
24232 tmp
= gen_rtx_SET (tmp
, reg
);
24233 RTX_FRAME_RELATED_P (tmp
) = 1;
24241 RTX_FRAME_RELATED_P (par
[0]) = 1;
24242 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
24245 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
24250 /* Emit code to push or pop registers to or from the stack. F is the
24251 assembly file. MASK is the registers to pop. */
24253 thumb_pop (FILE *f
, unsigned long mask
)
24256 int lo_mask
= mask
& 0xFF;
24260 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
24262 /* Special case. Do not generate a POP PC statement here, do it in
24264 thumb_exit (f
, -1);
24268 fprintf (f
, "\tpop\t{");
24270 /* Look at the low registers first. */
24271 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
24275 asm_fprintf (f
, "%r", regno
);
24277 if ((lo_mask
& ~1) != 0)
24282 if (mask
& (1 << PC_REGNUM
))
24284 /* Catch popping the PC. */
24285 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
|| crtl
->calls_eh_return
24286 || IS_CMSE_ENTRY (arm_current_func_type ()))
24288 /* The PC is never poped directly, instead
24289 it is popped into r3 and then BX is used. */
24290 fprintf (f
, "}\n");
24292 thumb_exit (f
, -1);
24301 asm_fprintf (f
, "%r", PC_REGNUM
);
24305 fprintf (f
, "}\n");
24308 /* Generate code to return from a thumb function.
24309 If 'reg_containing_return_addr' is -1, then the return address is
24310 actually on the stack, at the stack pointer.
24312 Note: do not forget to update length attribute of corresponding insn pattern
24313 when changing assembly output (eg. length attribute of epilogue_insns when
24314 updating Armv8-M Baseline Security Extensions register clearing
24317 thumb_exit (FILE *f
, int reg_containing_return_addr
)
24319 unsigned regs_available_for_popping
;
24320 unsigned regs_to_pop
;
24322 unsigned available
;
24326 int restore_a4
= FALSE
;
24328 /* Compute the registers we need to pop. */
24332 if (reg_containing_return_addr
== -1)
24334 regs_to_pop
|= 1 << LR_REGNUM
;
24338 if (TARGET_BACKTRACE
)
24340 /* Restore the (ARM) frame pointer and stack pointer. */
24341 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
24345 /* If there is nothing to pop then just emit the BX instruction and
24347 if (pops_needed
== 0)
24349 if (crtl
->calls_eh_return
)
24350 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24352 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24354 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n",
24355 reg_containing_return_addr
);
24356 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
24359 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
24362 /* Otherwise if we are not supporting interworking and we have not created
24363 a backtrace structure and the function was not entered in ARM mode then
24364 just pop the return address straight into the PC. */
24365 else if (!TARGET_INTERWORK
24366 && !TARGET_BACKTRACE
24367 && !is_called_in_ARM_mode (current_function_decl
)
24368 && !crtl
->calls_eh_return
24369 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24371 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
24375 /* Find out how many of the (return) argument registers we can corrupt. */
24376 regs_available_for_popping
= 0;
24378 /* If returning via __builtin_eh_return, the bottom three registers
24379 all contain information needed for the return. */
24380 if (crtl
->calls_eh_return
)
24384 /* If we can deduce the registers used from the function's
24385 return value. This is more reliable that examining
24386 df_regs_ever_live_p () because that will be set if the register is
24387 ever used in the function, not just if the register is used
24388 to hold a return value. */
24390 if (crtl
->return_rtx
!= 0)
24391 mode
= GET_MODE (crtl
->return_rtx
);
24393 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
24395 size
= GET_MODE_SIZE (mode
);
24399 /* In a void function we can use any argument register.
24400 In a function that returns a structure on the stack
24401 we can use the second and third argument registers. */
24402 if (mode
== VOIDmode
)
24403 regs_available_for_popping
=
24404 (1 << ARG_REGISTER (1))
24405 | (1 << ARG_REGISTER (2))
24406 | (1 << ARG_REGISTER (3));
24408 regs_available_for_popping
=
24409 (1 << ARG_REGISTER (2))
24410 | (1 << ARG_REGISTER (3));
24412 else if (size
<= 4)
24413 regs_available_for_popping
=
24414 (1 << ARG_REGISTER (2))
24415 | (1 << ARG_REGISTER (3));
24416 else if (size
<= 8)
24417 regs_available_for_popping
=
24418 (1 << ARG_REGISTER (3));
24421 /* Match registers to be popped with registers into which we pop them. */
24422 for (available
= regs_available_for_popping
,
24423 required
= regs_to_pop
;
24424 required
!= 0 && available
!= 0;
24425 available
&= ~(available
& - available
),
24426 required
&= ~(required
& - required
))
24429 /* If we have any popping registers left over, remove them. */
24431 regs_available_for_popping
&= ~available
;
24433 /* Otherwise if we need another popping register we can use
24434 the fourth argument register. */
24435 else if (pops_needed
)
24437 /* If we have not found any free argument registers and
24438 reg a4 contains the return address, we must move it. */
24439 if (regs_available_for_popping
== 0
24440 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
24442 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24443 reg_containing_return_addr
= LR_REGNUM
;
24445 else if (size
> 12)
24447 /* Register a4 is being used to hold part of the return value,
24448 but we have dire need of a free, low register. */
24451 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
24454 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
24456 /* The fourth argument register is available. */
24457 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
24463 /* Pop as many registers as we can. */
24464 thumb_pop (f
, regs_available_for_popping
);
24466 /* Process the registers we popped. */
24467 if (reg_containing_return_addr
== -1)
24469 /* The return address was popped into the lowest numbered register. */
24470 regs_to_pop
&= ~(1 << LR_REGNUM
);
24472 reg_containing_return_addr
=
24473 number_of_first_bit_set (regs_available_for_popping
);
24475 /* Remove this register for the mask of available registers, so that
24476 the return address will not be corrupted by further pops. */
24477 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
24480 /* If we popped other registers then handle them here. */
24481 if (regs_available_for_popping
)
24485 /* Work out which register currently contains the frame pointer. */
24486 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24488 /* Move it into the correct place. */
24489 asm_fprintf (f
, "\tmov\t%r, %r\n",
24490 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
24492 /* (Temporarily) remove it from the mask of popped registers. */
24493 regs_available_for_popping
&= ~(1 << frame_pointer
);
24494 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
24496 if (regs_available_for_popping
)
24500 /* We popped the stack pointer as well,
24501 find the register that contains it. */
24502 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24504 /* Move it into the stack register. */
24505 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
24507 /* At this point we have popped all necessary registers, so
24508 do not worry about restoring regs_available_for_popping
24509 to its correct value:
24511 assert (pops_needed == 0)
24512 assert (regs_available_for_popping == (1 << frame_pointer))
24513 assert (regs_to_pop == (1 << STACK_POINTER)) */
24517 /* Since we have just move the popped value into the frame
24518 pointer, the popping register is available for reuse, and
24519 we know that we still have the stack pointer left to pop. */
24520 regs_available_for_popping
|= (1 << frame_pointer
);
24524 /* If we still have registers left on the stack, but we no longer have
24525 any registers into which we can pop them, then we must move the return
24526 address into the link register and make available the register that
24528 if (regs_available_for_popping
== 0 && pops_needed
> 0)
24530 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
24532 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
24533 reg_containing_return_addr
);
24535 reg_containing_return_addr
= LR_REGNUM
;
24538 /* If we have registers left on the stack then pop some more.
24539 We know that at most we will want to pop FP and SP. */
24540 if (pops_needed
> 0)
24545 thumb_pop (f
, regs_available_for_popping
);
24547 /* We have popped either FP or SP.
24548 Move whichever one it is into the correct register. */
24549 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24550 move_to
= number_of_first_bit_set (regs_to_pop
);
24552 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
24556 /* If we still have not popped everything then we must have only
24557 had one register available to us and we are now popping the SP. */
24558 if (pops_needed
> 0)
24562 thumb_pop (f
, regs_available_for_popping
);
24564 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24566 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
24568 assert (regs_to_pop == (1 << STACK_POINTER))
24569 assert (pops_needed == 1)
24573 /* If necessary restore the a4 register. */
24576 if (reg_containing_return_addr
!= LR_REGNUM
)
24578 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24579 reg_containing_return_addr
= LR_REGNUM
;
24582 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
24585 if (crtl
->calls_eh_return
)
24586 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24588 /* Return to caller. */
24589 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24591 /* This is for the cases where LR is not being used to contain the return
24592 address. It may therefore contain information that we might not want
24593 to leak, hence it must be cleared. The value in R0 will never be a
24594 secret at this point, so it is safe to use it, see the clearing code
24595 in 'cmse_nonsecure_entry_clear_before_return'. */
24596 if (reg_containing_return_addr
!= LR_REGNUM
)
24597 asm_fprintf (f
, "\tmov\tlr, r0\n");
24599 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr
);
24600 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
24603 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
24606 /* Scan INSN just before assembler is output for it.
24607 For Thumb-1, we track the status of the condition codes; this
24608 information is used in the cbranchsi4_insn pattern. */
24610 thumb1_final_prescan_insn (rtx_insn
*insn
)
24612 if (flag_print_asm_name
)
24613 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
24614 INSN_ADDRESSES (INSN_UID (insn
)));
24615 /* Don't overwrite the previous setter when we get to a cbranch. */
24616 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
24618 enum attr_conds conds
;
24620 if (cfun
->machine
->thumb1_cc_insn
)
24622 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
24623 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
24626 conds
= get_attr_conds (insn
);
24627 if (conds
== CONDS_SET
)
24629 rtx set
= single_set (insn
);
24630 cfun
->machine
->thumb1_cc_insn
= insn
;
24631 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
24632 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
24633 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
24634 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
24636 rtx src1
= XEXP (SET_SRC (set
), 1);
24637 if (src1
== const0_rtx
)
24638 cfun
->machine
->thumb1_cc_mode
= CCmode
;
24640 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
24642 /* Record the src register operand instead of dest because
24643 cprop_hardreg pass propagates src. */
24644 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
24647 else if (conds
!= CONDS_NOCOND
)
24648 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
24651 /* Check if unexpected far jump is used. */
24652 if (cfun
->machine
->lr_save_eliminated
24653 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24654 internal_error("Unexpected thumb1 far jump");
24658 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
24660 unsigned HOST_WIDE_INT mask
= 0xff;
24663 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
24664 if (val
== 0) /* XXX */
24667 for (i
= 0; i
< 25; i
++)
24668 if ((val
& (mask
<< i
)) == val
)
24674 /* Returns nonzero if the current function contains,
24675 or might contain a far jump. */
24677 thumb_far_jump_used_p (void)
24680 bool far_jump
= false;
24681 unsigned int func_size
= 0;
24683 /* If we have already decided that far jumps may be used,
24684 do not bother checking again, and always return true even if
24685 it turns out that they are not being used. Once we have made
24686 the decision that far jumps are present (and that hence the link
24687 register will be pushed onto the stack) we cannot go back on it. */
24688 if (cfun
->machine
->far_jump_used
)
24691 /* If this function is not being called from the prologue/epilogue
24692 generation code then it must be being called from the
24693 INITIAL_ELIMINATION_OFFSET macro. */
24694 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
24696 /* In this case we know that we are being asked about the elimination
24697 of the arg pointer register. If that register is not being used,
24698 then there are no arguments on the stack, and we do not have to
24699 worry that a far jump might force the prologue to push the link
24700 register, changing the stack offsets. In this case we can just
24701 return false, since the presence of far jumps in the function will
24702 not affect stack offsets.
24704 If the arg pointer is live (or if it was live, but has now been
24705 eliminated and so set to dead) then we do have to test to see if
24706 the function might contain a far jump. This test can lead to some
24707 false negatives, since before reload is completed, then length of
24708 branch instructions is not known, so gcc defaults to returning their
24709 longest length, which in turn sets the far jump attribute to true.
24711 A false negative will not result in bad code being generated, but it
24712 will result in a needless push and pop of the link register. We
24713 hope that this does not occur too often.
24715 If we need doubleword stack alignment this could affect the other
24716 elimination offsets so we can't risk getting it wrong. */
24717 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
24718 cfun
->machine
->arg_pointer_live
= 1;
24719 else if (!cfun
->machine
->arg_pointer_live
)
24723 /* We should not change far_jump_used during or after reload, as there is
24724 no chance to change stack frame layout. */
24725 if (reload_in_progress
|| reload_completed
)
24728 /* Check to see if the function contains a branch
24729 insn with the far jump attribute set. */
24730 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
24732 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24736 func_size
+= get_attr_length (insn
);
24739 /* Attribute far_jump will always be true for thumb1 before
24740 shorten_branch pass. So checking far_jump attribute before
24741 shorten_branch isn't much useful.
24743 Following heuristic tries to estimate more accurately if a far jump
24744 may finally be used. The heuristic is very conservative as there is
24745 no chance to roll-back the decision of not to use far jump.
24747 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24748 2-byte insn is associated with a 4 byte constant pool. Using
24749 function size 2048/3 as the threshold is conservative enough. */
24752 if ((func_size
* 3) >= 2048)
24754 /* Record the fact that we have decided that
24755 the function does use far jumps. */
24756 cfun
->machine
->far_jump_used
= 1;
24764 /* Return nonzero if FUNC must be entered in ARM mode. */
24766 is_called_in_ARM_mode (tree func
)
24768 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
24770 /* Ignore the problem about functions whose address is taken. */
24771 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
24775 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
24781 /* Given the stack offsets and register mask in OFFSETS, decide how
24782 many additional registers to push instead of subtracting a constant
24783 from SP. For epilogues the principle is the same except we use pop.
24784 FOR_PROLOGUE indicates which we're generating. */
24786 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
24788 HOST_WIDE_INT amount
;
24789 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
24790 /* Extract a mask of the ones we can give to the Thumb's push/pop
24792 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
24793 /* Then count how many other high registers will need to be pushed. */
24794 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24795 int n_free
, reg_base
, size
;
24797 if (!for_prologue
&& frame_pointer_needed
)
24798 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24800 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24802 /* If the stack frame size is 512 exactly, we can save one load
24803 instruction, which should make this a win even when optimizing
24805 if (!optimize_size
&& amount
!= 512)
24808 /* Can't do this if there are high registers to push. */
24809 if (high_regs_pushed
!= 0)
24812 /* Shouldn't do it in the prologue if no registers would normally
24813 be pushed at all. In the epilogue, also allow it if we'll have
24814 a pop insn for the PC. */
24817 || TARGET_BACKTRACE
24818 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
24819 || TARGET_INTERWORK
24820 || crtl
->args
.pretend_args_size
!= 0))
24823 /* Don't do this if thumb_expand_prologue wants to emit instructions
24824 between the push and the stack frame allocation. */
24826 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24827 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
24834 size
= arm_size_return_regs ();
24835 reg_base
= ARM_NUM_INTS (size
);
24836 live_regs_mask
>>= reg_base
;
24839 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
24840 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
24842 live_regs_mask
>>= 1;
24848 gcc_assert (amount
/ 4 * 4 == amount
);
24850 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
24851 return (amount
- 508) / 4;
24852 if (amount
<= n_free
* 4)
24857 /* The bits which aren't usefully expanded as rtl. */
24859 thumb1_unexpanded_epilogue (void)
24861 arm_stack_offsets
*offsets
;
24863 unsigned long live_regs_mask
= 0;
24864 int high_regs_pushed
= 0;
24866 int had_to_push_lr
;
24869 if (cfun
->machine
->return_used_this_function
!= 0)
24872 if (IS_NAKED (arm_current_func_type ()))
24875 offsets
= arm_get_frame_offsets ();
24876 live_regs_mask
= offsets
->saved_regs_mask
;
24877 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24879 /* If we can deduce the registers used from the function's return value.
24880 This is more reliable that examining df_regs_ever_live_p () because that
24881 will be set if the register is ever used in the function, not just if
24882 the register is used to hold a return value. */
24883 size
= arm_size_return_regs ();
24885 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
24888 unsigned long extra_mask
= (1 << extra_pop
) - 1;
24889 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
24892 /* The prolog may have pushed some high registers to use as
24893 work registers. e.g. the testsuite file:
24894 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24895 compiles to produce:
24896 push {r4, r5, r6, r7, lr}
24900 as part of the prolog. We have to undo that pushing here. */
24902 if (high_regs_pushed
)
24904 unsigned long mask
= live_regs_mask
& 0xff;
24907 mask
|= thumb1_epilogue_unused_call_clobbered_lo_regs ();
24910 /* Oh dear! We have no low registers into which we can pop
24913 ("no low registers available for popping high registers");
24915 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
24916 if (live_regs_mask
& (1 << next_hi_reg
))
24919 while (high_regs_pushed
)
24921 /* Find lo register(s) into which the high register(s) can
24923 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
24925 if (mask
& (1 << regno
))
24926 high_regs_pushed
--;
24927 if (high_regs_pushed
== 0)
24931 if (high_regs_pushed
== 0 && regno
>= 0)
24932 mask
&= ~((1 << regno
) - 1);
24934 /* Pop the values into the low register(s). */
24935 thumb_pop (asm_out_file
, mask
);
24937 /* Move the value(s) into the high registers. */
24938 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
24940 if (mask
& (1 << regno
))
24942 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
24945 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
24947 if (live_regs_mask
& (1 << next_hi_reg
))
24952 live_regs_mask
&= ~0x0f00;
24955 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
24956 live_regs_mask
&= 0xff;
24958 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
24960 /* Pop the return address into the PC. */
24961 if (had_to_push_lr
)
24962 live_regs_mask
|= 1 << PC_REGNUM
;
24964 /* Either no argument registers were pushed or a backtrace
24965 structure was created which includes an adjusted stack
24966 pointer, so just pop everything. */
24967 if (live_regs_mask
)
24968 thumb_pop (asm_out_file
, live_regs_mask
);
24970 /* We have either just popped the return address into the
24971 PC or it is was kept in LR for the entire function.
24972 Note that thumb_pop has already called thumb_exit if the
24973 PC was in the list. */
24974 if (!had_to_push_lr
)
24975 thumb_exit (asm_out_file
, LR_REGNUM
);
24979 /* Pop everything but the return address. */
24980 if (live_regs_mask
)
24981 thumb_pop (asm_out_file
, live_regs_mask
);
24983 if (had_to_push_lr
)
24987 /* We have no free low regs, so save one. */
24988 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
24992 /* Get the return address into a temporary register. */
24993 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
24997 /* Move the return address to lr. */
24998 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
25000 /* Restore the low register. */
25001 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
25006 regno
= LAST_ARG_REGNUM
;
25011 /* Remove the argument registers that were pushed onto the stack. */
25012 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
25013 SP_REGNUM
, SP_REGNUM
,
25014 crtl
->args
.pretend_args_size
);
25016 thumb_exit (asm_out_file
, regno
);
25022 /* Functions to save and restore machine-specific function data. */
25023 static struct machine_function
*
25024 arm_init_machine_status (void)
25026 struct machine_function
*machine
;
25027 machine
= ggc_cleared_alloc
<machine_function
> ();
25029 #if ARM_FT_UNKNOWN != 0
25030 machine
->func_type
= ARM_FT_UNKNOWN
;
25032 machine
->static_chain_stack_bytes
= -1;
25036 /* Return an RTX indicating where the return address to the
25037 calling function can be found. */
25039 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
25044 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
25047 /* Do anything needed before RTL is emitted for each function. */
25049 arm_init_expanders (void)
25051 /* Arrange to initialize and mark the machine per-function status. */
25052 init_machine_status
= arm_init_machine_status
;
25054 /* This is to stop the combine pass optimizing away the alignment
25055 adjustment of va_arg. */
25056 /* ??? It is claimed that this should not be necessary. */
25058 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
25061 /* Check that FUNC is called with a different mode. */
25064 arm_change_mode_p (tree func
)
25066 if (TREE_CODE (func
) != FUNCTION_DECL
)
25069 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
25072 callee_tree
= target_option_default_node
;
25074 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
25075 int flags
= callee_opts
->x_target_flags
;
25077 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
25080 /* Like arm_compute_initial_elimination offset. Simpler because there
25081 isn't an ABI specified frame pointer for Thumb. Instead, we set it
25082 to point at the base of the local variables after static stack
25083 space for a function has been allocated. */
25086 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
25088 arm_stack_offsets
*offsets
;
25090 offsets
= arm_get_frame_offsets ();
25094 case ARG_POINTER_REGNUM
:
25097 case STACK_POINTER_REGNUM
:
25098 return offsets
->outgoing_args
- offsets
->saved_args
;
25100 case FRAME_POINTER_REGNUM
:
25101 return offsets
->soft_frame
- offsets
->saved_args
;
25103 case ARM_HARD_FRAME_POINTER_REGNUM
:
25104 return offsets
->saved_regs
- offsets
->saved_args
;
25106 case THUMB_HARD_FRAME_POINTER_REGNUM
:
25107 return offsets
->locals_base
- offsets
->saved_args
;
25110 gcc_unreachable ();
25114 case FRAME_POINTER_REGNUM
:
25117 case STACK_POINTER_REGNUM
:
25118 return offsets
->outgoing_args
- offsets
->soft_frame
;
25120 case ARM_HARD_FRAME_POINTER_REGNUM
:
25121 return offsets
->saved_regs
- offsets
->soft_frame
;
25123 case THUMB_HARD_FRAME_POINTER_REGNUM
:
25124 return offsets
->locals_base
- offsets
->soft_frame
;
25127 gcc_unreachable ();
25132 gcc_unreachable ();
25136 /* Generate the function's prologue. */
25139 thumb1_expand_prologue (void)
25143 HOST_WIDE_INT amount
;
25144 HOST_WIDE_INT size
;
25145 arm_stack_offsets
*offsets
;
25146 unsigned long func_type
;
25148 unsigned long live_regs_mask
;
25149 unsigned long l_mask
;
25150 unsigned high_regs_pushed
= 0;
25151 bool lr_needs_saving
;
25153 func_type
= arm_current_func_type ();
25155 /* Naked functions don't have prologues. */
25156 if (IS_NAKED (func_type
))
25158 if (flag_stack_usage_info
)
25159 current_function_static_stack_size
= 0;
25163 if (IS_INTERRUPT (func_type
))
25165 error ("interrupt Service Routines cannot be coded in Thumb mode");
25169 if (is_called_in_ARM_mode (current_function_decl
))
25170 emit_insn (gen_prologue_thumb1_interwork ());
25172 offsets
= arm_get_frame_offsets ();
25173 live_regs_mask
= offsets
->saved_regs_mask
;
25174 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
25176 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25177 l_mask
= live_regs_mask
& 0x40ff;
25178 /* Then count how many other high registers will need to be pushed. */
25179 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
25181 if (crtl
->args
.pretend_args_size
)
25183 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
25185 if (cfun
->machine
->uses_anonymous_args
)
25187 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
25188 unsigned long mask
;
25190 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
25191 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
25193 insn
= thumb1_emit_multi_reg_push (mask
, 0);
25197 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25198 stack_pointer_rtx
, x
));
25200 RTX_FRAME_RELATED_P (insn
) = 1;
25203 if (TARGET_BACKTRACE
)
25205 HOST_WIDE_INT offset
= 0;
25206 unsigned work_register
;
25207 rtx work_reg
, x
, arm_hfp_rtx
;
25209 /* We have been asked to create a stack backtrace structure.
25210 The code looks like this:
25214 0 sub SP, #16 Reserve space for 4 registers.
25215 2 push {R7} Push low registers.
25216 4 add R7, SP, #20 Get the stack pointer before the push.
25217 6 str R7, [SP, #8] Store the stack pointer
25218 (before reserving the space).
25219 8 mov R7, PC Get hold of the start of this code + 12.
25220 10 str R7, [SP, #16] Store it.
25221 12 mov R7, FP Get hold of the current frame pointer.
25222 14 str R7, [SP, #4] Store it.
25223 16 mov R7, LR Get hold of the current return address.
25224 18 str R7, [SP, #12] Store it.
25225 20 add R7, SP, #16 Point at the start of the
25226 backtrace structure.
25227 22 mov FP, R7 Put this value into the frame pointer. */
25229 work_register
= thumb_find_work_register (live_regs_mask
);
25230 work_reg
= gen_rtx_REG (SImode
, work_register
);
25231 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
25233 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25234 stack_pointer_rtx
, GEN_INT (-16)));
25235 RTX_FRAME_RELATED_P (insn
) = 1;
25239 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
25240 RTX_FRAME_RELATED_P (insn
) = 1;
25241 lr_needs_saving
= false;
25243 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
25246 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
25247 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
25249 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
25250 x
= gen_frame_mem (SImode
, x
);
25251 emit_move_insn (x
, work_reg
);
25253 /* Make sure that the instruction fetching the PC is in the right place
25254 to calculate "start of backtrace creation code + 12". */
25255 /* ??? The stores using the common WORK_REG ought to be enough to
25256 prevent the scheduler from doing anything weird. Failing that
25257 we could always move all of the following into an UNSPEC_VOLATILE. */
25260 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
25261 emit_move_insn (work_reg
, x
);
25263 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
25264 x
= gen_frame_mem (SImode
, x
);
25265 emit_move_insn (x
, work_reg
);
25267 emit_move_insn (work_reg
, arm_hfp_rtx
);
25269 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
25270 x
= gen_frame_mem (SImode
, x
);
25271 emit_move_insn (x
, work_reg
);
25275 emit_move_insn (work_reg
, arm_hfp_rtx
);
25277 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
25278 x
= gen_frame_mem (SImode
, x
);
25279 emit_move_insn (x
, work_reg
);
25281 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
25282 emit_move_insn (work_reg
, x
);
25284 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
25285 x
= gen_frame_mem (SImode
, x
);
25286 emit_move_insn (x
, work_reg
);
25289 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
25290 emit_move_insn (work_reg
, x
);
25292 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
25293 x
= gen_frame_mem (SImode
, x
);
25294 emit_move_insn (x
, work_reg
);
25296 x
= GEN_INT (offset
+ 12);
25297 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
25299 emit_move_insn (arm_hfp_rtx
, work_reg
);
25301 /* Optimization: If we are not pushing any low registers but we are going
25302 to push some high registers then delay our first push. This will just
25303 be a push of LR and we can combine it with the push of the first high
25305 else if ((l_mask
& 0xff) != 0
25306 || (high_regs_pushed
== 0 && lr_needs_saving
))
25308 unsigned long mask
= l_mask
;
25309 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
25310 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
25311 RTX_FRAME_RELATED_P (insn
) = 1;
25312 lr_needs_saving
= false;
25315 if (high_regs_pushed
)
25317 unsigned pushable_regs
;
25318 unsigned next_hi_reg
;
25319 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
25320 : crtl
->args
.info
.nregs
;
25321 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
25323 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
25324 if (live_regs_mask
& (1 << next_hi_reg
))
25327 /* Here we need to mask out registers used for passing arguments
25328 even if they can be pushed. This is to avoid using them to
25329 stash the high registers. Such kind of stash may clobber the
25330 use of arguments. */
25331 pushable_regs
= l_mask
& (~arg_regs_mask
);
25332 pushable_regs
|= thumb1_prologue_unused_call_clobbered_lo_regs ();
25334 /* Normally, LR can be used as a scratch register once it has been
25335 saved; but if the function examines its own return address then
25336 the value is still live and we need to avoid using it. */
25337 bool return_addr_live
25338 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
25341 if (lr_needs_saving
|| return_addr_live
)
25342 pushable_regs
&= ~(1 << LR_REGNUM
);
25344 if (pushable_regs
== 0)
25345 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
25347 while (high_regs_pushed
> 0)
25349 unsigned long real_regs_mask
= 0;
25350 unsigned long push_mask
= 0;
25352 for (regno
= LR_REGNUM
; regno
>= 0; regno
--)
25354 if (pushable_regs
& (1 << regno
))
25356 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25357 gen_rtx_REG (SImode
, next_hi_reg
));
25359 high_regs_pushed
--;
25360 real_regs_mask
|= (1 << next_hi_reg
);
25361 push_mask
|= (1 << regno
);
25363 if (high_regs_pushed
)
25365 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
25367 if (live_regs_mask
& (1 << next_hi_reg
))
25375 /* If we had to find a work register and we have not yet
25376 saved the LR then add it to the list of regs to push. */
25377 if (lr_needs_saving
)
25379 push_mask
|= 1 << LR_REGNUM
;
25380 real_regs_mask
|= 1 << LR_REGNUM
;
25381 lr_needs_saving
= false;
25382 /* If the return address is not live at this point, we
25383 can add LR to the list of registers that we can use
25385 if (!return_addr_live
)
25386 pushable_regs
|= 1 << LR_REGNUM
;
25389 insn
= thumb1_emit_multi_reg_push (push_mask
, real_regs_mask
);
25390 RTX_FRAME_RELATED_P (insn
) = 1;
25394 /* Load the pic register before setting the frame pointer,
25395 so we can use r7 as a temporary work register. */
25396 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
25397 arm_load_pic_register (live_regs_mask
, NULL_RTX
);
25399 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
25400 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
25401 stack_pointer_rtx
);
25403 size
= offsets
->outgoing_args
- offsets
->saved_args
;
25404 if (flag_stack_usage_info
)
25405 current_function_static_stack_size
= size
;
25407 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25408 if ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
25409 || flag_stack_clash_protection
)
25411 sorry ("%<-fstack-check=specific%> for Thumb-1");
25413 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25414 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
25419 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
25420 GEN_INT (- amount
)));
25421 RTX_FRAME_RELATED_P (insn
) = 1;
25427 /* The stack decrement is too big for an immediate value in a single
25428 insn. In theory we could issue multiple subtracts, but after
25429 three of them it becomes more space efficient to place the full
25430 value in the constant pool and load into a register. (Also the
25431 ARM debugger really likes to see only one stack decrement per
25432 function). So instead we look for a scratch register into which
25433 we can load the decrement, and then we subtract this from the
25434 stack pointer. Unfortunately on the thumb the only available
25435 scratch registers are the argument registers, and we cannot use
25436 these as they may hold arguments to the function. Instead we
25437 attempt to locate a call preserved register which is used by this
25438 function. If we can find one, then we know that it will have
25439 been pushed at the start of the prologue and so we can corrupt
25441 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
25442 if (live_regs_mask
& (1 << regno
))
25445 gcc_assert(regno
<= LAST_LO_REGNUM
);
25447 reg
= gen_rtx_REG (SImode
, regno
);
25449 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
25451 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25452 stack_pointer_rtx
, reg
));
25454 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
25455 plus_constant (Pmode
, stack_pointer_rtx
,
25457 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
25458 RTX_FRAME_RELATED_P (insn
) = 1;
25462 if (frame_pointer_needed
)
25463 thumb_set_frame_pointer (offsets
);
25465 /* If we are profiling, make sure no instructions are scheduled before
25466 the call to mcount. Similarly if the user has requested no
25467 scheduling in the prolog. Similarly if we want non-call exceptions
25468 using the EABI unwinder, to prevent faulting instructions from being
25469 swapped with a stack adjustment. */
25470 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
25471 || (arm_except_unwind_info (&global_options
) == UI_TARGET
25472 && cfun
->can_throw_non_call_exceptions
))
25473 emit_insn (gen_blockage ());
25475 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
25476 if (live_regs_mask
& 0xff)
25477 cfun
->machine
->lr_save_eliminated
= 0;
25480 /* Clear caller saved registers not used to pass return values and leaked
25481 condition flags before exiting a cmse_nonsecure_entry function. */
25484 cmse_nonsecure_entry_clear_before_return (void)
25486 int regno
, maxregno
= TARGET_HARD_FLOAT
? LAST_VFP_REGNUM
: IP_REGNUM
;
25487 uint32_t padding_bits_to_clear
= 0;
25488 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
25489 rtx r1_reg
, result_rtl
, clearing_reg
= NULL_RTX
;
25492 bitmap_clear (to_clear_bitmap
);
25493 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
, NUM_ARG_REGS
);
25494 bitmap_set_bit (to_clear_bitmap
, IP_REGNUM
);
25496 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25498 if (TARGET_HARD_FLOAT
)
25500 int float_bits
= D7_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1;
25502 bitmap_set_range (to_clear_bitmap
, FIRST_VFP_REGNUM
, float_bits
);
25504 /* Make sure we don't clear the two scratch registers used to clear the
25505 relevant FPSCR bits in output_return_instruction. */
25506 emit_use (gen_rtx_REG (SImode
, IP_REGNUM
));
25507 bitmap_clear_bit (to_clear_bitmap
, IP_REGNUM
);
25508 emit_use (gen_rtx_REG (SImode
, 4));
25509 bitmap_clear_bit (to_clear_bitmap
, 4);
25512 /* If the user has defined registers to be caller saved, these are no longer
25513 restored by the function before returning and must thus be cleared for
25514 security purposes. */
25515 for (regno
= NUM_ARG_REGS
; regno
<= maxregno
; regno
++)
25517 /* We do not touch registers that can be used to pass arguments as per
25518 the AAPCS, since these should never be made callee-saved by user
25520 if (IN_RANGE (regno
, FIRST_VFP_REGNUM
, D7_VFP_REGNUM
))
25522 if (IN_RANGE (regno
, IP_REGNUM
, PC_REGNUM
))
25524 if (call_used_regs
[regno
])
25525 bitmap_set_bit (to_clear_bitmap
, regno
);
25528 /* Make sure we do not clear the registers used to return the result in. */
25529 result_type
= TREE_TYPE (DECL_RESULT (current_function_decl
));
25530 if (!VOID_TYPE_P (result_type
))
25532 uint64_t to_clear_return_mask
;
25533 result_rtl
= arm_function_value (result_type
, current_function_decl
, 0);
25535 /* No need to check that we return in registers, because we don't
25536 support returning on stack yet. */
25537 gcc_assert (REG_P (result_rtl
));
25538 to_clear_return_mask
25539 = compute_not_to_clear_mask (result_type
, result_rtl
, 0,
25540 &padding_bits_to_clear
);
25541 if (to_clear_return_mask
)
25543 gcc_assert ((unsigned) maxregno
< sizeof (long long) * __CHAR_BIT__
);
25544 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
25546 if (to_clear_return_mask
& (1ULL << regno
))
25547 bitmap_clear_bit (to_clear_bitmap
, regno
);
25552 if (padding_bits_to_clear
!= 0)
25554 int to_clear_bitmap_size
= SBITMAP_SIZE ((sbitmap
) to_clear_bitmap
);
25555 auto_sbitmap
to_clear_arg_regs_bitmap (to_clear_bitmap_size
);
25557 /* Padding_bits_to_clear is not 0 so we know we are dealing with
25558 returning a composite type, which only uses r0. Let's make sure that
25559 r1-r3 is cleared too. */
25560 bitmap_clear (to_clear_arg_regs_bitmap
);
25561 bitmap_set_range (to_clear_arg_regs_bitmap
, R1_REGNUM
, NUM_ARG_REGS
- 1);
25562 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap
, to_clear_bitmap
));
25565 /* Clear full registers that leak before returning. */
25566 clearing_reg
= gen_rtx_REG (SImode
, TARGET_THUMB1
? R0_REGNUM
: LR_REGNUM
);
25567 r1_reg
= gen_rtx_REG (SImode
, R0_REGNUM
+ 1);
25568 cmse_clear_registers (to_clear_bitmap
, &padding_bits_to_clear
, 1, r1_reg
,
25572 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25573 POP instruction can be generated. LR should be replaced by PC. All
25574 the checks required are already done by USE_RETURN_INSN (). Hence,
25575 all we really need to check here is if single register is to be
25576 returned, or multiple register return. */
25578 thumb2_expand_return (bool simple_return
)
25581 unsigned long saved_regs_mask
;
25582 arm_stack_offsets
*offsets
;
25584 offsets
= arm_get_frame_offsets ();
25585 saved_regs_mask
= offsets
->saved_regs_mask
;
25587 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25588 if (saved_regs_mask
& (1 << i
))
25591 if (!simple_return
&& saved_regs_mask
)
25593 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25594 functions or adapt code to handle according to ACLE. This path should
25595 not be reachable for cmse_nonsecure_entry functions though we prefer
25596 to assert it for now to ensure that future code changes do not silently
25597 change this behavior. */
25598 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25601 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25602 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
25603 rtx addr
= gen_rtx_MEM (SImode
,
25604 gen_rtx_POST_INC (SImode
,
25605 stack_pointer_rtx
));
25606 set_mem_alias_set (addr
, get_frame_alias_set ());
25607 XVECEXP (par
, 0, 0) = ret_rtx
;
25608 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
25609 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
25610 emit_jump_insn (par
);
25614 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
25615 saved_regs_mask
|= (1 << PC_REGNUM
);
25616 arm_emit_multi_reg_pop (saved_regs_mask
);
25621 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25622 cmse_nonsecure_entry_clear_before_return ();
25623 emit_jump_insn (simple_return_rtx
);
25628 thumb1_expand_epilogue (void)
25630 HOST_WIDE_INT amount
;
25631 arm_stack_offsets
*offsets
;
25634 /* Naked functions don't have prologues. */
25635 if (IS_NAKED (arm_current_func_type ()))
25638 offsets
= arm_get_frame_offsets ();
25639 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25641 if (frame_pointer_needed
)
25643 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
25644 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25646 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
25648 gcc_assert (amount
>= 0);
25651 emit_insn (gen_blockage ());
25654 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
25655 GEN_INT (amount
)));
25658 /* r3 is always free in the epilogue. */
25659 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
25661 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
25662 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
25666 /* Emit a USE (stack_pointer_rtx), so that
25667 the stack adjustment will not be deleted. */
25668 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25670 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
25671 emit_insn (gen_blockage ());
25673 /* Emit a clobber for each insn that will be restored in the epilogue,
25674 so that flow2 will get register lifetimes correct. */
25675 for (regno
= 0; regno
< 13; regno
++)
25676 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
25677 emit_clobber (gen_rtx_REG (SImode
, regno
));
25679 if (! df_regs_ever_live_p (LR_REGNUM
))
25680 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
25682 /* Clear all caller-saved regs that are not used to return. */
25683 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25684 cmse_nonsecure_entry_clear_before_return ();
25687 /* Epilogue code for APCS frame. */
25689 arm_expand_epilogue_apcs_frame (bool really_return
)
25691 unsigned long func_type
;
25692 unsigned long saved_regs_mask
;
25695 int floats_from_frame
= 0;
25696 arm_stack_offsets
*offsets
;
25698 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
25699 func_type
= arm_current_func_type ();
25701 /* Get frame offsets for ARM. */
25702 offsets
= arm_get_frame_offsets ();
25703 saved_regs_mask
= offsets
->saved_regs_mask
;
25705 /* Find the offset of the floating-point save area in the frame. */
25707 = (offsets
->saved_args
25708 + arm_compute_static_chain_stack_bytes ()
25711 /* Compute how many core registers saved and how far away the floats are. */
25712 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25713 if (saved_regs_mask
& (1 << i
))
25716 floats_from_frame
+= 4;
25719 if (TARGET_HARD_FLOAT
)
25722 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
25724 /* The offset is from IP_REGNUM. */
25725 int saved_size
= arm_get_vfp_saved_size ();
25726 if (saved_size
> 0)
25729 floats_from_frame
+= saved_size
;
25730 insn
= emit_insn (gen_addsi3 (ip_rtx
,
25731 hard_frame_pointer_rtx
,
25732 GEN_INT (-floats_from_frame
)));
25733 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
25734 ip_rtx
, hard_frame_pointer_rtx
);
25737 /* Generate VFP register multi-pop. */
25738 start_reg
= FIRST_VFP_REGNUM
;
25740 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
25741 /* Look for a case where a reg does not need restoring. */
25742 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25743 && (!df_regs_ever_live_p (i
+ 1)
25744 || call_used_regs
[i
+ 1]))
25746 if (start_reg
!= i
)
25747 arm_emit_vfp_multi_reg_pop (start_reg
,
25748 (i
- start_reg
) / 2,
25749 gen_rtx_REG (SImode
,
25754 /* Restore the remaining regs that we have discovered (or possibly
25755 even all of them, if the conditional in the for loop never
25757 if (start_reg
!= i
)
25758 arm_emit_vfp_multi_reg_pop (start_reg
,
25759 (i
- start_reg
) / 2,
25760 gen_rtx_REG (SImode
, IP_REGNUM
));
25765 /* The frame pointer is guaranteed to be non-double-word aligned, as
25766 it is set to double-word-aligned old_stack_pointer - 4. */
25768 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
25770 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
25771 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25773 rtx addr
= gen_frame_mem (V2SImode
,
25774 plus_constant (Pmode
, hard_frame_pointer_rtx
,
25776 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25777 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25778 gen_rtx_REG (V2SImode
, i
),
25784 /* saved_regs_mask should contain IP which contains old stack pointer
25785 at the time of activation creation. Since SP and IP are adjacent registers,
25786 we can restore the value directly into SP. */
25787 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
25788 saved_regs_mask
&= ~(1 << IP_REGNUM
);
25789 saved_regs_mask
|= (1 << SP_REGNUM
);
25791 /* There are two registers left in saved_regs_mask - LR and PC. We
25792 only need to restore LR (the return address), but to
25793 save time we can load it directly into PC, unless we need a
25794 special function exit sequence, or we are not really returning. */
25796 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
25797 && !crtl
->calls_eh_return
)
25798 /* Delete LR from the register mask, so that LR on
25799 the stack is loaded into the PC in the register mask. */
25800 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25802 saved_regs_mask
&= ~(1 << PC_REGNUM
);
25804 num_regs
= bit_count (saved_regs_mask
);
25805 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
25808 emit_insn (gen_blockage ());
25809 /* Unwind the stack to just below the saved registers. */
25810 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25811 hard_frame_pointer_rtx
,
25812 GEN_INT (- 4 * num_regs
)));
25814 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
25815 stack_pointer_rtx
, hard_frame_pointer_rtx
);
25818 arm_emit_multi_reg_pop (saved_regs_mask
);
25820 if (IS_INTERRUPT (func_type
))
25822 /* Interrupt handlers will have pushed the
25823 IP onto the stack, so restore it now. */
25825 rtx addr
= gen_rtx_MEM (SImode
,
25826 gen_rtx_POST_INC (SImode
,
25827 stack_pointer_rtx
));
25828 set_mem_alias_set (addr
, get_frame_alias_set ());
25829 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
25830 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25831 gen_rtx_REG (SImode
, IP_REGNUM
),
25835 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
25838 if (crtl
->calls_eh_return
)
25839 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25841 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25843 if (IS_STACKALIGN (func_type
))
25844 /* Restore the original stack pointer. Before prologue, the stack was
25845 realigned and the original stack pointer saved in r0. For details,
25846 see comment in arm_expand_prologue. */
25847 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25849 emit_jump_insn (simple_return_rtx
);
25852 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25853 function is not a sibcall. */
25855 arm_expand_epilogue (bool really_return
)
25857 unsigned long func_type
;
25858 unsigned long saved_regs_mask
;
25862 arm_stack_offsets
*offsets
;
25864 func_type
= arm_current_func_type ();
25866 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25867 let output_return_instruction take care of instruction emission if any. */
25868 if (IS_NAKED (func_type
)
25869 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
25872 emit_jump_insn (simple_return_rtx
);
25876 /* If we are throwing an exception, then we really must be doing a
25877 return, so we can't tail-call. */
25878 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
25880 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
25882 arm_expand_epilogue_apcs_frame (really_return
);
25886 /* Get frame offsets for ARM. */
25887 offsets
= arm_get_frame_offsets ();
25888 saved_regs_mask
= offsets
->saved_regs_mask
;
25889 num_regs
= bit_count (saved_regs_mask
);
25891 if (frame_pointer_needed
)
25894 /* Restore stack pointer if necessary. */
25897 /* In ARM mode, frame pointer points to first saved register.
25898 Restore stack pointer to last saved register. */
25899 amount
= offsets
->frame
- offsets
->saved_regs
;
25901 /* Force out any pending memory operations that reference stacked data
25902 before stack de-allocation occurs. */
25903 emit_insn (gen_blockage ());
25904 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25905 hard_frame_pointer_rtx
,
25906 GEN_INT (amount
)));
25907 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25909 hard_frame_pointer_rtx
);
25911 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25913 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25917 /* In Thumb-2 mode, the frame pointer points to the last saved
25919 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25922 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
25923 hard_frame_pointer_rtx
,
25924 GEN_INT (amount
)));
25925 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25926 hard_frame_pointer_rtx
,
25927 hard_frame_pointer_rtx
);
25930 /* Force out any pending memory operations that reference stacked data
25931 before stack de-allocation occurs. */
25932 emit_insn (gen_blockage ());
25933 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
25934 hard_frame_pointer_rtx
));
25935 arm_add_cfa_adjust_cfa_note (insn
, 0,
25937 hard_frame_pointer_rtx
);
25938 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25940 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25945 /* Pop off outgoing args and local frame to adjust stack pointer to
25946 last saved register. */
25947 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25951 /* Force out any pending memory operations that reference stacked data
25952 before stack de-allocation occurs. */
25953 emit_insn (gen_blockage ());
25954 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25956 GEN_INT (amount
)));
25957 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25958 stack_pointer_rtx
, stack_pointer_rtx
);
25959 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25961 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25965 if (TARGET_HARD_FLOAT
)
25967 /* Generate VFP register multi-pop. */
25968 int end_reg
= LAST_VFP_REGNUM
+ 1;
25970 /* Scan the registers in reverse order. We need to match
25971 any groupings made in the prologue and generate matching
25972 vldm operations. The need to match groups is because,
25973 unlike pop, vldm can only do consecutive regs. */
25974 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
25975 /* Look for a case where a reg does not need restoring. */
25976 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25977 && (!df_regs_ever_live_p (i
+ 1)
25978 || call_used_regs
[i
+ 1]))
25980 /* Restore the regs discovered so far (from reg+2 to
25982 if (end_reg
> i
+ 2)
25983 arm_emit_vfp_multi_reg_pop (i
+ 2,
25984 (end_reg
- (i
+ 2)) / 2,
25985 stack_pointer_rtx
);
25989 /* Restore the remaining regs that we have discovered (or possibly
25990 even all of them, if the conditional in the for loop never
25992 if (end_reg
> i
+ 2)
25993 arm_emit_vfp_multi_reg_pop (i
+ 2,
25994 (end_reg
- (i
+ 2)) / 2,
25995 stack_pointer_rtx
);
25999 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
26000 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
26003 rtx addr
= gen_rtx_MEM (V2SImode
,
26004 gen_rtx_POST_INC (SImode
,
26005 stack_pointer_rtx
));
26006 set_mem_alias_set (addr
, get_frame_alias_set ());
26007 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
26008 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
26009 gen_rtx_REG (V2SImode
, i
),
26011 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
26012 stack_pointer_rtx
, stack_pointer_rtx
);
26015 if (saved_regs_mask
)
26018 bool return_in_pc
= false;
26020 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
26021 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
26022 && !IS_CMSE_ENTRY (func_type
)
26023 && !IS_STACKALIGN (func_type
)
26025 && crtl
->args
.pretend_args_size
== 0
26026 && saved_regs_mask
& (1 << LR_REGNUM
)
26027 && !crtl
->calls_eh_return
)
26029 saved_regs_mask
&= ~(1 << LR_REGNUM
);
26030 saved_regs_mask
|= (1 << PC_REGNUM
);
26031 return_in_pc
= true;
26034 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
26036 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
26037 if (saved_regs_mask
& (1 << i
))
26039 rtx addr
= gen_rtx_MEM (SImode
,
26040 gen_rtx_POST_INC (SImode
,
26041 stack_pointer_rtx
));
26042 set_mem_alias_set (addr
, get_frame_alias_set ());
26044 if (i
== PC_REGNUM
)
26046 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
26047 XVECEXP (insn
, 0, 0) = ret_rtx
;
26048 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
26050 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
26051 insn
= emit_jump_insn (insn
);
26055 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
26057 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
26058 gen_rtx_REG (SImode
, i
),
26060 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
26062 stack_pointer_rtx
);
26069 && current_tune
->prefer_ldrd_strd
26070 && !optimize_function_for_size_p (cfun
))
26073 thumb2_emit_ldrd_pop (saved_regs_mask
);
26074 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
26075 arm_emit_ldrd_pop (saved_regs_mask
);
26077 arm_emit_multi_reg_pop (saved_regs_mask
);
26080 arm_emit_multi_reg_pop (saved_regs_mask
);
26088 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
26092 rtx dwarf
= NULL_RTX
;
26094 emit_insn (gen_addsi3 (stack_pointer_rtx
,
26096 GEN_INT (amount
)));
26098 RTX_FRAME_RELATED_P (tmp
) = 1;
26100 if (cfun
->machine
->uses_anonymous_args
)
26102 /* Restore pretend args. Refer arm_expand_prologue on how to save
26103 pretend_args in stack. */
26104 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
26105 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
26106 for (j
= 0, i
= 0; j
< num_regs
; i
++)
26107 if (saved_regs_mask
& (1 << i
))
26109 rtx reg
= gen_rtx_REG (SImode
, i
);
26110 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
26113 REG_NOTES (tmp
) = dwarf
;
26115 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
26116 stack_pointer_rtx
, stack_pointer_rtx
);
26119 /* Clear all caller-saved regs that are not used to return. */
26120 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26122 /* CMSE_ENTRY always returns. */
26123 gcc_assert (really_return
);
26124 cmse_nonsecure_entry_clear_before_return ();
26127 if (!really_return
)
26130 if (crtl
->calls_eh_return
)
26131 emit_insn (gen_addsi3 (stack_pointer_rtx
,
26133 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
26135 if (IS_STACKALIGN (func_type
))
26136 /* Restore the original stack pointer. Before prologue, the stack was
26137 realigned and the original stack pointer saved in r0. For details,
26138 see comment in arm_expand_prologue. */
26139 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
26141 emit_jump_insn (simple_return_rtx
);
26144 /* Implementation of insn prologue_thumb1_interwork. This is the first
26145 "instruction" of a function called in ARM mode. Swap to thumb mode. */
26148 thumb1_output_interwork (void)
26151 FILE *f
= asm_out_file
;
26153 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
26154 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
26156 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
26158 /* Generate code sequence to switch us into Thumb mode. */
26159 /* The .code 32 directive has already been emitted by
26160 ASM_DECLARE_FUNCTION_NAME. */
26161 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
26162 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
26164 /* Generate a label, so that the debugger will notice the
26165 change in instruction sets. This label is also used by
26166 the assembler to bypass the ARM code when this function
26167 is called from a Thumb encoded function elsewhere in the
26168 same file. Hence the definition of STUB_NAME here must
26169 agree with the definition in gas/config/tc-arm.c. */
26171 #define STUB_NAME ".real_start_of"
26173 fprintf (f
, "\t.code\t16\n");
26175 if (arm_dllexport_name_p (name
))
26176 name
= arm_strip_name_encoding (name
);
26178 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
26179 fprintf (f
, "\t.thumb_func\n");
26180 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
26185 /* Handle the case of a double word load into a low register from
26186 a computed memory address. The computed address may involve a
26187 register which is overwritten by the load. */
26189 thumb_load_double_from_address (rtx
*operands
)
26197 gcc_assert (REG_P (operands
[0]));
26198 gcc_assert (MEM_P (operands
[1]));
26200 /* Get the memory address. */
26201 addr
= XEXP (operands
[1], 0);
26203 /* Work out how the memory address is computed. */
26204 switch (GET_CODE (addr
))
26207 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26209 if (REGNO (operands
[0]) == REGNO (addr
))
26211 output_asm_insn ("ldr\t%H0, %2", operands
);
26212 output_asm_insn ("ldr\t%0, %1", operands
);
26216 output_asm_insn ("ldr\t%0, %1", operands
);
26217 output_asm_insn ("ldr\t%H0, %2", operands
);
26222 /* Compute <address> + 4 for the high order load. */
26223 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26225 output_asm_insn ("ldr\t%0, %1", operands
);
26226 output_asm_insn ("ldr\t%H0, %2", operands
);
26230 arg1
= XEXP (addr
, 0);
26231 arg2
= XEXP (addr
, 1);
26233 if (CONSTANT_P (arg1
))
26234 base
= arg2
, offset
= arg1
;
26236 base
= arg1
, offset
= arg2
;
26238 gcc_assert (REG_P (base
));
26240 /* Catch the case of <address> = <reg> + <reg> */
26241 if (REG_P (offset
))
26243 int reg_offset
= REGNO (offset
);
26244 int reg_base
= REGNO (base
);
26245 int reg_dest
= REGNO (operands
[0]);
26247 /* Add the base and offset registers together into the
26248 higher destination register. */
26249 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
26250 reg_dest
+ 1, reg_base
, reg_offset
);
26252 /* Load the lower destination register from the address in
26253 the higher destination register. */
26254 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
26255 reg_dest
, reg_dest
+ 1);
26257 /* Load the higher destination register from its own address
26259 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
26260 reg_dest
+ 1, reg_dest
+ 1);
26264 /* Compute <address> + 4 for the high order load. */
26265 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26267 /* If the computed address is held in the low order register
26268 then load the high order register first, otherwise always
26269 load the low order register first. */
26270 if (REGNO (operands
[0]) == REGNO (base
))
26272 output_asm_insn ("ldr\t%H0, %2", operands
);
26273 output_asm_insn ("ldr\t%0, %1", operands
);
26277 output_asm_insn ("ldr\t%0, %1", operands
);
26278 output_asm_insn ("ldr\t%H0, %2", operands
);
26284 /* With no registers to worry about we can just load the value
26286 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26288 output_asm_insn ("ldr\t%H0, %2", operands
);
26289 output_asm_insn ("ldr\t%0, %1", operands
);
26293 gcc_unreachable ();
26300 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
26305 if (REGNO (operands
[4]) > REGNO (operands
[5]))
26306 std::swap (operands
[4], operands
[5]);
26308 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
26309 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
26313 if (REGNO (operands
[4]) > REGNO (operands
[5]))
26314 std::swap (operands
[4], operands
[5]);
26315 if (REGNO (operands
[5]) > REGNO (operands
[6]))
26316 std::swap (operands
[5], operands
[6]);
26317 if (REGNO (operands
[4]) > REGNO (operands
[5]))
26318 std::swap (operands
[4], operands
[5]);
26320 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
26321 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
26325 gcc_unreachable ();
26331 /* Output a call-via instruction for thumb state. */
26333 thumb_call_via_reg (rtx reg
)
26335 int regno
= REGNO (reg
);
26338 gcc_assert (regno
< LR_REGNUM
);
26340 /* If we are in the normal text section we can use a single instance
26341 per compilation unit. If we are doing function sections, then we need
26342 an entry per section, since we can't rely on reachability. */
26343 if (in_section
== text_section
)
26345 thumb_call_reg_needed
= 1;
26347 if (thumb_call_via_label
[regno
] == NULL
)
26348 thumb_call_via_label
[regno
] = gen_label_rtx ();
26349 labelp
= thumb_call_via_label
+ regno
;
26353 if (cfun
->machine
->call_via
[regno
] == NULL
)
26354 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
26355 labelp
= cfun
->machine
->call_via
+ regno
;
26358 output_asm_insn ("bl\t%a0", labelp
);
26362 /* Routines for generating rtl. */
26364 thumb_expand_cpymemqi (rtx
*operands
)
26366 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
26367 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
26368 HOST_WIDE_INT len
= INTVAL (operands
[2]);
26369 HOST_WIDE_INT offset
= 0;
26373 emit_insn (gen_cpymem12b (out
, in
, out
, in
));
26379 emit_insn (gen_cpymem8b (out
, in
, out
, in
));
26385 rtx reg
= gen_reg_rtx (SImode
);
26386 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
26387 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
26394 rtx reg
= gen_reg_rtx (HImode
);
26395 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
26396 plus_constant (Pmode
, in
,
26398 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
26407 rtx reg
= gen_reg_rtx (QImode
);
26408 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
26409 plus_constant (Pmode
, in
,
26411 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
26418 thumb_reload_out_hi (rtx
*operands
)
26420 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
26423 /* Return the length of a function name prefix
26424 that starts with the character 'c'. */
26426 arm_get_strip_length (int c
)
26430 ARM_NAME_ENCODING_LENGTHS
/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  /* Repeatedly skip recognized prefixes until none remain.  */
  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}
/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
26469 /* This function is used to emit an EABI tag and its associated value.
26470 We emit the numerical value of the tag in case the assembler does not
26471 support textual tags. (Eg gas prior to 2.20). If requested we include
26472 the tag name in a comment so that anyone reading the assembler output
26473 will know which tag is being set.
26475 This function is not static because arm-c.c needs it too. */
26478 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
26480 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
26481 if (flag_verbose_asm
|| flag_debug_asm
)
26482 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
26483 asm_fprintf (asm_out_file
, "\n");
26486 /* This function is used to print CPU tuning information as comment
26487 in assembler file. Pointers are not printed for now. */
26490 arm_print_tune_info (void)
26492 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune parameters\n");
26493 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"constant_limit:\t%d\n",
26494 current_tune
->constant_limit
);
26495 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26496 "max_insns_skipped:\t%d\n", current_tune
->max_insns_skipped
);
26497 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26498 "prefetch.num_slots:\t%d\n", current_tune
->prefetch
.num_slots
);
26499 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26500 "prefetch.l1_cache_size:\t%d\n",
26501 current_tune
->prefetch
.l1_cache_size
);
26502 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26503 "prefetch.l1_cache_line_size:\t%d\n",
26504 current_tune
->prefetch
.l1_cache_line_size
);
26505 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26506 "prefer_constant_pool:\t%d\n",
26507 (int) current_tune
->prefer_constant_pool
);
26508 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26509 "branch_cost:\t(s:speed, p:predictable)\n");
26510 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\ts&p\tcost\n");
26511 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t00\t%d\n",
26512 current_tune
->branch_cost (false, false));
26513 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t01\t%d\n",
26514 current_tune
->branch_cost (false, true));
26515 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t10\t%d\n",
26516 current_tune
->branch_cost (true, false));
26517 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t11\t%d\n",
26518 current_tune
->branch_cost (true, true));
26519 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26520 "prefer_ldrd_strd:\t%d\n",
26521 (int) current_tune
->prefer_ldrd_strd
);
26522 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26523 "logical_op_non_short_circuit:\t[%d,%d]\n",
26524 (int) current_tune
->logical_op_non_short_circuit_thumb
,
26525 (int) current_tune
->logical_op_non_short_circuit_arm
);
26526 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26527 "prefer_neon_for_64bits:\t%d\n",
26528 (int) current_tune
->prefer_neon_for_64bits
);
26529 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26530 "disparage_flag_setting_t16_encodings:\t%d\n",
26531 (int) current_tune
->disparage_flag_setting_t16_encodings
);
26532 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26533 "string_ops_prefer_neon:\t%d\n",
26534 (int) current_tune
->string_ops_prefer_neon
);
26535 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26536 "max_insns_inline_memset:\t%d\n",
26537 current_tune
->max_insns_inline_memset
);
26538 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"fusible_ops:\t%u\n",
26539 current_tune
->fusible_ops
);
26540 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"sched_autopref:\t%d\n",
26541 (int) current_tune
->sched_autopref
);
26544 /* Print .arch and .arch_extension directives corresponding to the
26545 current architecture configuration. */
26547 arm_print_asm_arch_directives ()
26549 const arch_option
*arch
26550 = arm_parse_arch_option_name (all_architectures
, "-march",
26551 arm_active_target
.arch_name
);
26552 auto_sbitmap
opt_bits (isa_num_bits
);
26556 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_active_target
.arch_name
);
26557 arm_last_printed_arch_string
= arm_active_target
.arch_name
;
26558 if (!arch
->common
.extensions
)
26561 for (const struct cpu_arch_extension
*opt
= arch
->common
.extensions
;
26567 arm_initialize_isa (opt_bits
, opt
->isa_bits
);
26569 /* If every feature bit of this option is set in the target
26570 ISA specification, print out the option name. However,
26571 don't print anything if all the bits are part of the
26572 FPU specification. */
26573 if (bitmap_subset_p (opt_bits
, arm_active_target
.isa
)
26574 && !bitmap_subset_p (opt_bits
, isa_all_fpubits
))
26575 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", opt
->name
);
26581 arm_file_start (void)
26587 /* We don't have a specified CPU. Use the architecture to
26590 Note: it might be better to do this unconditionally, then the
26591 assembler would not need to know about all new CPU names as
26593 if (!arm_active_target
.core_name
)
26595 /* armv7ve doesn't support any extensions. */
26596 if (strcmp (arm_active_target
.arch_name
, "armv7ve") == 0)
26598 /* Keep backward compatability for assemblers
26599 which don't support armv7ve. */
26600 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
26601 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
26602 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
26603 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
26604 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
26605 arm_last_printed_arch_string
= "armv7ve";
26608 arm_print_asm_arch_directives ();
26610 else if (strncmp (arm_active_target
.core_name
, "generic", 7) == 0)
26612 asm_fprintf (asm_out_file
, "\t.arch %s\n",
26613 arm_active_target
.core_name
+ 8);
26614 arm_last_printed_arch_string
= arm_active_target
.core_name
+ 8;
26618 const char* truncated_name
26619 = arm_rewrite_selected_cpu (arm_active_target
.core_name
);
26620 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
26623 if (print_tune_info
)
26624 arm_print_tune_info ();
26626 if (! TARGET_SOFT_FLOAT
)
26628 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
26629 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26631 if (TARGET_HARD_FLOAT_ABI
)
26632 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26635 /* Some of these attributes only apply when the corresponding features
26636 are used. However we don't have any easy way of figuring this out.
26637 Conservatively record the setting that would have been used. */
26639 if (flag_rounding_math
)
26640 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26642 if (!flag_unsafe_math_optimizations
)
26644 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26645 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26647 if (flag_signaling_nans
)
26648 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26650 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26651 flag_finite_math_only
? 1 : 3);
26653 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26654 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26655 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26656 flag_short_enums
? 1 : 2);
26658 /* Tag_ABI_optimization_goals. */
26661 else if (optimize
>= 2)
26667 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
26669 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26672 if (arm_fp16_format
)
26673 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26674 (int) arm_fp16_format
);
26676 if (arm_lang_output_object_attributes_hook
)
26677 arm_lang_output_object_attributes_hook();
26680 default_file_start ();
26684 arm_file_end (void)
26688 if (NEED_INDICATE_EXEC_STACK
)
26689 /* Add .note.GNU-stack. */
26690 file_end_indicate_exec_stack ();
26692 if (! thumb_call_reg_needed
)
26695 switch_to_section (text_section
);
26696 asm_fprintf (asm_out_file
, "\t.code 16\n");
26697 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
26699 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
26701 rtx label
= thumb_call_via_label
[regno
];
26705 targetm
.asm_out
.internal_label (asm_out_file
, "L",
26706 CODE_LABEL_NUMBER (label
));
26707 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
26713 /* Symbols in the text segment can be accessed without indirecting via the
26714 constant pool; it may take an extra binary operation, but this is still
26715 faster than indirecting via memory. Don't do this when not optimizing,
26716 since we won't be calculating al of the offsets necessary to do this
26720 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
26722 if (optimize
> 0 && TREE_CONSTANT (decl
))
26723 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
26725 default_encode_section_info (decl
, rtl
, first
);
26727 #endif /* !ARM_PE */
26730 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
26732 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
26733 && !strcmp (prefix
, "L"))
26735 arm_ccfsm_state
= 0;
26736 arm_target_insn
= NULL
;
26738 default_internal_label (stream
, prefix
, labelno
);
26741 /* Output code to add DELTA to the first argument, and then jump
26742 to FUNCTION. Used for C++ multiple inheritance. */
26745 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26746 HOST_WIDE_INT
, tree function
)
26748 static int thunk_label
= 0;
26751 int mi_delta
= delta
;
26752 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
26754 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
26757 mi_delta
= - mi_delta
;
26759 final_start_function (emit_barrier (), file
, 1);
26763 int labelno
= thunk_label
++;
26764 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
26765 /* Thunks are entered in arm mode when available. */
26766 if (TARGET_THUMB1_ONLY
)
26768 /* push r3 so we can use it as a temporary. */
26769 /* TODO: Omit this save if r3 is not used. */
26770 fputs ("\tpush {r3}\n", file
);
26771 fputs ("\tldr\tr3, ", file
);
26775 fputs ("\tldr\tr12, ", file
);
26777 assemble_name (file
, label
);
26778 fputc ('\n', file
);
26781 /* If we are generating PIC, the ldr instruction below loads
26782 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26783 the address of the add + 8, so we have:
26785 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26788 Note that we have "+ 1" because some versions of GNU ld
26789 don't set the low bit of the result for R_ARM_REL32
26790 relocations against thumb function symbols.
26791 On ARMv6M this is +4, not +8. */
26792 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
26793 assemble_name (file
, labelpc
);
26794 fputs (":\n", file
);
26795 if (TARGET_THUMB1_ONLY
)
26797 /* This is 2 insns after the start of the thunk, so we know it
26798 is 4-byte aligned. */
26799 fputs ("\tadd\tr3, pc, r3\n", file
);
26800 fputs ("\tmov r12, r3\n", file
);
26803 fputs ("\tadd\tr12, pc, r12\n", file
);
26805 else if (TARGET_THUMB1_ONLY
)
26806 fputs ("\tmov r12, r3\n", file
);
26808 if (TARGET_THUMB1_ONLY
)
26810 if (mi_delta
> 255)
26812 fputs ("\tldr\tr3, ", file
);
26813 assemble_name (file
, label
);
26814 fputs ("+4\n", file
);
26815 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
26816 mi_op
, this_regno
, this_regno
);
26818 else if (mi_delta
!= 0)
26820 /* Thumb1 unified syntax requires s suffix in instruction name when
26821 one of the operands is immediate. */
26822 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
26823 mi_op
, this_regno
, this_regno
,
26829 /* TODO: Use movw/movt for large constants when available. */
26830 while (mi_delta
!= 0)
26832 if ((mi_delta
& (3 << shift
)) == 0)
26836 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
26837 mi_op
, this_regno
, this_regno
,
26838 mi_delta
& (0xff << shift
));
26839 mi_delta
&= ~(0xff << shift
);
26846 if (TARGET_THUMB1_ONLY
)
26847 fputs ("\tpop\t{r3}\n", file
);
26849 fprintf (file
, "\tbx\tr12\n");
26850 ASM_OUTPUT_ALIGN (file
, 2);
26851 assemble_name (file
, label
);
26852 fputs (":\n", file
);
26855 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26856 rtx tem
= XEXP (DECL_RTL (function
), 0);
26857 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26858 pipeline offset is four rather than eight. Adjust the offset
26860 tem
= plus_constant (GET_MODE (tem
), tem
,
26861 TARGET_THUMB1_ONLY
? -3 : -7);
26862 tem
= gen_rtx_MINUS (GET_MODE (tem
),
26864 gen_rtx_SYMBOL_REF (Pmode
,
26865 ggc_strdup (labelpc
)));
26866 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
26869 /* Output ".word .LTHUNKn". */
26870 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
26872 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
26873 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
26877 fputs ("\tb\t", file
);
26878 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
26879 if (NEED_PLT_RELOC
)
26880 fputs ("(PLT)", file
);
26881 fputc ('\n', file
);
26884 final_end_function ();
26887 /* MI thunk handling for TARGET_32BIT. */
26890 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26891 HOST_WIDE_INT vcall_offset
, tree function
)
26893 const bool long_call_p
= arm_is_long_call_p (function
);
26895 /* On ARM, this_regno is R0 or R1 depending on
26896 whether the function returns an aggregate or not.
26898 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
26900 ? R1_REGNUM
: R0_REGNUM
);
26902 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
26903 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
26904 reload_completed
= 1;
26905 emit_note (NOTE_INSN_PROLOGUE_END
);
26907 /* Add DELTA to THIS_RTX. */
26909 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
26910 delta
, this_rtx
, this_rtx
, false);
26912 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26913 if (vcall_offset
!= 0)
26915 /* Load *THIS_RTX. */
26916 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
26917 /* Compute *THIS_RTX + VCALL_OFFSET. */
26918 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
26920 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26921 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
26922 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
26925 /* Generate a tail call to the target function. */
26926 if (!TREE_USED (function
))
26928 assemble_external (function
);
26929 TREE_USED (function
) = 1;
26931 rtx funexp
= XEXP (DECL_RTL (function
), 0);
26934 emit_move_insn (temp
, funexp
);
26937 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
26938 rtx_insn
*insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
26939 SIBLING_CALL_P (insn
) = 1;
26942 /* Indirect calls require a bit of fixup in PIC mode. */
26945 split_all_insns_noflow ();
26949 insn
= get_insns ();
26950 shorten_branches (insn
);
26951 final_start_function (insn
, file
, 1);
26952 final (insn
, file
, 1);
26953 final_end_function ();
26955 /* Stop pretending this is a post-reload pass. */
26956 reload_completed
= 0;
26959 /* Output code to add DELTA to the first argument, and then jump
26960 to FUNCTION. Used for C++ multiple inheritance. */
26963 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
26964 HOST_WIDE_INT vcall_offset
, tree function
)
26966 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk
));
26968 assemble_start_function (thunk
, fnname
);
26970 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26972 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26973 assemble_end_function (thunk
, fnname
);
26977 arm_emit_vector_const (FILE *file
, rtx x
)
26980 const char * pattern
;
26982 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
26984 switch (GET_MODE (x
))
26986 case E_V2SImode
: pattern
= "%08x"; break;
26987 case E_V4HImode
: pattern
= "%04x"; break;
26988 case E_V8QImode
: pattern
= "%02x"; break;
26989 default: gcc_unreachable ();
26992 fprintf (file
, "0x");
26993 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
26997 element
= CONST_VECTOR_ELT (x
, i
);
26998 fprintf (file
, pattern
, INTVAL (element
));
27004 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
27005 HFmode constant pool entries are actually loaded with ldr. */
27007 arm_emit_fp16_const (rtx c
)
27011 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
27012 if (WORDS_BIG_ENDIAN
)
27013 assemble_zeros (2);
27014 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
27015 if (!WORDS_BIG_ENDIAN
)
27016 assemble_zeros (2);
27020 arm_output_load_gr (rtx
*operands
)
27027 if (!MEM_P (operands
[1])
27028 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
27029 || !REG_P (reg
= XEXP (sum
, 0))
27030 || !CONST_INT_P (offset
= XEXP (sum
, 1))
27031 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
27032 return "wldrw%?\t%0, %1";
27034 /* Fix up an out-of-range load of a GR register. */
27035 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
27036 wcgr
= operands
[0];
27038 output_asm_insn ("ldr%?\t%0, %1", operands
);
27040 operands
[0] = wcgr
;
27042 output_asm_insn ("tmcr%?\t%0, %1", operands
);
27043 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
27048 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
27050 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
27051 named arg and all anonymous args onto the stack.
27052 XXX I know the prologue shouldn't be pushing registers, but it is faster
27056 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
27060 int second_time ATTRIBUTE_UNUSED
)
27062 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
27065 cfun
->machine
->uses_anonymous_args
= 1;
27066 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
27068 nregs
= pcum
->aapcs_ncrn
;
27071 int res
= arm_needs_doubleword_align (mode
, type
);
27072 if (res
< 0 && warn_psabi
)
27073 inform (input_location
, "parameter passing for argument of "
27074 "type %qT changed in GCC 7.1", type
);
27078 if (res
> 1 && warn_psabi
)
27079 inform (input_location
,
27080 "parameter passing for argument of type "
27081 "%qT changed in GCC 9.1", type
);
27086 nregs
= pcum
->nregs
;
27088 if (nregs
< NUM_ARG_REGS
)
27089 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
27092 /* We can't rely on the caller doing the proper promotion when
27093 using APCS or ATPCS. */
27096 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
27098 return !TARGET_AAPCS_BASED
;
27101 static machine_mode
27102 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
27104 int *punsignedp ATTRIBUTE_UNUSED
,
27105 const_tree fntype ATTRIBUTE_UNUSED
,
27106 int for_return ATTRIBUTE_UNUSED
)
27108 if (GET_MODE_CLASS (mode
) == MODE_INT
27109 && GET_MODE_SIZE (mode
) < 4)
27117 arm_default_short_enums (void)
27119 return ARM_DEFAULT_SHORT_ENUMS
;
27123 /* AAPCS requires that anonymous bitfields affect structure alignment. */
27126 arm_align_anon_bitfield (void)
27128 return TARGET_AAPCS_BASED
;
27132 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
27135 arm_cxx_guard_type (void)
27137 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
27141 /* The EABI says test the least significant bit of a guard variable. */
27144 arm_cxx_guard_mask_bit (void)
27146 return TARGET_AAPCS_BASED
;
27150 /* The EABI specifies that all array cookies are 8 bytes long. */
27153 arm_get_cookie_size (tree type
)
27157 if (!TARGET_AAPCS_BASED
)
27158 return default_cxx_get_cookie_size (type
);
27160 size
= build_int_cst (sizetype
, 8);
27165 /* The EABI says that array cookies should also contain the element size. */
27168 arm_cookie_has_size (void)
27170 return TARGET_AAPCS_BASED
;
27174 /* The EABI says constructors and destructors should return a pointer to
27175 the object constructed/destroyed. */
27178 arm_cxx_cdtor_returns_this (void)
27180 return TARGET_AAPCS_BASED
;
27183 /* The EABI says that an inline function may never be the key
27187 arm_cxx_key_method_may_be_inline (void)
27189 return !TARGET_AAPCS_BASED
;
27193 arm_cxx_determine_class_data_visibility (tree decl
)
27195 if (!TARGET_AAPCS_BASED
27196 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
27199 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27200 is exported. However, on systems without dynamic vague linkage,
27201 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27202 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
27203 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
27205 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
27206 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
27210 arm_cxx_class_data_always_comdat (void)
27212 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27213 vague linkage if the class has no key function. */
27214 return !TARGET_AAPCS_BASED
;
27218 /* The EABI says __aeabi_atexit should be used to register static
27222 arm_cxx_use_aeabi_atexit (void)
27224 return TARGET_AAPCS_BASED
;
27229 arm_set_return_address (rtx source
, rtx scratch
)
27231 arm_stack_offsets
*offsets
;
27232 HOST_WIDE_INT delta
;
27234 unsigned long saved_regs
;
27236 offsets
= arm_get_frame_offsets ();
27237 saved_regs
= offsets
->saved_regs_mask
;
27239 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
27240 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
27243 if (frame_pointer_needed
)
27244 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
27247 /* LR will be the first saved register. */
27248 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
27253 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
27254 GEN_INT (delta
& ~4095)));
27259 addr
= stack_pointer_rtx
;
27261 addr
= plus_constant (Pmode
, addr
, delta
);
27264 /* The store needs to be marked to prevent DSE from deleting
27265 it as dead if it is based on fp. */
27266 mem
= gen_frame_mem (Pmode
, addr
);
27267 MEM_VOLATILE_P (mem
) = true;
27268 emit_move_insn (mem
, source
);
27274 thumb_set_return_address (rtx source
, rtx scratch
)
27276 arm_stack_offsets
*offsets
;
27277 HOST_WIDE_INT delta
;
27278 HOST_WIDE_INT limit
;
27281 unsigned long mask
;
27285 offsets
= arm_get_frame_offsets ();
27286 mask
= offsets
->saved_regs_mask
;
27287 if (mask
& (1 << LR_REGNUM
))
27290 /* Find the saved regs. */
27291 if (frame_pointer_needed
)
27293 delta
= offsets
->soft_frame
- offsets
->saved_args
;
27294 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
27300 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
27303 /* Allow for the stack frame. */
27304 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
27306 /* The link register is always the first saved register. */
27309 /* Construct the address. */
27310 addr
= gen_rtx_REG (SImode
, reg
);
27313 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
27314 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
27318 addr
= plus_constant (Pmode
, addr
, delta
);
27320 /* The store needs to be marked to prevent DSE from deleting
27321 it as dead if it is based on fp. */
27322 mem
= gen_frame_mem (Pmode
, addr
);
27323 MEM_VOLATILE_P (mem
) = true;
27324 emit_move_insn (mem
, source
);
27327 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
27330 /* Implements target hook vector_mode_supported_p. */
27332 arm_vector_mode_supported_p (machine_mode mode
)
27334 /* Neon also supports V2SImode, etc. listed in the clause below. */
27335 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
27336 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
27337 || mode
== V2DImode
|| mode
== V8HFmode
))
27340 if ((TARGET_NEON
|| TARGET_IWMMXT
)
27341 && ((mode
== V2SImode
)
27342 || (mode
== V4HImode
)
27343 || (mode
== V8QImode
)))
27346 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
27347 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
27348 || mode
== V2HAmode
))
27354 /* Implements target hook array_mode_supported_p. */
27357 arm_array_mode_supported_p (machine_mode mode
,
27358 unsigned HOST_WIDE_INT nelems
)
27360 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
27361 for now, as the lane-swapping logic needs to be extended in the expanders.
27362 See PR target/82518. */
27363 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
27364 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
27365 && (nelems
>= 2 && nelems
<= 4))
27371 /* Use the option -mvectorize-with-neon-double to override the use of quardword
27372 registers when autovectorizing for Neon, at least until multiple vector
27373 widths are supported properly by the middle-end. */
27375 static machine_mode
27376 arm_preferred_simd_mode (scalar_mode mode
)
27382 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
27384 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
27386 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
27388 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
27390 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
27397 if (TARGET_REALLY_IWMMXT
)
27413 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27415 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27416 using r0-r4 for function arguments, r7 for the stack frame and don't have
27417 enough left over to do doubleword arithmetic. For Thumb-2 all the
27418 potentially problematic instructions accept high registers so this is not
27419 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27420 that require many low registers. */
27422 arm_class_likely_spilled_p (reg_class_t rclass
)
27424 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
27425 || rclass
== CC_REG
)
27431 /* Implements target hook small_register_classes_for_mode_p. */
27433 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
27435 return TARGET_THUMB1
;
27438 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27439 ARM insns and therefore guarantee that the shift count is modulo 256.
27440 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27441 guarantee no particular behavior for out-of-range counts. */
27443 static unsigned HOST_WIDE_INT
27444 arm_shift_truncation_mask (machine_mode mode
)
27446 return mode
== SImode
? 255 : 0;
27450 /* Map internal gcc register numbers to DWARF2 register numbers. */
27453 arm_dbx_register_number (unsigned int regno
)
27458 if (IS_VFP_REGNUM (regno
))
27460 /* See comment in arm_dwarf_register_span. */
27461 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
27462 return 64 + regno
- FIRST_VFP_REGNUM
;
27464 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
27467 if (IS_IWMMXT_GR_REGNUM (regno
))
27468 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
27470 if (IS_IWMMXT_REGNUM (regno
))
27471 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
27473 return DWARF_FRAME_REGISTERS
;
27476 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27477 GCC models tham as 64 32-bit registers, so we need to describe this to
27478 the DWARF generation code. Other registers can use the default. */
27480 arm_dwarf_register_span (rtx rtl
)
27488 regno
= REGNO (rtl
);
27489 if (!IS_VFP_REGNUM (regno
))
27492 /* XXX FIXME: The EABI defines two VFP register ranges:
27493 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27495 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27496 corresponding D register. Until GDB supports this, we shall use the
27497 legacy encodings. We also use these encodings for D0-D15 for
27498 compatibility with older debuggers. */
27499 mode
= GET_MODE (rtl
);
27500 if (GET_MODE_SIZE (mode
) < 8)
27503 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
27505 nregs
= GET_MODE_SIZE (mode
) / 4;
27506 for (i
= 0; i
< nregs
; i
+= 2)
27507 if (TARGET_BIG_END
)
27509 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
27510 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
27514 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
27515 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
27520 nregs
= GET_MODE_SIZE (mode
) / 8;
27521 for (i
= 0; i
< nregs
; i
++)
27522 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
27525 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
27528 #if ARM_UNWIND_INFO
27529 /* Emit unwind directives for a store-multiple instruction or stack pointer
27530 push during alignment.
27531 These should only ever be generated by the function prologue code, so
27532 expect them to have a particular form.
27533 The store-multiple instruction sometimes pushes pc as the last register,
27534 although it should not be tracked into unwind information, or for -Os
27535 sometimes pushes some dummy registers before first register that needs
27536 to be tracked in unwind information; such dummy registers are there just
27537 to avoid separate stack adjustment, and will not be restored in the
27541 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
27544 HOST_WIDE_INT offset
;
27545 HOST_WIDE_INT nregs
;
27549 unsigned padfirst
= 0, padlast
= 0;
27552 e
= XVECEXP (p
, 0, 0);
27553 gcc_assert (GET_CODE (e
) == SET
);
27555 /* First insn will adjust the stack pointer. */
27556 gcc_assert (GET_CODE (e
) == SET
27557 && REG_P (SET_DEST (e
))
27558 && REGNO (SET_DEST (e
)) == SP_REGNUM
27559 && GET_CODE (SET_SRC (e
)) == PLUS
);
27561 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
27562 nregs
= XVECLEN (p
, 0) - 1;
27563 gcc_assert (nregs
);
27565 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
27568 /* For -Os dummy registers can be pushed at the beginning to
27569 avoid separate stack pointer adjustment. */
27570 e
= XVECEXP (p
, 0, 1);
27571 e
= XEXP (SET_DEST (e
), 0);
27572 if (GET_CODE (e
) == PLUS
)
27573 padfirst
= INTVAL (XEXP (e
, 1));
27574 gcc_assert (padfirst
== 0 || optimize_size
);
27575 /* The function prologue may also push pc, but not annotate it as it is
27576 never restored. We turn this into a stack pointer adjustment. */
27577 e
= XVECEXP (p
, 0, nregs
);
27578 e
= XEXP (SET_DEST (e
), 0);
27579 if (GET_CODE (e
) == PLUS
)
27580 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
27582 padlast
= offset
- 4;
27583 gcc_assert (padlast
== 0 || padlast
== 4);
27585 fprintf (asm_out_file
, "\t.pad #4\n");
27587 fprintf (asm_out_file
, "\t.save {");
27589 else if (IS_VFP_REGNUM (reg
))
27592 fprintf (asm_out_file
, "\t.vsave {");
27595 /* Unknown register type. */
27596 gcc_unreachable ();
27598 /* If the stack increment doesn't match the size of the saved registers,
27599 something has gone horribly wrong. */
27600 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
27604 /* The remaining insns will describe the stores. */
27605 for (i
= 1; i
<= nregs
; i
++)
27607 /* Expect (set (mem <addr>) (reg)).
27608 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27609 e
= XVECEXP (p
, 0, i
);
27610 gcc_assert (GET_CODE (e
) == SET
27611 && MEM_P (SET_DEST (e
))
27612 && REG_P (SET_SRC (e
)));
27614 reg
= REGNO (SET_SRC (e
));
27615 gcc_assert (reg
>= lastreg
);
27618 fprintf (asm_out_file
, ", ");
27619 /* We can't use %r for vfp because we need to use the
27620 double precision register names. */
27621 if (IS_VFP_REGNUM (reg
))
27622 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
27624 asm_fprintf (asm_out_file
, "%r", reg
);
27628 /* Check that the addresses are consecutive. */
27629 e
= XEXP (SET_DEST (e
), 0);
27630 if (GET_CODE (e
) == PLUS
)
27631 gcc_assert (REG_P (XEXP (e
, 0))
27632 && REGNO (XEXP (e
, 0)) == SP_REGNUM
27633 && CONST_INT_P (XEXP (e
, 1))
27634 && offset
== INTVAL (XEXP (e
, 1)));
27638 && REGNO (e
) == SP_REGNUM
);
27639 offset
+= reg_size
;
27642 fprintf (asm_out_file
, "}\n");
27644 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
27647 /* Emit unwind directives for a SET. */
27650 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
27658 switch (GET_CODE (e0
))
27661 /* Pushing a single register. */
27662 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
27663 || !REG_P (XEXP (XEXP (e0
, 0), 0))
27664 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
27667 asm_fprintf (asm_out_file
, "\t.save ");
27668 if (IS_VFP_REGNUM (REGNO (e1
)))
27669 asm_fprintf(asm_out_file
, "{d%d}\n",
27670 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
27672 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
27676 if (REGNO (e0
) == SP_REGNUM
)
27678 /* A stack increment. */
27679 if (GET_CODE (e1
) != PLUS
27680 || !REG_P (XEXP (e1
, 0))
27681 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
27682 || !CONST_INT_P (XEXP (e1
, 1)))
27685 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
27686 -INTVAL (XEXP (e1
, 1)));
27688 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
27690 HOST_WIDE_INT offset
;
27692 if (GET_CODE (e1
) == PLUS
)
27694 if (!REG_P (XEXP (e1
, 0))
27695 || !CONST_INT_P (XEXP (e1
, 1)))
27697 reg
= REGNO (XEXP (e1
, 0));
27698 offset
= INTVAL (XEXP (e1
, 1));
27699 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
27700 HARD_FRAME_POINTER_REGNUM
, reg
,
27703 else if (REG_P (e1
))
27706 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
27707 HARD_FRAME_POINTER_REGNUM
, reg
);
27712 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
27714 /* Move from sp to reg. */
27715 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
27717 else if (GET_CODE (e1
) == PLUS
27718 && REG_P (XEXP (e1
, 0))
27719 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
27720 && CONST_INT_P (XEXP (e1
, 1)))
27722 /* Set reg to offset from sp. */
27723 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
27724 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
27736 /* Emit unwind directives for the given insn. */
27739 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
27742 bool handled_one
= false;
27744 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27747 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27748 && (TREE_NOTHROW (current_function_decl
)
27749 || crtl
->all_throwers_are_sibcalls
))
27752 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
27755 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
27757 switch (REG_NOTE_KIND (note
))
27759 case REG_FRAME_RELATED_EXPR
:
27760 pat
= XEXP (note
, 0);
27763 case REG_CFA_REGISTER
:
27764 pat
= XEXP (note
, 0);
27767 pat
= PATTERN (insn
);
27768 if (GET_CODE (pat
) == PARALLEL
)
27769 pat
= XVECEXP (pat
, 0, 0);
27772 /* Only emitted for IS_STACKALIGN re-alignment. */
27777 src
= SET_SRC (pat
);
27778 dest
= SET_DEST (pat
);
27780 gcc_assert (src
== stack_pointer_rtx
);
27781 reg
= REGNO (dest
);
27782 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27785 handled_one
= true;
27788 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27789 to get correct dwarf information for shrink-wrap. We should not
27790 emit unwind information for it because these are used either for
27791 pretend arguments or notes to adjust sp and restore registers from
27793 case REG_CFA_DEF_CFA
:
27794 case REG_CFA_ADJUST_CFA
:
27795 case REG_CFA_RESTORE
:
27798 case REG_CFA_EXPRESSION
:
27799 case REG_CFA_OFFSET
:
27800 /* ??? Only handling here what we actually emit. */
27801 gcc_unreachable ();
27809 pat
= PATTERN (insn
);
27812 switch (GET_CODE (pat
))
27815 arm_unwind_emit_set (asm_out_file
, pat
);
27819 /* Store multiple. */
27820 arm_unwind_emit_sequence (asm_out_file
, pat
);
27829 /* Output a reference from a function exception table to the type_info
27830 object X. The EABI specifies that the symbol should be relocated by
27831 an R_ARM_TARGET2 relocation. */
27834 arm_output_ttype (rtx x
)
27836 fputs ("\t.word\t", asm_out_file
);
27837 output_addr_const (asm_out_file
, x
);
27838 /* Use special relocations for symbol references. */
27839 if (!CONST_INT_P (x
))
27840 fputs ("(TARGET2)", asm_out_file
);
27841 fputc ('\n', asm_out_file
);
27846 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27849 arm_asm_emit_except_personality (rtx personality
)
27851 fputs ("\t.personality\t", asm_out_file
);
27852 output_addr_const (asm_out_file
, personality
);
27853 fputc ('\n', asm_out_file
);
27855 #endif /* ARM_UNWIND_INFO */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  /* NOTE(review): section directive string reconstructed — confirm against
     upstream arm.c.  */
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}
27873 /* Output unwind directives for the start/end of a function. */
27876 arm_output_fn_unwind (FILE * f
, bool prologue
)
27878 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27882 fputs ("\t.fnstart\n", f
);
27885 /* If this function will never be unwound, then mark it as such.
27886 The came condition is used in arm_unwind_emit to suppress
27887 the frame annotations. */
27888 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27889 && (TREE_NOTHROW (current_function_decl
)
27890 || crtl
->all_throwers_are_sibcalls
))
27891 fputs("\t.cantunwind\n", f
);
27893 fputs ("\t.fnend\n", f
);
27898 arm_emit_tls_decoration (FILE *fp
, rtx x
)
27900 enum tls_reloc reloc
;
27903 val
= XVECEXP (x
, 0, 0);
27904 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
27906 output_addr_const (fp
, val
);
27911 fputs ("(tlsgd)", fp
);
27914 fputs ("(tlsldm)", fp
);
27917 fputs ("(tlsldo)", fp
);
27920 fputs ("(gottpoff)", fp
);
27923 fputs ("(tpoff)", fp
);
27926 fputs ("(tlsdesc)", fp
);
27929 gcc_unreachable ();
27938 fputs (" + (. - ", fp
);
27939 output_addr_const (fp
, XVECEXP (x
, 0, 2));
27940 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27941 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
27942 output_addr_const (fp
, XVECEXP (x
, 0, 3));
27952 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27955 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
27957 gcc_assert (size
== 4);
27958 fputs ("\t.word\t", file
);
27959 output_addr_const (file
, x
);
27960 fputs ("(tlsldo)", file
);
27963 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27966 arm_output_addr_const_extra (FILE *fp
, rtx x
)
27968 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
27969 return arm_emit_tls_decoration (fp
, x
);
27970 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
27973 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
27975 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
27976 assemble_name_raw (fp
, label
);
27980 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
27982 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
27986 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27990 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
27992 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27996 output_addr_const (fp
, XVECEXP (x
, 0, 1));
28000 else if (GET_CODE (x
) == CONST_VECTOR
)
28001 return arm_emit_vector_const (fp
, x
);
28006 /* Output assembly for a shift instruction.
28007 SET_FLAGS determines how the instruction modifies the condition codes.
28008 0 - Do not set condition codes.
28009 1 - Set condition codes.
28010 2 - Use smallest instruction. */
28012 arm_output_shift(rtx
* operands
, int set_flags
)
28015 static const char flag_chars
[3] = {'?', '.', '!'};
28020 c
= flag_chars
[set_flags
];
28021 shift
= shift_op(operands
[3], &val
);
28025 operands
[2] = GEN_INT(val
);
28026 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
28029 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
28031 output_asm_insn (pattern
, operands
);
28035 /* Output assembly for a WMMX immediate shift instruction. */
28037 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
28039 int shift
= INTVAL (operands
[2]);
28041 machine_mode opmode
= GET_MODE (operands
[0]);
28043 gcc_assert (shift
>= 0);
28045 /* If the shift value in the register versions is > 63 (for D qualifier),
28046 31 (for W qualifier) or 15 (for H qualifier). */
28047 if (((opmode
== V4HImode
) && (shift
> 15))
28048 || ((opmode
== V2SImode
) && (shift
> 31))
28049 || ((opmode
== DImode
) && (shift
> 63)))
28053 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
28054 output_asm_insn (templ
, operands
);
28055 if (opmode
== DImode
)
28057 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
28058 output_asm_insn (templ
, operands
);
28063 /* The destination register will contain all zeros. */
28064 sprintf (templ
, "wzero\t%%0");
28065 output_asm_insn (templ
, operands
);
28070 if ((opmode
== DImode
) && (shift
> 32))
28072 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
28073 output_asm_insn (templ
, operands
);
28074 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
28075 output_asm_insn (templ
, operands
);
28079 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
28080 output_asm_insn (templ
, operands
);
28085 /* Output assembly for a WMMX tinsr instruction. */
28087 arm_output_iwmmxt_tinsr (rtx
*operands
)
28089 int mask
= INTVAL (operands
[3]);
28092 int units
= mode_nunits
[GET_MODE (operands
[0])];
28093 gcc_assert ((mask
& (mask
- 1)) == 0);
28094 for (i
= 0; i
< units
; ++i
)
28096 if ((mask
& 0x01) == 1)
28102 gcc_assert (i
< units
);
28104 switch (GET_MODE (operands
[0]))
28107 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
28110 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
28113 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
28116 gcc_unreachable ();
28119 output_asm_insn (templ
, operands
);
28124 /* Output a Thumb-1 casesi dispatch sequence. */
28126 thumb1_output_casesi (rtx
*operands
)
28128 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
28130 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
28132 switch (GET_MODE(diff_vec
))
28135 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
28136 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
28138 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
28139 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
28141 return "bl\t%___gnu_thumb1_case_si";
28143 gcc_unreachable ();
28147 /* Output a Thumb-2 casesi instruction. */
28149 thumb2_output_casesi (rtx
*operands
)
28151 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
28153 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
28155 output_asm_insn ("cmp\t%0, %1", operands
);
28156 output_asm_insn ("bhi\t%l3", operands
);
28157 switch (GET_MODE(diff_vec
))
28160 return "tbb\t[%|pc, %0]";
28162 return "tbh\t[%|pc, %0, lsl #1]";
28166 output_asm_insn ("adr\t%4, %l2", operands
);
28167 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
28168 output_asm_insn ("add\t%4, %4, %5", operands
);
28173 output_asm_insn ("adr\t%4, %l2", operands
);
28174 return "ldr\t%|pc, [%4, %0, lsl #2]";
28177 gcc_unreachable ();
28181 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
28182 per-core tuning structs. */
28184 arm_issue_rate (void)
28186 return current_tune
->issue_rate
;
28189 /* Return how many instructions should scheduler lookahead to choose the
28192 arm_first_cycle_multipass_dfa_lookahead (void)
28194 int issue_rate
= arm_issue_rate ();
28196 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
28199 /* Enable modeling of L2 auto-prefetcher. */
28201 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
28203 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
28207 arm_mangle_type (const_tree type
)
28209 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28210 has to be managled as if it is in the "std" namespace. */
28211 if (TARGET_AAPCS_BASED
28212 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
28213 return "St9__va_list";
28215 /* Half-precision float. */
28216 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
28219 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
28221 if (TYPE_NAME (type
) != NULL
)
28222 return arm_mangle_builtin_type (type
);
28224 /* Use the default mangling. */
28228 /* Order of allocation of core registers for Thumb: this allocation is
28229 written over the corresponding initial entries of the array
28230 initialized with REG_ALLOC_ORDER. We allocate all low registers
28231 first. Saving and restoring a low register is usually cheaper than
28232 using a call-clobbered high register. */
28234 static const int thumb_core_reg_alloc_order
[] =
28236 3, 2, 1, 0, 4, 5, 6, 7,
28237 12, 14, 8, 9, 10, 11
28240 /* Adjust register allocation order when compiling for Thumb. */
28243 arm_order_regs_for_local_alloc (void)
28245 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
28246 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
28248 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
28249 sizeof (thumb_core_reg_alloc_order
));
28252 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28255 arm_frame_pointer_required (void)
28257 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
28260 /* If the function receives nonlocal gotos, it needs to save the frame
28261 pointer in the nonlocal_goto_save_area object. */
28262 if (cfun
->has_nonlocal_label
)
28265 /* The frame pointer is required for non-leaf APCS frames. */
28266 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !crtl
->is_leaf
)
28269 /* If we are probing the stack in the prologue, we will have a faulting
28270 instruction prior to the stack adjustment and this requires a frame
28271 pointer if we want to catch the exception using the EABI unwinder. */
28272 if (!IS_INTERRUPT (arm_current_func_type ())
28273 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
28274 || flag_stack_clash_protection
)
28275 && arm_except_unwind_info (&global_options
) == UI_TARGET
28276 && cfun
->can_throw_non_call_exceptions
)
28278 HOST_WIDE_INT size
= get_frame_size ();
28280 /* That's irrelevant if there is no stack adjustment. */
28284 /* That's relevant only if there is a stack probe. */
28285 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
28287 /* We don't have the final size of the frame so adjust. */
28288 size
+= 32 * UNITS_PER_WORD
;
28289 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
28299 /* Only thumb1 can't support conditional execution, so return true if
28300 the target is not thumb1. */
28302 arm_have_conditional_execution (void)
28304 return !TARGET_THUMB1
;
28307 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28308 static HOST_WIDE_INT
28309 arm_vector_alignment (const_tree type
)
28311 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
28313 if (TARGET_AAPCS_BASED
)
28314 align
= MIN (align
, 64);
28320 arm_autovectorize_vector_sizes (vector_sizes
*sizes
, bool)
28322 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
28324 sizes
->safe_push (16);
28325 sizes
->safe_push (8);
28330 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
28332 /* Vectors which aren't in packed structures will not be less aligned than
28333 the natural alignment of their element type, so this is safe. */
28334 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
28337 return default_builtin_vector_alignment_reachable (type
, is_packed
);
28341 arm_builtin_support_vector_misalignment (machine_mode mode
,
28342 const_tree type
, int misalignment
,
28345 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
28347 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
28352 /* If the misalignment is unknown, we should be able to handle the access
28353 so long as it is not to a member of a packed data structure. */
28354 if (misalignment
== -1)
28357 /* Return true if the misalignment is a multiple of the natural alignment
28358 of the vector's element type. This is probably always going to be
28359 true in practice, since we've already established that this isn't a
28361 return ((misalignment
% align
) == 0);
28364 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
28369 arm_conditional_register_usage (void)
28373 if (TARGET_THUMB1
&& optimize_size
)
28375 /* When optimizing for size on Thumb-1, it's better not
28376 to use the HI regs, because of the overhead of
28378 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
28379 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
28382 /* The link register can be clobbered by any branch insn,
28383 but we have no way to track that at present, so mark
28384 it as unavailable. */
28386 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
28388 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
28390 /* VFPv3 registers are disabled when earlier VFP
28391 versions are selected due to the definition of
28392 LAST_VFP_REGNUM. */
28393 for (regno
= FIRST_VFP_REGNUM
;
28394 regno
<= LAST_VFP_REGNUM
; ++ regno
)
28396 fixed_regs
[regno
] = 0;
28397 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
28398 || regno
>= FIRST_VFP_REGNUM
+ 32;
28402 if (TARGET_REALLY_IWMMXT
&& !TARGET_GENERAL_REGS_ONLY
)
28404 regno
= FIRST_IWMMXT_GR_REGNUM
;
28405 /* The 2002/10/09 revision of the XScale ABI has wCG0
28406 and wCG1 as call-preserved registers. The 2002/11/21
28407 revision changed this so that all wCG registers are
28408 scratch registers. */
28409 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
28410 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
28411 fixed_regs
[regno
] = 0;
28412 /* The XScale ABI has wR0 - wR9 as scratch registers,
28413 the rest as call-preserved registers. */
28414 for (regno
= FIRST_IWMMXT_REGNUM
;
28415 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
28417 fixed_regs
[regno
] = 0;
28418 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
28422 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
28424 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
28425 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
28427 else if (TARGET_APCS_STACK
)
28429 fixed_regs
[10] = 1;
28430 call_used_regs
[10] = 1;
28432 /* -mcaller-super-interworking reserves r11 for calls to
28433 _interwork_r11_call_via_rN(). Making the register global
28434 is an easy way of ensuring that it remains valid for all
28436 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
28437 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
28439 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28440 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28441 if (TARGET_CALLER_INTERWORKING
)
28442 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28444 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28448 arm_preferred_rename_class (reg_class_t rclass
)
28450 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28451 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
28452 and code size can be reduced. */
28453 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
28459 /* Compute the attribute "length" of insn "*push_multi".
28460 So this function MUST be kept in sync with that insn pattern. */
28462 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
28464 int i
, regno
, hi_reg
;
28465 int num_saves
= XVECLEN (parallel_op
, 0);
28475 regno
= REGNO (first_op
);
28476 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
28477 list is 8-bit. Normally this means all registers in the list must be
28478 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
28479 encodings. There is one exception for PUSH that LR in HI_REGS can be used
28480 with 16-bit encoding. */
28481 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
28482 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
28484 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
28485 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
28493 /* Compute the attribute "length" of insn. Currently, this function is used
28494 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28495 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28496 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
28497 true if OPERANDS contains insn which explicit updates base register. */
28500 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
28509 rtx parallel_op
= operands
[0];
28510 /* Initialize to elements number of PARALLEL. */
28511 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
28512 /* Initialize the value to base register. */
28513 unsigned regno
= REGNO (operands
[1]);
28514 /* Skip return and write back pattern.
28515 We only need register pop pattern for later analysis. */
28516 unsigned first_indx
= 0;
28517 first_indx
+= return_pc
? 1 : 0;
28518 first_indx
+= write_back_p
? 1 : 0;
28520 /* A pop operation can be done through LDM or POP. If the base register is SP
28521 and if it's with write back, then a LDM will be alias of POP. */
28522 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
28523 bool ldm_p
= !pop_p
;
28525 /* Check base register for LDM. */
28526 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
28529 /* Check each register in the list. */
28530 for (; indx
>= first_indx
; indx
--)
28532 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
28533 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28534 comment in arm_attr_length_push_multi. */
28535 if (REGNO_REG_CLASS (regno
) == HI_REGS
28536 && (regno
!= PC_REGNUM
|| ldm_p
))
28543 /* Compute the number of instructions emitted by output_move_double. */
28545 arm_count_output_move_double_insns (rtx
*operands
)
28549 /* output_move_double may modify the operands array, so call it
28550 here on a copy of the array. */
28551 ops
[0] = operands
[0];
28552 ops
[1] = operands
[1];
28553 output_move_double (ops
, false, &count
);
28557 /* Same as above, but operands are a register/memory pair in SImode.
28558 Assumes operands has the base register in position 0 and memory in position
28559 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
28561 arm_count_ldrdstrd_insns (rtx
*operands
, bool load
)
28565 int regnum
, memnum
;
28567 regnum
= 0, memnum
= 1;
28569 regnum
= 1, memnum
= 0;
28570 ops
[regnum
] = gen_rtx_REG (DImode
, REGNO (operands
[0]));
28571 ops
[memnum
] = adjust_address (operands
[2], DImode
, 0);
28572 output_move_double (ops
, false, &count
);
28578 vfp3_const_double_for_fract_bits (rtx operand
)
28580 REAL_VALUE_TYPE r0
;
28582 if (!CONST_DOUBLE_P (operand
))
28585 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
28586 if (exact_real_inverse (DFmode
, &r0
)
28587 && !REAL_VALUE_NEGATIVE (r0
))
28589 if (exact_real_truncate (DFmode
, &r0
))
28591 HOST_WIDE_INT value
= real_to_integer (&r0
);
28592 value
= value
& 0xffffffff;
28593 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
28595 int ret
= exact_log2 (value
);
28596 gcc_assert (IN_RANGE (ret
, 0, 31));
28604 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28605 log2 is in [1, 32], return that log2. Otherwise return -1.
28606 This is used in the patterns for vcvt.s32.f32 floating-point to
28607 fixed-point conversions. */
28610 vfp3_const_double_for_bits (rtx x
)
28612 const REAL_VALUE_TYPE
*r
;
28614 if (!CONST_DOUBLE_P (x
))
28617 r
= CONST_DOUBLE_REAL_VALUE (x
);
28619 if (REAL_VALUE_NEGATIVE (*r
)
28620 || REAL_VALUE_ISNAN (*r
)
28621 || REAL_VALUE_ISINF (*r
)
28622 || !real_isinteger (r
, SFmode
))
28625 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
28627 /* The exact_log2 above will have returned -1 if this is
28628 not an exact log2. */
28629 if (!IN_RANGE (hwint
, 1, 32))
28636 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28639 arm_pre_atomic_barrier (enum memmodel model
)
28641 if (need_atomic_barrier_p (model
, true))
28642 emit_insn (gen_memory_barrier ());
28646 arm_post_atomic_barrier (enum memmodel model
)
28648 if (need_atomic_barrier_p (model
, false))
28649 emit_insn (gen_memory_barrier ());
28652 /* Emit the load-exclusive and store-exclusive instructions.
28653 Use acquire and release versions if necessary. */
28656 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
28658 rtx (*gen
) (rtx
, rtx
);
28664 case E_QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
28665 case E_HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
28666 case E_SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
28667 case E_DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
28669 gcc_unreachable ();
28676 case E_QImode
: gen
= gen_arm_load_exclusiveqi
; break;
28677 case E_HImode
: gen
= gen_arm_load_exclusivehi
; break;
28678 case E_SImode
: gen
= gen_arm_load_exclusivesi
; break;
28679 case E_DImode
: gen
= gen_arm_load_exclusivedi
; break;
28681 gcc_unreachable ();
28685 emit_insn (gen (rval
, mem
));
28689 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
28692 rtx (*gen
) (rtx
, rtx
, rtx
);
28698 case E_QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
28699 case E_HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
28700 case E_SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
28701 case E_DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
28703 gcc_unreachable ();
28710 case E_QImode
: gen
= gen_arm_store_exclusiveqi
; break;
28711 case E_HImode
: gen
= gen_arm_store_exclusivehi
; break;
28712 case E_SImode
: gen
= gen_arm_store_exclusivesi
; break;
28713 case E_DImode
: gen
= gen_arm_store_exclusivedi
; break;
28715 gcc_unreachable ();
28719 emit_insn (gen (bval
, rval
, mem
));
28722 /* Mark the previous jump instruction as unlikely. */
28725 emit_unlikely_jump (rtx insn
)
28727 rtx_insn
*jump
= emit_jump_insn (insn
);
28728 add_reg_br_prob_note (jump
, profile_probability::very_unlikely ());
28731 /* Expand a compare and swap pattern. */
28734 arm_expand_compare_and_swap (rtx operands
[])
28736 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
28737 machine_mode mode
, cmp_mode
;
28739 bval
= operands
[0];
28740 rval
= operands
[1];
28742 oldval
= operands
[3];
28743 newval
= operands
[4];
28744 is_weak
= operands
[5];
28745 mod_s
= operands
[6];
28746 mod_f
= operands
[7];
28747 mode
= GET_MODE (mem
);
28749 /* Normally the succ memory model must be stronger than fail, but in the
28750 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28751 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28753 if (TARGET_HAVE_LDACQ
28754 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
28755 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
28756 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
28762 /* For narrow modes, we're going to perform the comparison in SImode,
28763 so do the zero-extension now. */
28764 rval
= gen_reg_rtx (SImode
);
28765 oldval
= convert_modes (SImode
, mode
, oldval
, true);
28769 /* Force the value into a register if needed. We waited until after
28770 the zero-extension above to do this properly. */
28771 if (!arm_add_operand (oldval
, SImode
))
28772 oldval
= force_reg (SImode
, oldval
);
28776 if (!cmpdi_operand (oldval
, mode
))
28777 oldval
= force_reg (mode
, oldval
);
28781 gcc_unreachable ();
28785 cmp_mode
= E_SImode
;
28787 cmp_mode
= CC_Zmode
;
28789 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CC_Zmode
, CC_REGNUM
);
28790 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode
, mode
, bdst
, rval
, mem
,
28791 oldval
, newval
, is_weak
, mod_s
, mod_f
));
28793 if (mode
== QImode
|| mode
== HImode
)
28794 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
28796 /* In all cases, we arrange for success to be signaled by Z set.
28797 This arrangement allows for the boolean result to be used directly
28798 in a subsequent branch, post optimization. For Thumb-1 targets, the
28799 boolean negation of the result is also stored in bval because Thumb-1
28800 backend lacks dependency tracking for CC flag due to flag-setting not
28801 being represented at RTL level. */
28803 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
28806 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
28807 emit_insn (gen_rtx_SET (bval
, x
));
28811 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28812 another memory store between the load-exclusive and store-exclusive can
28813 reset the monitor from Exclusive to Open state. This means we must wait
28814 until after reload to split the pattern, lest we get a register spill in
28815 the middle of the atomic sequence. Success of the compare and swap is
28816 indicated by the Z flag set for 32bit targets and by neg_bval being zero
28817 for Thumb-1 targets (ie. negation of the boolean value returned by
28818 atomic_compare_and_swapmode standard pattern in operand 0). */
28821 arm_split_compare_and_swap (rtx operands
[])
28823 rtx rval
, mem
, oldval
, newval
, neg_bval
, mod_s_rtx
;
28825 enum memmodel mod_s
, mod_f
;
28827 rtx_code_label
*label1
, *label2
;
28830 rval
= operands
[1];
28832 oldval
= operands
[3];
28833 newval
= operands
[4];
28834 is_weak
= (operands
[5] != const0_rtx
);
28835 mod_s_rtx
= operands
[6];
28836 mod_s
= memmodel_from_int (INTVAL (mod_s_rtx
));
28837 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
28838 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
28839 mode
= GET_MODE (mem
);
28841 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
28843 bool use_acquire
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_acquire (mod_s_rtx
);
28844 bool use_release
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_release (mod_s_rtx
);
28846 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28847 a full barrier is emitted after the store-release. */
28849 use_acquire
= false;
28851 /* Checks whether a barrier is needed and emits one accordingly. */
28852 if (!(use_acquire
|| use_release
))
28853 arm_pre_atomic_barrier (mod_s
);
28858 label1
= gen_label_rtx ();
28859 emit_label (label1
);
28861 label2
= gen_label_rtx ();
28863 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
28865 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28866 as required to communicate with arm_expand_compare_and_swap. */
28869 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
28870 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28871 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28872 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
28873 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28877 emit_move_insn (neg_bval
, const1_rtx
);
28878 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
28879 if (thumb1_cmpneg_operand (oldval
, SImode
))
28880 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval
, rval
, oldval
,
28883 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
28886 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
28888 /* Weak or strong, we want EQ to be true for success, so that we
28889 match the flags that we got from the compare above. */
28892 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28893 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
28894 emit_insn (gen_rtx_SET (cond
, x
));
28899 /* Z is set to boolean value of !neg_bval, as required to communicate
28900 with arm_expand_compare_and_swap. */
28901 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
28902 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
28905 if (!is_mm_relaxed (mod_f
))
28906 emit_label (label2
);
28908 /* Checks whether a barrier is needed and emits one accordingly. */
28910 || !(use_acquire
|| use_release
))
28911 arm_post_atomic_barrier (mod_s
);
28913 if (is_mm_relaxed (mod_f
))
28914 emit_label (label2
);
28917 /* Split an atomic operation pattern. Operation is given by CODE and is one
28918 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28919 operation). Operation is performed on the content at MEM and on VALUE
28920 following the memory model MODEL_RTX. The content at MEM before and after
28921 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28922 success of the operation is returned in COND. Using a scratch register or
28923 an operand register for these determines what result is returned for that
28927 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
28928 rtx value
, rtx model_rtx
, rtx cond
)
28930 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
28931 machine_mode mode
= GET_MODE (mem
);
28932 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
28933 rtx_code_label
*label
;
28934 bool all_low_regs
, bind_old_new
;
28937 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
28939 bool use_acquire
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_acquire (model_rtx
);
28940 bool use_release
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_release (model_rtx
);
28942 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28943 a full barrier is emitted after the store-release. */
28945 use_acquire
= false;
28947 /* Checks whether a barrier is needed and emits one accordingly. */
28948 if (!(use_acquire
|| use_release
))
28949 arm_pre_atomic_barrier (model
);
28951 label
= gen_label_rtx ();
28952 emit_label (label
);
28955 new_out
= gen_lowpart (wmode
, new_out
);
28957 old_out
= gen_lowpart (wmode
, old_out
);
28960 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
28962 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
28964 /* Does the operation require destination and first operand to use the same
28965 register? This is decided by register constraints of relevant insn
28966 patterns in thumb1.md. */
28967 gcc_assert (!new_out
|| REG_P (new_out
));
28968 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
28969 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
28970 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
28975 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
28977 /* We want to return the old value while putting the result of the operation
28978 in the same register as the old value so copy the old value over to the
28979 destination register and use that register for the operation. */
28980 if (old_out
&& bind_old_new
)
28982 emit_move_insn (new_out
, old_out
);
28993 x
= gen_rtx_AND (wmode
, old_out
, value
);
28994 emit_insn (gen_rtx_SET (new_out
, x
));
28995 x
= gen_rtx_NOT (wmode
, new_out
);
28996 emit_insn (gen_rtx_SET (new_out
, x
));
29000 if (CONST_INT_P (value
))
29002 value
= GEN_INT (-INTVAL (value
));
29008 if (mode
== DImode
)
29010 /* DImode plus/minus need to clobber flags. */
29011 /* The adddi3 and subdi3 patterns are incorrectly written so that
29012 they require matching operands, even when we could easily support
29013 three operands. Thankfully, this can be fixed up post-splitting,
29014 as the individual add+adc patterns do accept three operands and
29015 post-reload cprop can make these moves go away. */
29016 emit_move_insn (new_out
, old_out
);
29018 x
= gen_adddi3 (new_out
, new_out
, value
);
29020 x
= gen_subdi3 (new_out
, new_out
, value
);
29027 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
29028 emit_insn (gen_rtx_SET (new_out
, x
));
29032 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
29035 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
29036 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
29038 /* Checks whether a barrier is needed and emits one accordingly. */
29040 || !(use_acquire
|| use_release
))
29041 arm_post_atomic_barrier (model
);
29044 #define MAX_VECT_LEN 16
29046 struct expand_vec_perm_d
29048 rtx target
, op0
, op1
;
29049 vec_perm_indices perm
;
29050 machine_mode vmode
;
29055 /* Generate a variable permutation. */
29058 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
29060 machine_mode vmode
= GET_MODE (target
);
29061 bool one_vector_p
= rtx_equal_p (op0
, op1
);
29063 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
29064 gcc_checking_assert (GET_MODE (op0
) == vmode
);
29065 gcc_checking_assert (GET_MODE (op1
) == vmode
);
29066 gcc_checking_assert (GET_MODE (sel
) == vmode
);
29067 gcc_checking_assert (TARGET_NEON
);
29071 if (vmode
== V8QImode
)
29072 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
29074 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
29080 if (vmode
== V8QImode
)
29082 pair
= gen_reg_rtx (V16QImode
);
29083 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
29084 pair
= gen_lowpart (TImode
, pair
);
29085 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
29089 pair
= gen_reg_rtx (OImode
);
29090 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
29091 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
29097 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
29099 machine_mode vmode
= GET_MODE (target
);
29100 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
29101 bool one_vector_p
= rtx_equal_p (op0
, op1
);
29104 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29105 numbering of elements for big-endian, we must reverse the order. */
29106 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
29108 /* The VTBL instruction does not use a modulo index, so we must take care
29109 of that ourselves. */
29110 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
29111 mask
= gen_const_vec_duplicate (vmode
, mask
);
29112 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
29114 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
29117 /* Map lane ordering between architectural lane order, and GCC lane order,
29118 taking into account ABI. See comment above output_move_neon for details. */
29121 neon_endian_lane_map (machine_mode mode
, int lane
)
29123 if (BYTES_BIG_ENDIAN
)
29125 int nelems
= GET_MODE_NUNITS (mode
);
29126 /* Reverse lane order. */
29127 lane
= (nelems
- 1 - lane
);
29128 /* Reverse D register order, to match ABI. */
29129 if (GET_MODE_SIZE (mode
) == 16)
29130 lane
= lane
^ (nelems
/ 2);
29135 /* Some permutations index into pairs of vectors, this is a helper function
29136 to map indexes into those pairs of vectors. */
29139 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
29141 int nelem
= GET_MODE_NUNITS (mode
);
29142 if (BYTES_BIG_ENDIAN
)
29144 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
29148 /* Generate or test for an insn that supports a constant permutation. */
29150 /* Recognize patterns for the VUZP insns. */
29153 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
29155 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
29156 rtx out0
, out1
, in0
, in1
;
29160 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
29163 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
29164 big endian pattern on 64 bit vectors, so we correct for that. */
29165 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
29166 && GET_MODE_SIZE (d
->vmode
) == 8 ? nelt
: 0;
29168 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
29170 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
29172 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
29176 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
29178 for (i
= 0; i
< nelt
; i
++)
29181 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
29182 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
29192 if (swap_nelt
!= 0)
29193 std::swap (in0
, in1
);
29196 out1
= gen_reg_rtx (d
->vmode
);
29198 std::swap (out0
, out1
);
29200 emit_insn (gen_neon_vuzp_internal (d
->vmode
, out0
, in0
, in1
, out1
));
29204 /* Recognize patterns for the VZIP insns. */
29207 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
29209 unsigned int i
, high
, mask
, nelt
= d
->perm
.length ();
29210 rtx out0
, out1
, in0
, in1
;
29214 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
29217 is_swapped
= BYTES_BIG_ENDIAN
;
29219 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
29222 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
29224 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
29228 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
29230 for (i
= 0; i
< nelt
/ 2; i
++)
29233 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
29234 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
29238 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
29239 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
29251 std::swap (in0
, in1
);
29254 out1
= gen_reg_rtx (d
->vmode
);
29256 std::swap (out0
, out1
);
29258 emit_insn (gen_neon_vzip_internal (d
->vmode
, out0
, in0
, in1
, out1
));
29262 /* Recognize patterns for the VREV insns. */
29264 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
29266 unsigned int i
, j
, diff
, nelt
= d
->perm
.length ();
29267 rtx (*gen
) (machine_mode
, rtx
, rtx
);
29269 if (!d
->one_vector_p
)
29280 gen
= gen_neon_vrev64
;
29291 gen
= gen_neon_vrev32
;
29297 gen
= gen_neon_vrev64
;
29308 gen
= gen_neon_vrev16
;
29312 gen
= gen_neon_vrev32
;
29318 gen
= gen_neon_vrev64
;
29328 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
29329 for (j
= 0; j
<= diff
; j
+= 1)
29331 /* This is guaranteed to be true as the value of diff
29332 is 7, 3, 1 and we should have enough elements in the
29333 queue to generate this. Getting a vector mask with a
29334 value of diff other than these values implies that
29335 something is wrong by the time we get here. */
29336 gcc_assert (i
+ j
< nelt
);
29337 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
29345 emit_insn (gen (d
->vmode
, d
->target
, d
->op0
));
29349 /* Recognize patterns for the VTRN insns. */
29352 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
29354 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
29355 rtx out0
, out1
, in0
, in1
;
29357 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
29360 /* Note that these are little-endian tests. Adjust for big-endian later. */
29361 if (d
->perm
[0] == 0)
29363 else if (d
->perm
[0] == 1)
29367 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
29369 for (i
= 0; i
< nelt
; i
+= 2)
29371 if (d
->perm
[i
] != i
+ odd
)
29373 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
29383 if (BYTES_BIG_ENDIAN
)
29385 std::swap (in0
, in1
);
29390 out1
= gen_reg_rtx (d
->vmode
);
29392 std::swap (out0
, out1
);
29394 emit_insn (gen_neon_vtrn_internal (d
->vmode
, out0
, in0
, in1
, out1
));
29398 /* Recognize patterns for the VEXT insns. */
29401 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
29403 unsigned int i
, nelt
= d
->perm
.length ();
29406 unsigned int location
;
29408 unsigned int next
= d
->perm
[0] + 1;
29410 /* TODO: Handle GCC's numbering of elements for big-endian. */
29411 if (BYTES_BIG_ENDIAN
)
29414 /* Check if the extracted indexes are increasing by one. */
29415 for (i
= 1; i
< nelt
; next
++, i
++)
29417 /* If we hit the most significant element of the 2nd vector in
29418 the previous iteration, no need to test further. */
29419 if (next
== 2 * nelt
)
29422 /* If we are operating on only one vector: it could be a
29423 rotation. If there are only two elements of size < 64, let
29424 arm_evpc_neon_vrev catch it. */
29425 if (d
->one_vector_p
&& (next
== nelt
))
29427 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
29433 if (d
->perm
[i
] != next
)
29437 location
= d
->perm
[0];
29443 offset
= GEN_INT (location
);
29445 if(d
->vmode
== E_DImode
)
29448 emit_insn (gen_neon_vext (d
->vmode
, d
->target
, d
->op0
, d
->op1
, offset
));
29452 /* The NEON VTBL instruction is a fully variable permuation that's even
29453 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29454 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29455 can do slightly better by expanding this as a constant where we don't
29456 have to apply a mask. */
29459 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
29461 rtx rperm
[MAX_VECT_LEN
], sel
;
29462 machine_mode vmode
= d
->vmode
;
29463 unsigned int i
, nelt
= d
->perm
.length ();
29465 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29466 numbering of elements for big-endian, we must reverse the order. */
29467 if (BYTES_BIG_ENDIAN
)
29473 /* Generic code will try constant permutation twice. Once with the
29474 original mode and again with the elements lowered to QImode.
29475 So wait and don't do the selector expansion ourselves. */
29476 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
29479 for (i
= 0; i
< nelt
; ++i
)
29480 rperm
[i
] = GEN_INT (d
->perm
[i
]);
29481 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
29482 sel
= force_reg (vmode
, sel
);
29484 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
29489 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
29491 /* Check if the input mask matches vext before reordering the
29494 if (arm_evpc_neon_vext (d
))
29497 /* The pattern matching functions above are written to look for a small
29498 number to begin the sequence (0, 1, N/2). If we begin with an index
29499 from the second operand, we can swap the operands. */
29500 unsigned int nelt
= d
->perm
.length ();
29501 if (d
->perm
[0] >= nelt
)
29503 d
->perm
.rotate_inputs (1);
29504 std::swap (d
->op0
, d
->op1
);
29509 if (arm_evpc_neon_vuzp (d
))
29511 if (arm_evpc_neon_vzip (d
))
29513 if (arm_evpc_neon_vrev (d
))
29515 if (arm_evpc_neon_vtrn (d
))
29517 return arm_evpc_neon_vtbl (d
);
29522 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
29525 arm_vectorize_vec_perm_const (machine_mode vmode
, rtx target
, rtx op0
, rtx op1
,
29526 const vec_perm_indices
&sel
)
29528 struct expand_vec_perm_d d
;
29529 int i
, nelt
, which
;
29531 if (!VALID_NEON_DREG_MODE (vmode
) && !VALID_NEON_QREG_MODE (vmode
))
29539 gcc_assert (VECTOR_MODE_P (d
.vmode
));
29540 d
.testing_p
= !target
;
29542 nelt
= GET_MODE_NUNITS (d
.vmode
);
29543 for (i
= which
= 0; i
< nelt
; ++i
)
29545 int ei
= sel
[i
] & (2 * nelt
- 1);
29546 which
|= (ei
< nelt
? 1 : 2);
29555 d
.one_vector_p
= false;
29556 if (d
.testing_p
|| !rtx_equal_p (op0
, op1
))
29559 /* The elements of PERM do not suggest that only the first operand
29560 is used, but both operands are identical. Allow easier matching
29561 of the permutation by folding the permutation into the single
29566 d
.one_vector_p
= true;
29571 d
.one_vector_p
= true;
29575 d
.perm
.new_vector (sel
.encoding (), d
.one_vector_p
? 1 : 2, nelt
);
29578 return arm_expand_vec_perm_const_1 (&d
);
29580 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
29581 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
29582 if (!d
.one_vector_p
)
29583 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
29586 bool ret
= arm_expand_vec_perm_const_1 (&d
);
29593 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
29595 /* If we are soft float and we do not have ldrd
29596 then all auto increment forms are ok. */
29597 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
29602 /* Post increment and Pre Decrement are supported for all
29603 instruction forms except for vector forms. */
29606 if (VECTOR_MODE_P (mode
))
29608 if (code
!= ARM_PRE_DEC
)
29618 /* Without LDRD and mode size greater than
29619 word size, there is no point in auto-incrementing
29620 because ldm and stm will not have these forms. */
29621 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
29624 /* Vector and floating point modes do not support
29625 these auto increment forms. */
29626 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
29639 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29640 on ARM, since we know that shifts by negative amounts are no-ops.
29641 Additionally, the default expansion code is not available or suitable
29642 for post-reload insn splits (this can occur when the register allocator
29643 chooses not to do a shift in NEON).
29645 This function is used in both initial expand and post-reload splits, and
29646 handles all kinds of 64-bit shifts.
29648 Input requirements:
29649 - It is safe for the input and output to be the same register, but
29650 early-clobber rules apply for the shift amount and scratch registers.
29651 - Shift by register requires both scratch registers. In all other cases
29652 the scratch registers may be NULL.
29653 - Ashiftrt by a register also clobbers the CC register. */
29655 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
29656 rtx amount
, rtx scratch1
, rtx scratch2
)
29658 rtx out_high
= gen_highpart (SImode
, out
);
29659 rtx out_low
= gen_lowpart (SImode
, out
);
29660 rtx in_high
= gen_highpart (SImode
, in
);
29661 rtx in_low
= gen_lowpart (SImode
, in
);
29664 in = the register pair containing the input value.
29665 out = the destination register pair.
29666 up = the high- or low-part of each pair.
29667 down = the opposite part to "up".
29668 In a shift, we can consider bits to shift from "up"-stream to
29669 "down"-stream, so in a left-shift "up" is the low-part and "down"
29670 is the high-part of each register pair. */
29672 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
29673 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
29674 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
29675 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
29677 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
29679 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
29680 && GET_MODE (out
) == DImode
);
29682 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
29683 && GET_MODE (in
) == DImode
);
29685 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
29686 && GET_MODE (amount
) == SImode
)
29687 || CONST_INT_P (amount
)));
29688 gcc_assert (scratch1
== NULL
29689 || (GET_CODE (scratch1
) == SCRATCH
)
29690 || (GET_MODE (scratch1
) == SImode
29691 && REG_P (scratch1
)));
29692 gcc_assert (scratch2
== NULL
29693 || (GET_CODE (scratch2
) == SCRATCH
)
29694 || (GET_MODE (scratch2
) == SImode
29695 && REG_P (scratch2
)));
29696 gcc_assert (!REG_P (out
) || !REG_P (amount
)
29697 || !HARD_REGISTER_P (out
)
29698 || (REGNO (out
) != REGNO (amount
)
29699 && REGNO (out
) + 1 != REGNO (amount
)));
29701 /* Macros to make following code more readable. */
29702 #define SUB_32(DEST,SRC) \
29703 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29704 #define RSB_32(DEST,SRC) \
29705 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29706 #define SUB_S_32(DEST,SRC) \
29707 gen_addsi3_compare0 ((DEST), (SRC), \
29709 #define SET(DEST,SRC) \
29710 gen_rtx_SET ((DEST), (SRC))
29711 #define SHIFT(CODE,SRC,AMOUNT) \
29712 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29713 #define LSHIFT(CODE,SRC,AMOUNT) \
29714 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29715 SImode, (SRC), (AMOUNT))
29716 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29717 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29718 SImode, (SRC), (AMOUNT))
29720 gen_rtx_IOR (SImode, (A), (B))
29721 #define BRANCH(COND,LABEL) \
29722 gen_arm_cond_branch ((LABEL), \
29723 gen_rtx_ ## COND (CCmode, cc_reg, \
29727 /* Shifts by register and shifts by constant are handled separately. */
29728 if (CONST_INT_P (amount
))
29730 /* We have a shift-by-constant. */
29732 /* First, handle out-of-range shift amounts.
29733 In both cases we try to match the result an ARM instruction in a
29734 shift-by-register would give. This helps reduce execution
29735 differences between optimization levels, but it won't stop other
29736 parts of the compiler doing different things. This is "undefined
29737 behavior, in any case. */
29738 if (INTVAL (amount
) <= 0)
29739 emit_insn (gen_movdi (out
, in
));
29740 else if (INTVAL (amount
) >= 64)
29742 if (code
== ASHIFTRT
)
29744 rtx const31_rtx
= GEN_INT (31);
29745 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
29746 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
29749 emit_insn (gen_movdi (out
, const0_rtx
));
29752 /* Now handle valid shifts. */
29753 else if (INTVAL (amount
) < 32)
29755 /* Shifts by a constant less than 32. */
29756 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
29758 /* Clearing the out register in DImode first avoids lots
29759 of spilling and results in less stack usage.
29760 Later this redundant insn is completely removed.
29761 Do that only if "in" and "out" are different registers. */
29762 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29763 emit_insn (SET (out
, const0_rtx
));
29764 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29765 emit_insn (SET (out_down
,
29766 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
29768 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29772 /* Shifts by a constant greater than 31. */
29773 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
29775 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29776 emit_insn (SET (out
, const0_rtx
));
29777 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
29778 if (code
== ASHIFTRT
)
29779 emit_insn (gen_ashrsi3 (out_up
, in_up
,
29782 emit_insn (SET (out_up
, const0_rtx
));
29787 /* We have a shift-by-register. */
29788 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
29790 /* This alternative requires the scratch registers. */
29791 gcc_assert (scratch1
&& REG_P (scratch1
));
29792 gcc_assert (scratch2
&& REG_P (scratch2
));
29794 /* We will need the values "amount-32" and "32-amount" later.
29795 Swapping them around now allows the later code to be more general. */
29799 emit_insn (SUB_32 (scratch1
, amount
));
29800 emit_insn (RSB_32 (scratch2
, amount
));
29803 emit_insn (RSB_32 (scratch1
, amount
));
29804 /* Also set CC = amount > 32. */
29805 emit_insn (SUB_S_32 (scratch2
, amount
));
29808 emit_insn (RSB_32 (scratch1
, amount
));
29809 emit_insn (SUB_32 (scratch2
, amount
));
29812 gcc_unreachable ();
29815 /* Emit code like this:
29818 out_down = in_down << amount;
29819 out_down = (in_up << (amount - 32)) | out_down;
29820 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29821 out_up = in_up << amount;
29824 out_down = in_down >> amount;
29825 out_down = (in_up << (32 - amount)) | out_down;
29827 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29828 out_up = in_up << amount;
29831 out_down = in_down >> amount;
29832 out_down = (in_up << (32 - amount)) | out_down;
29834 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29835 out_up = in_up << amount;
29837 The ARM and Thumb2 variants are the same but implemented slightly
29838 differently. If this were only called during expand we could just
29839 use the Thumb2 case and let combine do the right thing, but this
29840 can also be called from post-reload splitters. */
29842 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29844 if (!TARGET_THUMB2
)
29846 /* Emit code for ARM mode. */
29847 emit_insn (SET (out_down
,
29848 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
29849 if (code
== ASHIFTRT
)
29851 rtx_code_label
*done_label
= gen_label_rtx ();
29852 emit_jump_insn (BRANCH (LT
, done_label
));
29853 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
29855 emit_label (done_label
);
29858 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
29863 /* Emit code for Thumb2 mode.
29864 Thumb2 can't do shift and or in one insn. */
29865 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
29866 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
29868 if (code
== ASHIFTRT
)
29870 rtx_code_label
*done_label
= gen_label_rtx ();
29871 emit_jump_insn (BRANCH (LT
, done_label
));
29872 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
29873 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
29874 emit_label (done_label
);
29878 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
29879 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
29883 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29897 /* Returns true if the pattern is a valid symbolic address, which is either a
29898 symbol_ref or (symbol_ref + addend).
29900 According to the ARM ELF ABI, the initial addend of REL-type relocations
29901 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29902 literal field of the instruction as a 16-bit signed value in the range
29903 -32768 <= A < 32768. */
29906 arm_valid_symbolic_address_p (rtx addr
)
29908 rtx xop0
, xop1
= NULL_RTX
;
29911 if (target_word_relocations
)
29914 if (GET_CODE (tmp
) == SYMBOL_REF
|| GET_CODE (tmp
) == LABEL_REF
)
29917 /* (const (plus: symbol_ref const_int)) */
29918 if (GET_CODE (addr
) == CONST
)
29919 tmp
= XEXP (addr
, 0);
29921 if (GET_CODE (tmp
) == PLUS
)
29923 xop0
= XEXP (tmp
, 0);
29924 xop1
= XEXP (tmp
, 1);
29926 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
29927 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
29933 /* Returns true if a valid comparison operation and makes
29934 the operands in a form that is valid. */
29936 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
29938 enum rtx_code code
= GET_CODE (*comparison
);
29940 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
29941 ? GET_MODE (*op2
) : GET_MODE (*op1
);
29943 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
29945 if (code
== UNEQ
|| code
== LTGT
)
29948 code_int
= (int)code
;
29949 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
29950 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
29955 if (!arm_add_operand (*op1
, mode
))
29956 *op1
= force_reg (mode
, *op1
);
29957 if (!arm_add_operand (*op2
, mode
))
29958 *op2
= force_reg (mode
, *op2
);
29962 if (!cmpdi_operand (*op1
, mode
))
29963 *op1
= force_reg (mode
, *op1
);
29964 if (!cmpdi_operand (*op2
, mode
))
29965 *op2
= force_reg (mode
, *op2
);
29969 if (!TARGET_VFP_FP16INST
)
29971 /* FP16 comparisons are done in SF mode. */
29973 *op1
= convert_to_mode (mode
, *op1
, 1);
29974 *op2
= convert_to_mode (mode
, *op2
, 1);
29975 /* Fall through. */
29978 if (!vfp_compare_operand (*op1
, mode
))
29979 *op1
= force_reg (mode
, *op1
);
29980 if (!vfp_compare_operand (*op2
, mode
))
29981 *op2
= force_reg (mode
, *op2
);
29991 /* Maximum number of instructions to set block of memory. */
29993 arm_block_set_max_insns (void)
29995 if (optimize_function_for_size_p (cfun
))
29998 return current_tune
->max_insns_inline_memset
;
30001 /* Return TRUE if it's profitable to set block of memory for
30002 non-vectorized case. VAL is the value to set the memory
30003 with. LENGTH is the number of bytes to set. ALIGN is the
30004 alignment of the destination memory in bytes. UNALIGNED_P
30005 is TRUE if we can only set the memory with instructions
30006 meeting alignment requirements. USE_STRD_P is TRUE if we
30007 can use strd to set the memory. */
30009 arm_block_set_non_vect_profit_p (rtx val
,
30010 unsigned HOST_WIDE_INT length
,
30011 unsigned HOST_WIDE_INT align
,
30012 bool unaligned_p
, bool use_strd_p
)
30015 /* For leftovers in bytes of 0-7, we can set the memory block using
30016 strb/strh/str with minimum instruction number. */
30017 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
30021 num
= arm_const_inline_cost (SET
, val
);
30022 num
+= length
/ align
+ length
% align
;
30024 else if (use_strd_p
)
30026 num
= arm_const_double_inline_cost (val
);
30027 num
+= (length
>> 3) + leftover
[length
& 7];
30031 num
= arm_const_inline_cost (SET
, val
);
30032 num
+= (length
>> 2) + leftover
[length
& 3];
30035 /* We may be able to combine last pair STRH/STRB into a single STR
30036 by shifting one byte back. */
30037 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
30040 return (num
<= arm_block_set_max_insns ());
30043 /* Return TRUE if it's profitable to set block of memory for
30044 vectorized case. LENGTH is the number of bytes to set.
30045 ALIGN is the alignment of destination memory in bytes.
30046 MODE is the vector mode used to set the memory. */
30048 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
30049 unsigned HOST_WIDE_INT align
,
30053 bool unaligned_p
= ((align
& 3) != 0);
30054 unsigned int nelt
= GET_MODE_NUNITS (mode
);
30056 /* Instruction loading constant value. */
30058 /* Instructions storing the memory. */
30059 num
+= (length
+ nelt
- 1) / nelt
;
30060 /* Instructions adjusting the address expression. Only need to
30061 adjust address expression if it's 4 bytes aligned and bytes
30062 leftover can only be stored by mis-aligned store instruction. */
30063 if (!unaligned_p
&& (length
& 3) != 0)
30066 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
30067 if (!unaligned_p
&& mode
== V16QImode
)
30070 return (num
<= arm_block_set_max_insns ());
30073 /* Set a block of memory using vectorization instructions for the
30074 unaligned case. We fill the first LENGTH bytes of the memory
30075 area starting from DSTBASE with byte constant VALUE. ALIGN is
30076 the alignment requirement of memory. Return TRUE if succeeded. */
30078 arm_block_set_unaligned_vect (rtx dstbase
,
30079 unsigned HOST_WIDE_INT length
,
30080 unsigned HOST_WIDE_INT value
,
30081 unsigned HOST_WIDE_INT align
)
30083 unsigned int i
, nelt_v16
, nelt_v8
, nelt_mode
;
30086 rtx (*gen_func
) (rtx
, rtx
);
30088 unsigned HOST_WIDE_INT v
= value
;
30089 unsigned int offset
= 0;
30090 gcc_assert ((align
& 0x3) != 0);
30091 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
30092 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
30093 if (length
>= nelt_v16
)
30096 gen_func
= gen_movmisalignv16qi
;
30101 gen_func
= gen_movmisalignv8qi
;
30103 nelt_mode
= GET_MODE_NUNITS (mode
);
30104 gcc_assert (length
>= nelt_mode
);
30105 /* Skip if it isn't profitable. */
30106 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
30109 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
30110 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30112 v
= sext_hwi (v
, BITS_PER_WORD
);
30114 reg
= gen_reg_rtx (mode
);
30115 val_vec
= gen_const_vec_duplicate (mode
, GEN_INT (v
));
30116 /* Emit instruction loading the constant value. */
30117 emit_move_insn (reg
, val_vec
);
30119 /* Handle nelt_mode bytes in a vector. */
30120 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
30122 emit_insn ((*gen_func
) (mem
, reg
));
30123 if (i
+ 2 * nelt_mode
<= length
)
30125 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
30126 offset
+= nelt_mode
;
30127 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30131 /* If there are not less than nelt_v8 bytes leftover, we must be in
30133 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
30135 /* Handle (8, 16) bytes leftover. */
30136 if (i
+ nelt_v8
< length
)
30138 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
30139 offset
+= length
- i
;
30140 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30142 /* We are shifting bytes back, set the alignment accordingly. */
30143 if ((length
& 1) != 0 && align
>= 2)
30144 set_mem_align (mem
, BITS_PER_UNIT
);
30146 emit_insn (gen_movmisalignv16qi (mem
, reg
));
30148 /* Handle (0, 8] bytes leftover. */
30149 else if (i
< length
&& i
+ nelt_v8
>= length
)
30151 if (mode
== V16QImode
)
30152 reg
= gen_lowpart (V8QImode
, reg
);
30154 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
30155 + (nelt_mode
- nelt_v8
))));
30156 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
30157 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
30159 /* We are shifting bytes back, set the alignment accordingly. */
30160 if ((length
& 1) != 0 && align
>= 2)
30161 set_mem_align (mem
, BITS_PER_UNIT
);
30163 emit_insn (gen_movmisalignv8qi (mem
, reg
));
30169 /* Set a block of memory using vectorization instructions for the
30170 aligned case. We fill the first LENGTH bytes of the memory area
30171 starting from DSTBASE with byte constant VALUE. ALIGN is the
30172 alignment requirement of memory. Return TRUE if succeeded. */
30174 arm_block_set_aligned_vect (rtx dstbase
,
30175 unsigned HOST_WIDE_INT length
,
30176 unsigned HOST_WIDE_INT value
,
30177 unsigned HOST_WIDE_INT align
)
30179 unsigned int i
, nelt_v8
, nelt_v16
, nelt_mode
;
30180 rtx dst
, addr
, mem
;
30183 unsigned int offset
= 0;
30185 gcc_assert ((align
& 0x3) == 0);
30186 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
30187 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
30188 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
30193 nelt_mode
= GET_MODE_NUNITS (mode
);
30194 gcc_assert (length
>= nelt_mode
);
30195 /* Skip if it isn't profitable. */
30196 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
30199 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
30201 reg
= gen_reg_rtx (mode
);
30202 val_vec
= gen_const_vec_duplicate (mode
, gen_int_mode (value
, QImode
));
30203 /* Emit instruction loading the constant value. */
30204 emit_move_insn (reg
, val_vec
);
30207 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30208 if (mode
== V16QImode
)
30210 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30211 emit_insn (gen_movmisalignv16qi (mem
, reg
));
30213 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30214 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
30216 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
30217 offset
+= length
- nelt_mode
;
30218 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30219 /* We are shifting bytes back, set the alignment accordingly. */
30220 if ((length
& 0x3) == 0)
30221 set_mem_align (mem
, BITS_PER_UNIT
* 4);
30222 else if ((length
& 0x1) == 0)
30223 set_mem_align (mem
, BITS_PER_UNIT
* 2);
30225 set_mem_align (mem
, BITS_PER_UNIT
);
30227 emit_insn (gen_movmisalignv16qi (mem
, reg
));
30230 /* Fall through for bytes leftover. */
30232 nelt_mode
= GET_MODE_NUNITS (mode
);
30233 reg
= gen_lowpart (V8QImode
, reg
);
30236 /* Handle 8 bytes in a vector. */
30237 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
30239 addr
= plus_constant (Pmode
, dst
, i
);
30240 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
30241 emit_move_insn (mem
, reg
);
30244 /* Handle single word leftover by shifting 4 bytes back. We can
30245 use aligned access for this case. */
30246 if (i
+ UNITS_PER_WORD
== length
)
30248 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
30249 offset
+= i
- UNITS_PER_WORD
;
30250 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
30251 /* We are shifting 4 bytes back, set the alignment accordingly. */
30252 if (align
> UNITS_PER_WORD
)
30253 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
30255 emit_move_insn (mem
, reg
);
30257 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30258 We have to use unaligned access for this case. */
30259 else if (i
< length
)
30261 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
30262 offset
+= length
- nelt_mode
;
30263 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30264 /* We are shifting bytes back, set the alignment accordingly. */
30265 if ((length
& 1) == 0)
30266 set_mem_align (mem
, BITS_PER_UNIT
* 2);
30268 set_mem_align (mem
, BITS_PER_UNIT
);
30270 emit_insn (gen_movmisalignv8qi (mem
, reg
));
30276 /* Set a block of memory using plain strh/strb instructions, only
30277 using instructions allowed by ALIGN on processor. We fill the
30278 first LENGTH bytes of the memory area starting from DSTBASE
30279 with byte constant VALUE. ALIGN is the alignment requirement
30282 arm_block_set_unaligned_non_vect (rtx dstbase
,
30283 unsigned HOST_WIDE_INT length
,
30284 unsigned HOST_WIDE_INT value
,
30285 unsigned HOST_WIDE_INT align
)
30288 rtx dst
, addr
, mem
;
30289 rtx val_exp
, val_reg
, reg
;
30291 HOST_WIDE_INT v
= value
;
30293 gcc_assert (align
== 1 || align
== 2);
30296 v
|= (value
<< BITS_PER_UNIT
);
30298 v
= sext_hwi (v
, BITS_PER_WORD
);
30299 val_exp
= GEN_INT (v
);
30300 /* Skip if it isn't profitable. */
30301 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
30302 align
, true, false))
30305 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
30306 mode
= (align
== 2 ? HImode
: QImode
);
30307 val_reg
= force_reg (SImode
, val_exp
);
30308 reg
= gen_lowpart (mode
, val_reg
);
30310 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
30312 addr
= plus_constant (Pmode
, dst
, i
);
30313 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
30314 emit_move_insn (mem
, reg
);
30317 /* Handle single byte leftover. */
30318 if (i
+ 1 == length
)
30320 reg
= gen_lowpart (QImode
, val_reg
);
30321 addr
= plus_constant (Pmode
, dst
, i
);
30322 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
30323 emit_move_insn (mem
, reg
);
30327 gcc_assert (i
== length
);
30331 /* Set a block of memory using plain strd/str/strh/strb instructions,
30332 to permit unaligned copies on processors which support unaligned
30333 semantics for those instructions. We fill the first LENGTH bytes
30334 of the memory area starting from DSTBASE with byte constant VALUE.
30335 ALIGN is the alignment requirement of memory. */
30337 arm_block_set_aligned_non_vect (rtx dstbase
,
30338 unsigned HOST_WIDE_INT length
,
30339 unsigned HOST_WIDE_INT value
,
30340 unsigned HOST_WIDE_INT align
)
30343 rtx dst
, addr
, mem
;
30344 rtx val_exp
, val_reg
, reg
;
30345 unsigned HOST_WIDE_INT v
;
30348 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
30349 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
30351 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
30352 if (length
< UNITS_PER_WORD
)
30353 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
30356 v
|= (v
<< BITS_PER_WORD
);
30358 v
= sext_hwi (v
, BITS_PER_WORD
);
30360 val_exp
= GEN_INT (v
);
30361 /* Skip if it isn't profitable. */
30362 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
30363 align
, false, use_strd_p
))
30368 /* Try without strd. */
30369 v
= (v
>> BITS_PER_WORD
);
30370 v
= sext_hwi (v
, BITS_PER_WORD
);
30371 val_exp
= GEN_INT (v
);
30372 use_strd_p
= false;
30373 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
30374 align
, false, use_strd_p
))
30379 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
30380 /* Handle double words using strd if possible. */
30383 val_reg
= force_reg (DImode
, val_exp
);
30385 for (; (i
+ 8 <= length
); i
+= 8)
30387 addr
= plus_constant (Pmode
, dst
, i
);
30388 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
30389 emit_move_insn (mem
, reg
);
30393 val_reg
= force_reg (SImode
, val_exp
);
30395 /* Handle words. */
30396 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
30397 for (; (i
+ 4 <= length
); i
+= 4)
30399 addr
= plus_constant (Pmode
, dst
, i
);
30400 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
30401 if ((align
& 3) == 0)
30402 emit_move_insn (mem
, reg
);
30404 emit_insn (gen_unaligned_storesi (mem
, reg
));
30407 /* Merge last pair of STRH and STRB into a STR if possible. */
30408 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
30410 addr
= plus_constant (Pmode
, dst
, i
- 1);
30411 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
30412 /* We are shifting one byte back, set the alignment accordingly. */
30413 if ((align
& 1) == 0)
30414 set_mem_align (mem
, BITS_PER_UNIT
);
30416 /* Most likely this is an unaligned access, and we can't tell at
30417 compilation time. */
30418 emit_insn (gen_unaligned_storesi (mem
, reg
));
30422 /* Handle half word leftover. */
30423 if (i
+ 2 <= length
)
30425 reg
= gen_lowpart (HImode
, val_reg
);
30426 addr
= plus_constant (Pmode
, dst
, i
);
30427 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
30428 if ((align
& 1) == 0)
30429 emit_move_insn (mem
, reg
);
30431 emit_insn (gen_unaligned_storehi (mem
, reg
));
30436 /* Handle single byte leftover. */
30437 if (i
+ 1 == length
)
30439 reg
= gen_lowpart (QImode
, val_reg
);
30440 addr
= plus_constant (Pmode
, dst
, i
);
30441 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
30442 emit_move_insn (mem
, reg
);
30448 /* Set a block of memory using vectorization instructions for both
30449 aligned and unaligned cases. We fill the first LENGTH bytes of
30450 the memory area starting from DSTBASE with byte constant VALUE.
30451 ALIGN is the alignment requirement of memory. */
30453 arm_block_set_vect (rtx dstbase
,
30454 unsigned HOST_WIDE_INT length
,
30455 unsigned HOST_WIDE_INT value
,
30456 unsigned HOST_WIDE_INT align
)
30458 /* Check whether we need to use unaligned store instruction. */
30459 if (((align
& 3) != 0 || (length
& 3) != 0)
30460 /* Check whether unaligned store instruction is available. */
30461 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
30464 if ((align
& 3) == 0)
30465 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
30467 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
30470 /* Expand string store operation. Firstly we try to do that by using
30471 vectorization instructions, then try with ARM unaligned access and
30472 double-word store if profitable. OPERANDS[0] is the destination,
30473 OPERANDS[1] is the number of bytes, operands[2] is the value to
30474 initialize the memory, OPERANDS[3] is the known alignment of the
30477 arm_gen_setmem (rtx
*operands
)
30479 rtx dstbase
= operands
[0];
30480 unsigned HOST_WIDE_INT length
;
30481 unsigned HOST_WIDE_INT value
;
30482 unsigned HOST_WIDE_INT align
;
30484 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
30487 length
= UINTVAL (operands
[1]);
30491 value
= (UINTVAL (operands
[2]) & 0xFF);
30492 align
= UINTVAL (operands
[3]);
30493 if (TARGET_NEON
&& length
>= 8
30494 && current_tune
->string_ops_prefer_neon
30495 && arm_block_set_vect (dstbase
, length
, value
, align
))
30498 if (!unaligned_access
&& (align
& 3) != 0)
30499 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
30501 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
30506 arm_macro_fusion_p (void)
30508 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
30511 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30512 for MOVW / MOVT macro fusion. */
30515 arm_sets_movw_movt_fusible_p (rtx prev_set
, rtx curr_set
)
30517 /* We are trying to fuse
30518 movw imm / movt imm
30519 instructions as a group that gets scheduled together. */
30521 rtx set_dest
= SET_DEST (curr_set
);
30523 if (GET_MODE (set_dest
) != SImode
)
30526 /* We are trying to match:
30527 prev (movw) == (set (reg r0) (const_int imm16))
30528 curr (movt) == (set (zero_extract (reg r0)
30531 (const_int imm16_1))
30533 prev (movw) == (set (reg r1)
30534 (high (symbol_ref ("SYM"))))
30535 curr (movt) == (set (reg r0)
30537 (symbol_ref ("SYM")))) */
30539 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
30541 if (CONST_INT_P (SET_SRC (curr_set
))
30542 && CONST_INT_P (SET_SRC (prev_set
))
30543 && REG_P (XEXP (set_dest
, 0))
30544 && REG_P (SET_DEST (prev_set
))
30545 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
30549 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
30550 && REG_P (SET_DEST (curr_set
))
30551 && REG_P (SET_DEST (prev_set
))
30552 && GET_CODE (SET_SRC (prev_set
)) == HIGH
30553 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
30560 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
30562 rtx prev_set
= single_set (prev
);
30563 rtx curr_set
= single_set (curr
);
30569 if (any_condjump_p (curr
))
30572 if (!arm_macro_fusion_p ())
30575 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
30576 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
30582 /* Return true iff the instruction fusion described by OP is enabled. */
30584 arm_fusion_enabled_p (tune_params::fuse_ops op
)
30586 return current_tune
->fusible_ops
& op
;
30589 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30590 scheduled for speculative execution. Reject the long-running division
30591 and square-root instructions. */
30594 arm_sched_can_speculate_insn (rtx_insn
*insn
)
30596 switch (get_attr_type (insn
))
30604 case TYPE_NEON_FP_SQRT_S
:
30605 case TYPE_NEON_FP_SQRT_D
:
30606 case TYPE_NEON_FP_SQRT_S_Q
:
30607 case TYPE_NEON_FP_SQRT_D_Q
:
30608 case TYPE_NEON_FP_DIV_S
:
30609 case TYPE_NEON_FP_DIV_D
:
30610 case TYPE_NEON_FP_DIV_S_Q
:
30611 case TYPE_NEON_FP_DIV_D_Q
:
30618 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30620 static unsigned HOST_WIDE_INT
30621 arm_asan_shadow_offset (void)
30623 return HOST_WIDE_INT_1U
<< 29;
30627 /* This is a temporary fix for PR60655. Ideally we need
30628 to handle most of these cases in the generic part but
30629 currently we reject minus (..) (sym_ref). We try to
30630 ameliorate the case with minus (sym_ref1) (sym_ref2)
30631 where they are in the same section. */
30634 arm_const_not_ok_for_debug_p (rtx p
)
30636 tree decl_op0
= NULL
;
30637 tree decl_op1
= NULL
;
30639 if (GET_CODE (p
) == UNSPEC
)
30641 if (GET_CODE (p
) == MINUS
)
30643 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
30645 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
30647 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
30648 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
30650 if ((VAR_P (decl_op1
)
30651 || TREE_CODE (decl_op1
) == CONST_DECL
)
30652 && (VAR_P (decl_op0
)
30653 || TREE_CODE (decl_op0
) == CONST_DECL
))
30654 return (get_variable_section (decl_op1
, false)
30655 != get_variable_section (decl_op0
, false));
30657 if (TREE_CODE (decl_op1
) == LABEL_DECL
30658 && TREE_CODE (decl_op0
) == LABEL_DECL
)
30659 return (DECL_CONTEXT (decl_op1
)
30660 != DECL_CONTEXT (decl_op0
));
30670 /* return TRUE if x is a reference to a value in a constant pool */
30672 arm_is_constant_pool_ref (rtx x
)
30675 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
30676 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
30679 /* Remember the last target of arm_set_current_function. */
30680 static GTY(()) tree arm_previous_fndecl
;
30682 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30685 save_restore_target_globals (tree new_tree
)
30687 /* If we have a previous state, use it. */
30688 if (TREE_TARGET_GLOBALS (new_tree
))
30689 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
30690 else if (new_tree
== target_option_default_node
)
30691 restore_target_globals (&default_target_globals
);
30694 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30695 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
30698 arm_option_params_internal ();
30701 /* Invalidate arm_previous_fndecl. */
30704 arm_reset_previous_fndecl (void)
30706 arm_previous_fndecl
= NULL_TREE
;
30709 /* Establish appropriate back-end context for processing the function
30710 FNDECL. The argument might be NULL to indicate processing at top
30711 level, outside of any function scope. */
30714 arm_set_current_function (tree fndecl
)
30716 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
30719 tree old_tree
= (arm_previous_fndecl
30720 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
30723 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30725 /* If current function has no attributes but previous one did,
30726 use the default node. */
30727 if (! new_tree
&& old_tree
)
30728 new_tree
= target_option_default_node
;
30730 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30731 the default have been handled by save_restore_target_globals from
30732 arm_pragma_target_parse. */
30733 if (old_tree
== new_tree
)
30736 arm_previous_fndecl
= fndecl
;
30738 /* First set the target options. */
30739 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
30741 save_restore_target_globals (new_tree
);
30744 /* Implement TARGET_OPTION_PRINT. */
30747 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
30749 int flags
= ptr
->x_target_flags
;
30750 const char *fpu_name
;
30752 fpu_name
= (ptr
->x_arm_fpu_index
== TARGET_FPU_auto
30753 ? "auto" : all_fpus
[ptr
->x_arm_fpu_index
].name
);
30755 fprintf (file
, "%*sselected isa %s\n", indent
, "",
30756 TARGET_THUMB2_P (flags
) ? "thumb2" :
30757 TARGET_THUMB_P (flags
) ? "thumb1" :
30760 if (ptr
->x_arm_arch_string
)
30761 fprintf (file
, "%*sselected architecture %s\n", indent
, "",
30762 ptr
->x_arm_arch_string
);
30764 if (ptr
->x_arm_cpu_string
)
30765 fprintf (file
, "%*sselected CPU %s\n", indent
, "",
30766 ptr
->x_arm_cpu_string
);
30768 if (ptr
->x_arm_tune_string
)
30769 fprintf (file
, "%*sselected tune %s\n", indent
, "",
30770 ptr
->x_arm_tune_string
);
30772 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_name
);
30775 /* Hook to determine if one function can safely inline another. */
30778 arm_can_inline_p (tree caller
, tree callee
)
30780 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
30781 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
30782 bool can_inline
= true;
30784 struct cl_target_option
*caller_opts
30785 = TREE_TARGET_OPTION (caller_tree
? caller_tree
30786 : target_option_default_node
);
30788 struct cl_target_option
*callee_opts
30789 = TREE_TARGET_OPTION (callee_tree
? callee_tree
30790 : target_option_default_node
);
30792 if (callee_opts
== caller_opts
)
30795 /* Callee's ISA features should be a subset of the caller's. */
30796 struct arm_build_target caller_target
;
30797 struct arm_build_target callee_target
;
30798 caller_target
.isa
= sbitmap_alloc (isa_num_bits
);
30799 callee_target
.isa
= sbitmap_alloc (isa_num_bits
);
30801 arm_configure_build_target (&caller_target
, caller_opts
, &global_options_set
,
30803 arm_configure_build_target (&callee_target
, callee_opts
, &global_options_set
,
30805 if (!bitmap_subset_p (callee_target
.isa
, caller_target
.isa
))
30806 can_inline
= false;
30808 sbitmap_free (caller_target
.isa
);
30809 sbitmap_free (callee_target
.isa
);
30811 /* OK to inline between different modes.
30812 Function with mode specific instructions, e.g using asm,
30813 must be explicitly protected with noinline. */
30817 /* Hook to fix function's alignment affected by target attribute. */
30820 arm_relayout_function (tree fndecl
)
30822 if (DECL_USER_ALIGN (fndecl
))
30825 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30828 callee_tree
= target_option_default_node
;
30830 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
30833 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts
->x_target_flags
)));
30836 /* Inner function to process the attribute((target(...))), take an argument and
30837 set the current options from the argument. If we have a list, recursively
30838 go over the list. */
30841 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
30843 if (TREE_CODE (args
) == TREE_LIST
)
30847 for (; args
; args
= TREE_CHAIN (args
))
30848 if (TREE_VALUE (args
)
30849 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
30854 else if (TREE_CODE (args
) != STRING_CST
)
30856 error ("attribute %<target%> argument not a string");
30860 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
30863 while ((q
= strtok (argstr
, ",")) != NULL
)
30866 if (!strcmp (q
, "thumb"))
30867 opts
->x_target_flags
|= MASK_THUMB
;
30869 else if (!strcmp (q
, "arm"))
30870 opts
->x_target_flags
&= ~MASK_THUMB
;
30872 else if (!strcmp (q
, "general-regs-only"))
30873 opts
->x_target_flags
|= MASK_GENERAL_REGS_ONLY
;
30875 else if (!strncmp (q
, "fpu=", 4))
30878 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+ 4,
30879 &fpu_index
, CL_TARGET
))
30881 error ("invalid fpu for target attribute or pragma %qs", q
);
30884 if (fpu_index
== TARGET_FPU_auto
)
30886 /* This doesn't really make sense until we support
30887 general dynamic selection of the architecture and all
30889 sorry ("auto fpu selection not currently permitted here");
30892 opts
->x_arm_fpu_index
= (enum fpu_type
) fpu_index
;
30894 else if (!strncmp (q
, "arch=", 5))
30896 char *arch
= q
+ 5;
30897 const arch_option
*arm_selected_arch
30898 = arm_parse_arch_option_name (all_architectures
, "arch", arch
);
30900 if (!arm_selected_arch
)
30902 error ("invalid architecture for target attribute or pragma %qs",
30907 opts
->x_arm_arch_string
= xstrndup (arch
, strlen (arch
));
30909 else if (q
[0] == '+')
30911 opts
->x_arm_arch_string
30912 = xasprintf ("%s%s", opts
->x_arm_arch_string
, q
);
30916 error ("unknown target attribute or pragma %qs", q
);
30924 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30927 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
30928 struct gcc_options
*opts_set
)
30930 struct cl_target_option cl_opts
;
30932 if (!arm_valid_target_attribute_rec (args
, opts
))
30935 cl_target_option_save (&cl_opts
, opts
);
30936 arm_configure_build_target (&arm_active_target
, &cl_opts
, opts_set
, false);
30937 arm_option_check_internal (opts
);
30938 /* Do any overrides, such as global options arch=xxx.
30939 We do this since arm_active_target was overridden. */
30940 arm_option_reconfigure_globals ();
30941 arm_options_perform_arch_sanity_checks ();
30942 arm_option_override_internal (opts
, opts_set
);
30944 return build_target_option_node (opts
);
30948 add_attribute (const char * mode
, tree
*attributes
)
30950 size_t len
= strlen (mode
);
30951 tree value
= build_string (len
, mode
);
30953 TREE_TYPE (value
) = build_array_type (char_type_node
,
30954 build_index_type (size_int (len
)));
30956 *attributes
= tree_cons (get_identifier ("target"),
30957 build_tree_list (NULL_TREE
, value
),
30961 /* For testing. Insert thumb or arm modes alternatively on functions. */
30964 arm_insert_attributes (tree fndecl
, tree
* attributes
)
30968 if (! TARGET_FLIP_THUMB
)
30971 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
30972 || fndecl_built_in_p (fndecl
) || DECL_ARTIFICIAL (fndecl
))
30975 /* Nested definitions must inherit mode. */
30976 if (current_function_decl
)
30978 mode
= TARGET_THUMB
? "thumb" : "arm";
30979 add_attribute (mode
, attributes
);
30983 /* If there is already a setting don't change it. */
30984 if (lookup_attribute ("target", *attributes
) != NULL
)
30987 mode
= thumb_flipper
? "thumb" : "arm";
30988 add_attribute (mode
, attributes
);
30990 thumb_flipper
= !thumb_flipper
;
30993 /* Hook to validate attribute((target("string"))). */
30996 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
30997 tree args
, int ARG_UNUSED (flags
))
31000 struct gcc_options func_options
;
31001 tree cur_tree
, new_optimize
;
31002 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
31004 /* Get the optimization options of the current function. */
31005 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
31007 /* If the function changed the optimization levels as well as setting target
31008 options, start with the optimizations specified. */
31009 if (!func_optimize
)
31010 func_optimize
= optimization_default_node
;
31012 /* Init func_options. */
31013 memset (&func_options
, 0, sizeof (func_options
));
31014 init_options_struct (&func_options
, NULL
);
31015 lang_hooks
.init_options_struct (&func_options
);
31017 /* Initialize func_options to the defaults. */
31018 cl_optimization_restore (&func_options
,
31019 TREE_OPTIMIZATION (func_optimize
));
31021 cl_target_option_restore (&func_options
,
31022 TREE_TARGET_OPTION (target_option_default_node
));
31024 /* Set func_options flags with new target mode. */
31025 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
31026 &global_options_set
);
31028 if (cur_tree
== NULL_TREE
)
31031 new_optimize
= build_optimization_node (&func_options
);
31033 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
31035 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
31037 finalize_options_struct (&func_options
);
31042 /* Match an ISA feature bitmap to a named FPU. We always use the
31043 first entry that exactly matches the feature set, so that we
31044 effectively canonicalize the FPU name for the assembler. */
31046 arm_identify_fpu_from_isa (sbitmap isa
)
31048 auto_sbitmap
fpubits (isa_num_bits
);
31049 auto_sbitmap
cand_fpubits (isa_num_bits
);
31051 bitmap_and (fpubits
, isa
, isa_all_fpubits
);
31053 /* If there are no ISA feature bits relating to the FPU, we must be
31054 doing soft-float. */
31055 if (bitmap_empty_p (fpubits
))
31058 for (unsigned int i
= 0; i
< TARGET_FPU_auto
; i
++)
31060 arm_initialize_isa (cand_fpubits
, all_fpus
[i
].isa_bits
);
31061 if (bitmap_equal_p (fpubits
, cand_fpubits
))
31062 return all_fpus
[i
].name
;
31064 /* We must find an entry, or things have gone wrong. */
31065 gcc_unreachable ();
31068 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
31069 by the function fndecl. */
31071 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
31073 tree target_parts
= DECL_FUNCTION_SPECIFIC_TARGET (decl
);
31075 struct cl_target_option
*targ_options
;
31077 targ_options
= TREE_TARGET_OPTION (target_parts
);
31079 targ_options
= TREE_TARGET_OPTION (target_option_current_node
);
31080 gcc_assert (targ_options
);
31082 /* Only update the assembler .arch string if it is distinct from the last
31083 such string we printed. arch_to_print is set conditionally in case
31084 targ_options->x_arm_arch_string is NULL which can be the case
31085 when cc1 is invoked directly without passing -march option. */
31086 std::string arch_to_print
;
31087 if (targ_options
->x_arm_arch_string
)
31088 arch_to_print
= targ_options
->x_arm_arch_string
;
31090 if (arch_to_print
!= arm_last_printed_arch_string
)
31092 std::string arch_name
31093 = arch_to_print
.substr (0, arch_to_print
.find ("+"));
31094 asm_fprintf (asm_out_file
, "\t.arch %s\n", arch_name
.c_str ());
31095 const arch_option
*arch
31096 = arm_parse_arch_option_name (all_architectures
, "-march",
31097 targ_options
->x_arm_arch_string
);
31098 auto_sbitmap
opt_bits (isa_num_bits
);
31101 if (arch
->common
.extensions
)
31103 for (const struct cpu_arch_extension
*opt
= arch
->common
.extensions
;
31109 arm_initialize_isa (opt_bits
, opt
->isa_bits
);
31110 if (bitmap_subset_p (opt_bits
, arm_active_target
.isa
)
31111 && !bitmap_subset_p (opt_bits
, isa_all_fpubits
))
31112 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n",
31118 arm_last_printed_arch_string
= arch_to_print
;
31121 fprintf (stream
, "\t.syntax unified\n");
31125 if (is_called_in_ARM_mode (decl
)
31126 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
31127 && cfun
->is_thunk
))
31128 fprintf (stream
, "\t.code 32\n");
31129 else if (TARGET_THUMB1
)
31130 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
31132 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
31135 fprintf (stream
, "\t.arm\n");
31137 std::string fpu_to_print
31138 = TARGET_SOFT_FLOAT
31139 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target
.isa
);
31141 if (fpu_to_print
!= arm_last_printed_arch_string
)
31143 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_to_print
.c_str ());
31144 arm_last_printed_fpu_string
= fpu_to_print
;
31147 if (TARGET_POKE_FUNCTION_NAME
)
31148 arm_poke_function_name (stream
, (const char *) name
);
31151 /* If MEM is in the form of [base+offset], extract the two parts
31152 of address and set to BASE and OFFSET, otherwise return false
31153 after clearing BASE and OFFSET. */
31156 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
31160 gcc_assert (MEM_P (mem
));
31162 addr
= XEXP (mem
, 0);
31164 /* Strip off const from addresses like (const (addr)). */
31165 if (GET_CODE (addr
) == CONST
)
31166 addr
= XEXP (addr
, 0);
31168 if (GET_CODE (addr
) == REG
)
31171 *offset
= const0_rtx
;
31175 if (GET_CODE (addr
) == PLUS
31176 && GET_CODE (XEXP (addr
, 0)) == REG
31177 && CONST_INT_P (XEXP (addr
, 1)))
31179 *base
= XEXP (addr
, 0);
31180 *offset
= XEXP (addr
, 1);
31185 *offset
= NULL_RTX
;
31190 /* If INSN is a load or store of address in the form of [base+offset],
31191 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
31192 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
31193 otherwise return FALSE. */
31196 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
31200 gcc_assert (INSN_P (insn
));
31201 x
= PATTERN (insn
);
31202 if (GET_CODE (x
) != SET
)
31206 dest
= SET_DEST (x
);
31207 if (GET_CODE (src
) == REG
&& GET_CODE (dest
) == MEM
)
31210 extract_base_offset_in_addr (dest
, base
, offset
);
31212 else if (GET_CODE (src
) == MEM
&& GET_CODE (dest
) == REG
)
31215 extract_base_offset_in_addr (src
, base
, offset
);
31220 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
31223 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31225 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
31226 and PRI are only calculated for these instructions. For other instruction,
31227 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
31228 instruction fusion can be supported by returning different priorities.
31230 It's important that irrelevant instructions get the largest FUSION_PRI. */
31233 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
31234 int *fusion_pri
, int *pri
)
31240 gcc_assert (INSN_P (insn
));
31243 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
31250 /* Load goes first. */
31252 *fusion_pri
= tmp
- 1;
31254 *fusion_pri
= tmp
- 2;
31258 /* INSN with smaller base register goes first. */
31259 tmp
-= ((REGNO (base
) & 0xff) << 20);
31261 /* INSN with smaller offset goes first. */
31262 off_val
= (int)(INTVAL (offset
));
31264 tmp
-= (off_val
& 0xfffff);
31266 tmp
+= ((- off_val
) & 0xfffff);
31273 /* Construct and return a PARALLEL RTX vector with elements numbering the
31274 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31275 the vector - from the perspective of the architecture. This does not
31276 line up with GCC's perspective on lane numbers, so we end up with
31277 different masks depending on our target endian-ness. The diagram
31278 below may help. We must draw the distinction when building masks
31279 which select one half of the vector. An instruction selecting
31280 architectural low-lanes for a big-endian target, must be described using
31281 a mask selecting GCC high-lanes.
31283 Big-Endian Little-Endian
31285 GCC 0 1 2 3 3 2 1 0
31286 | x | x | x | x | | x | x | x | x |
31287 Architecture 3 2 1 0 3 2 1 0
31289 Low Mask: { 2, 3 } { 0, 1 }
31290 High Mask: { 0, 1 } { 2, 3 }
31294 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
31296 int nunits
= GET_MODE_NUNITS (mode
);
31297 rtvec v
= rtvec_alloc (nunits
/ 2);
31298 int high_base
= nunits
/ 2;
31304 if (BYTES_BIG_ENDIAN
)
31305 base
= high
? low_base
: high_base
;
31307 base
= high
? high_base
: low_base
;
31309 for (i
= 0; i
< nunits
/ 2; i
++)
31310 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
31312 t1
= gen_rtx_PARALLEL (mode
, v
);
31316 /* Check OP for validity as a PARALLEL RTX vector with elements
31317 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31318 from the perspective of the architecture. See the diagram above
31319 arm_simd_vect_par_cnst_half_p for more details. */
31322 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
31325 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
31326 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
31327 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
31330 if (!VECTOR_MODE_P (mode
))
31333 if (count_op
!= count_ideal
)
31336 for (i
= 0; i
< count_ideal
; i
++)
31338 rtx elt_op
= XVECEXP (op
, 0, i
);
31339 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
31341 if (!CONST_INT_P (elt_op
)
31342 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
31348 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31351 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
31354 /* For now, we punt and not handle this for TARGET_THUMB1. */
31355 if (vcall_offset
&& TARGET_THUMB1
)
31358 /* Otherwise ok. */
31362 /* Generate RTL for a conditional branch with rtx comparison CODE in
31363 mode CC_MODE. The destination of the unlikely conditional branch
31367 arm_gen_unlikely_cbranch (enum rtx_code code
, machine_mode cc_mode
,
31371 x
= gen_rtx_fmt_ee (code
, VOIDmode
,
31372 gen_rtx_REG (cc_mode
, CC_REGNUM
),
31375 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
31376 gen_rtx_LABEL_REF (VOIDmode
, label_ref
),
31378 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
31381 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31383 For pure-code sections there is no letter code for this attribute, so
31384 output all the section flags numerically when this is needed. */
31387 arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
)
31390 if (flags
& SECTION_ARM_PURECODE
)
31394 if (!(flags
& SECTION_DEBUG
))
31396 if (flags
& SECTION_EXCLUDE
)
31397 *num
|= 0x80000000;
31398 if (flags
& SECTION_WRITE
)
31400 if (flags
& SECTION_CODE
)
31402 if (flags
& SECTION_MERGE
)
31404 if (flags
& SECTION_STRINGS
)
31406 if (flags
& SECTION_TLS
)
31408 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
31417 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31419 If pure-code is passed as an option, make sure all functions are in
31420 sections that have the SHF_ARM_PURECODE attribute. */
31423 arm_function_section (tree decl
, enum node_frequency freq
,
31424 bool startup
, bool exit
)
31426 const char * section_name
;
31429 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
31430 return default_function_section (decl
, freq
, startup
, exit
);
31432 if (!target_pure_code
)
31433 return default_function_section (decl
, freq
, startup
, exit
);
31436 section_name
= DECL_SECTION_NAME (decl
);
31438 /* If a function is not in a named section then it falls under the 'default'
31439 text section, also known as '.text'. We can preserve previous behavior as
31440 the default text section already has the SHF_ARM_PURECODE section
31444 section
*default_sec
= default_function_section (decl
, freq
, startup
,
31447 /* If default_sec is not null, then it must be a special section like for
31448 example .text.startup. We set the pure-code attribute and return the
31449 same section to preserve existing behavior. */
31451 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
31452 return default_sec
;
31455 /* Otherwise look whether a section has already been created with
31457 sec
= get_named_section (decl
, section_name
, 0);
31459 /* If that is not the case passing NULL as the section's name to
31460 'get_named_section' will create a section with the declaration's
31462 sec
= get_named_section (decl
, NULL
, 0);
31464 /* Set the SHF_ARM_PURECODE attribute. */
31465 sec
->common
.flags
|= SECTION_ARM_PURECODE
;
31470 /* Implements the TARGET_SECTION_FLAGS hook.
31472 If DECL is a function declaration and pure-code is passed as an option
31473 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
31474 section's name and RELOC indicates whether the declarations initializer may
31475 contain runtime relocations. */
31477 static unsigned int
31478 arm_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
31480 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
31482 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
&& target_pure_code
)
31483 flags
|= SECTION_ARM_PURECODE
;
31488 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31491 arm_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
,
31493 rtx
*quot_p
, rtx
*rem_p
)
31495 if (mode
== SImode
)
31496 gcc_assert (!TARGET_IDIV
);
31498 scalar_int_mode libval_mode
31499 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode
));
31501 rtx libval
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
31503 op0
, GET_MODE (op0
),
31504 op1
, GET_MODE (op1
));
31506 rtx quotient
= simplify_gen_subreg (mode
, libval
, libval_mode
, 0);
31507 rtx remainder
= simplify_gen_subreg (mode
, libval
, libval_mode
,
31508 GET_MODE_SIZE (mode
));
31510 gcc_assert (quotient
);
31511 gcc_assert (remainder
);
31513 *quot_p
= quotient
;
31514 *rem_p
= remainder
;
31517 /* This function checks for the availability of the coprocessor builtin passed
31518 in BUILTIN for the current target. Returns true if it is available and
31519 false otherwise. If a BUILTIN is passed for which this function has not
31520 been implemented it will cause an exception. */
/* NOTE(review): this copy of the file is missing many lines of this
   function (the return type, the Thumb-1 early-out, and most of the
   switch cases for the VUNSPEC_* coprocessor builtins) — restore from
   the original source before building.  */
31523 arm_coproc_builtin_available (enum unspecv builtin
)
31525 /* None of these builtins are available in Thumb mode if the target only
31526 supports Thumb-1. */
/* LDC2L/STC2L: long-form variants of the coprocessor load/store
   builtins; grouped with the cases above (elided here).  */
31544 case VUNSPEC_LDC2L
:
31546 case VUNSPEC_STC2L
:
31549 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31556 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
/* Architecture gate for the MCRR/MRRC family of builtins.  */
31558 if (arm_arch6
|| arm_arch5te
)
31561 case VUNSPEC_MCRR2
:
31562 case VUNSPEC_MRRC2
:
/* Any builtin not handled above is a caller error.  */
31567 gcc_unreachable ();
31572 /* This function returns true if OP is a valid memory operand for the ldc and
31573 stc coprocessor instructions and false otherwise. */
31576 arm_coproc_ldc_stc_legitimate_address (rtx op
)
31578 HOST_WIDE_INT range
;
31579 /* Has to be a memory operand. */
31585 /* We accept registers. */
31589 switch GET_CODE (op
)
31593 /* Or registers with an offset. */
31594 if (!REG_P (XEXP (op
, 0)))
31599 /* The offset must be an immediate though. */
31600 if (!CONST_INT_P (op
))
31603 range
= INTVAL (op
);
31605 /* Within the range of [-1020,1020]. */
31606 if (!IN_RANGE (range
, -1020, 1020))
31609 /* And a multiple of 4. */
31610 return (range
% 4) == 0;
31616 return REG_P (XEXP (op
, 0));
31618 gcc_unreachable ();
31623 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31625 In VFPv1, VFP registers could only be accessed in the mode they were
31626 set, so subregs would be invalid there. However, we don't support
31627 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31629 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31630 VFP registers in little-endian order. We can't describe that accurately to
31631 GCC, so avoid taking subregs of such values.
31633 The only exception is going from a 128-bit to a 64-bit type. In that
31634 case the data layout happens to be consistent for big-endian, so we
31635 explicitly allow that case. */
31638 arm_can_change_mode_class (machine_mode from
, machine_mode to
,
31639 reg_class_t rclass
)
31642 && !(GET_MODE_SIZE (from
) == 16 && GET_MODE_SIZE (to
) == 8)
31643 && (GET_MODE_SIZE (from
) > UNITS_PER_WORD
31644 || GET_MODE_SIZE (to
) > UNITS_PER_WORD
)
31645 && reg_classes_intersect_p (VFP_REGS
, rclass
))
31650 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31651 strcpy from constants will be faster. */
31653 static HOST_WIDE_INT
31654 arm_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
31656 unsigned int factor
= (TARGET_THUMB
|| ! arm_tune_xscale
? 1 : 2);
31657 if (TREE_CODE (exp
) == STRING_CST
&& !optimize_size
)
31658 return MAX (align
, BITS_PER_WORD
* factor
);
31662 /* Emit a speculation barrier on target architectures that do not have
31663 DSB/ISB directly. Such systems probably don't need a barrier
31664 themselves, but if the code is ever run on a later architecture, it
31665 might become a problem. */
31667 arm_emit_speculation_barrier_function ()
31669 emit_library_call (speculation_barrier_libfunc
, LCT_NORMAL
, VOIDmode
);
31673 namespace selftest
{
31675 /* Scan the static data tables generated by parsecpu.awk looking for
31676 potential issues with the data. We primarily check for
31677 inconsistencies in the option extensions at present (extensions
31678 that duplicate others but aren't marked as aliases). Furthermore,
31679 for correct canonicalization later options must never be a subset
31680 of an earlier option. Any extension should also only specify other
31681 feature bits and never an architecture bit. The architecture is inferred
31682 from the declaration of the extension. */
31684 arm_test_cpu_arch_data (void)
31686 const arch_option
*arch
;
31687 const cpu_option
*cpu
;
31688 auto_sbitmap
target_isa (isa_num_bits
);
31689 auto_sbitmap
isa1 (isa_num_bits
);
31690 auto_sbitmap
isa2 (isa_num_bits
);
31692 for (arch
= all_architectures
; arch
->common
.name
!= NULL
; ++arch
)
31694 const cpu_arch_extension
*ext1
, *ext2
;
31696 if (arch
->common
.extensions
== NULL
)
31699 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
31701 for (ext1
= arch
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
31706 arm_initialize_isa (isa1
, ext1
->isa_bits
);
31707 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
31709 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
31712 arm_initialize_isa (isa2
, ext2
->isa_bits
);
31713 /* If the option is a subset of the parent option, it doesn't
31714 add anything and so isn't useful. */
31715 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
31717 /* If the extension specifies any architectural bits then
31718 disallow it. Extensions should only specify feature bits. */
31719 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
31724 for (cpu
= all_cores
; cpu
->common
.name
!= NULL
; ++cpu
)
31726 const cpu_arch_extension
*ext1
, *ext2
;
31728 if (cpu
->common
.extensions
== NULL
)
31731 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
31733 for (ext1
= cpu
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
31738 arm_initialize_isa (isa1
, ext1
->isa_bits
);
31739 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
31741 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
31744 arm_initialize_isa (isa2
, ext2
->isa_bits
);
31745 /* If the option is a subset of the parent option, it doesn't
31746 add anything and so isn't useful. */
31747 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
31749 /* If the extension specifies any architectural bits then
31750 disallow it. Extensions should only specify feature bits. */
31751 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
31757 /* Scan the static data tables generated by parsecpu.awk looking for
31758 potential issues with the data. Here we check for consistency between the
31759 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31760 a feature bit that is not defined by any FPU flag. */
31762 arm_test_fpu_data (void)
31764 auto_sbitmap
isa_all_fpubits (isa_num_bits
);
31765 auto_sbitmap
fpubits (isa_num_bits
);
31766 auto_sbitmap
tmpset (isa_num_bits
);
31768 static const enum isa_feature fpu_bitlist
[]
31769 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
31770 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
31772 for (unsigned int i
= 0; i
< TARGET_FPU_auto
; i
++)
31774 arm_initialize_isa (fpubits
, all_fpus
[i
].isa_bits
);
31775 bitmap_and_compl (tmpset
, isa_all_fpubits
, fpubits
);
31776 bitmap_clear (isa_all_fpubits
);
31777 bitmap_copy (isa_all_fpubits
, tmpset
);
31780 if (!bitmap_empty_p (isa_all_fpubits
))
31782 fprintf (stderr
, "Error: found feature bits in the ALL_FPU_INTERAL"
31783 " group that are not defined by any FPU.\n"
31784 " Check your arm-cpus.in.\n");
31785 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits
));
31790 arm_run_selftests (void)
31792 arm_test_cpu_arch_data ();
31793 arm_test_fpu_data ();
31795 } /* Namespace selftest. */
31797 #undef TARGET_RUN_TARGET_SELFTESTS
31798 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31799 #endif /* CHECKING_P */
/* The machine-dependent hook vector for this backend, built from the
   TARGET_* hook macros defined earlier in this file.  */
31801 struct gcc_target targetm
= TARGET_INITIALIZER
;
/* Garbage-collector root tables generated by gengtype for this file.  */
31803 #include "gt-arm.h"