1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2019 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
26 #define INCLUDE_STRING
28 #include "coretypes.h"
37 #include "stringpool.h"
44 #include "diagnostic-core.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
51 #include "insn-attr.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
65 #include "target-globals.h"
67 #include "tm-constrs.h"
69 #include "optabs-libfuncs.h"
74 /* This file should be included last. */
75 #include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* The last .arch and .fpu assembly strings that we printed.  Used to
   avoid re-emitting an identical directive.  */
static std::string arm_last_printed_arch_string;
static std::string arm_last_printed_fpu_string;

/* Hook the language front end may set to emit language-specific
   object attributes.  NOTE(review): the call site is outside this
   chunk -- presumably invoked during asm file start; confirm.  */
void (*arm_lang_output_object_attributes_hook)(void);
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx
);
94 static int arm_needs_doubleword_align (machine_mode
, const_tree
);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets
*arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
100 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap
);
103 static int arm_address_register_rtx_p (rtx
, int);
104 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
105 static bool is_called_in_ARM_mode (tree
);
106 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
107 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
108 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
109 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
110 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
111 inline static int thumb1_index_register_rtx_p (rtx
, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx
, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx
, int);
117 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
118 static bool arm_print_operand_punct_valid_p (unsigned char code
);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
120 static arm_cc
get_arm_condition_code (rtx
);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx
*, const char *, const char *,
124 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
125 static struct machine_function
*arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
128 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
129 static Mnode
*add_minipool_forward_ref (Mfix
*);
130 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
131 static Mnode
*add_minipool_backward_ref (Mfix
*);
132 static void assign_minipool_offsets (Mfix
*);
133 static void arm_print_value (FILE *, rtx
);
134 static void dump_minipool (rtx_insn
*);
135 static int arm_barrier_cost (rtx_insn
*);
136 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
137 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
138 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree
);
145 static unsigned long arm_compute_func_type (void);
146 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
147 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
148 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
152 static tree
arm_handle_cmse_nonsecure_entry (tree
*, tree
, tree
, int, bool *);
153 static tree
arm_handle_cmse_nonsecure_call (tree
*, tree
, tree
, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree
, const_tree
);
157 static void arm_set_default_type_attributes (tree
);
158 static int arm_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code
,
161 unsigned HOST_WIDE_INT val
,
162 struct four_ints
*return_sequence
);
163 static int optimal_immediate_sequence_1 (enum rtx_code code
,
164 unsigned HOST_WIDE_INT val
,
165 struct four_ints
*return_sequence
,
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree
, tree
);
169 static machine_mode
arm_promote_function_mode (const_tree
,
172 static bool arm_return_in_memory (const_tree
, const_tree
);
173 static rtx
arm_function_value (const_tree
, const_tree
, bool);
174 static rtx
arm_libcall_value_1 (machine_mode
);
175 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
182 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
183 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
184 static int arm_insn_cost (rtx_insn
*, bool);
185 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
186 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
187 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
188 static void emit_constant_insn (rtx cond
, rtx pattern
);
189 static rtx_insn
*emit_set_insn (rtx
, rtx
);
190 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
191 static int arm_arg_partial_bytes (cumulative_args_t
,
192 const function_arg_info
&);
193 static rtx
arm_function_arg (cumulative_args_t
, const function_arg_info
&);
194 static void arm_function_arg_advance (cumulative_args_t
,
195 const function_arg_info
&);
196 static pad_direction
arm_function_arg_padding (machine_mode
, const_tree
);
197 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
198 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
200 static rtx
aapcs_libcall_value (machine_mode
);
201 static int aapcs_select_return_coproc (const_tree
, const_tree
);
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
205 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
208 static void arm_encode_section_info (tree
, rtx
, int);
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree
, tree
*);
215 static void arm_setup_incoming_varargs (cumulative_args_t
,
216 const function_arg_info
&, int *, int);
217 static bool arm_pass_by_reference (cumulative_args_t
,
218 const function_arg_info
&);
219 static bool arm_promote_prototypes (const_tree
);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree
);
223 static bool arm_must_pass_in_stack (const function_arg_info
&);
224 static bool arm_return_in_memory (const_tree
, const_tree
);
226 static void arm_unwind_emit (FILE *, rtx_insn
*);
227 static bool arm_output_ttype (rtx
);
228 static void arm_asm_emit_except_personality (rtx
);
230 static void arm_asm_init_sections (void);
231 static rtx
arm_dwarf_register_span (rtx
);
233 static tree
arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree
arm_get_cookie_size (tree
);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree
);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree
arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree
, rtx
);
245 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
246 static void arm_option_override (void);
247 static void arm_option_save (struct cl_target_option
*, struct gcc_options
*);
248 static void arm_option_restore (struct gcc_options
*,
249 struct cl_target_option
*);
250 static void arm_override_options_after_change (void);
251 static void arm_option_print (FILE *, int, struct cl_target_option
*);
252 static void arm_set_current_function (tree
);
253 static bool arm_can_inline_p (tree
, tree
);
254 static void arm_relayout_function (tree
);
255 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
256 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
257 static bool arm_sched_can_speculate_insn (rtx_insn
*);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn
*);
260 static int arm_issue_rate (void);
261 static int arm_sched_variable_issue (FILE *, int, rtx_insn
*, int);
262 static int arm_first_cycle_multipass_dfa_lookahead (void);
263 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
264 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
265 static bool arm_output_addr_const_extra (FILE *, rtx
);
266 static bool arm_allocate_stack_slots_for_args (void);
267 static bool arm_warn_func_return (tree
);
268 static tree
arm_promoted_type (const_tree t
);
269 static bool arm_scalar_mode_supported_p (scalar_mode
);
270 static bool arm_frame_pointer_required (void);
271 static bool arm_can_eliminate (const int, const int);
272 static void arm_asm_trampoline_template (FILE *);
273 static void arm_trampoline_init (rtx
, tree
, rtx
);
274 static rtx
arm_trampoline_adjust_address (rtx
);
275 static rtx_insn
*arm_pic_static_addr (rtx orig
, rtx reg
);
276 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
277 static bool xscale_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
278 static bool fa726te_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
279 static bool arm_array_mode_supported_p (machine_mode
,
280 unsigned HOST_WIDE_INT
);
281 static machine_mode
arm_preferred_simd_mode (scalar_mode
);
282 static bool arm_class_likely_spilled_p (reg_class_t
);
283 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
284 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
285 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
289 static void arm_conditional_register_usage (void);
290 static enum flt_eval_method
arm_excess_precision (enum excess_precision_type
);
291 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
292 static void arm_autovectorize_vector_sizes (vector_sizes
*, bool);
293 static int arm_default_branch_cost (bool, bool);
294 static int arm_cortex_a5_branch_cost (bool, bool);
295 static int arm_cortex_m_branch_cost (bool, bool);
296 static int arm_cortex_m7_branch_cost (bool, bool);
298 static bool arm_vectorize_vec_perm_const (machine_mode
, rtx
, rtx
, rtx
,
299 const vec_perm_indices
&);
301 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
303 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
305 int misalign ATTRIBUTE_UNUSED
);
306 static unsigned arm_add_stmt_cost (void *data
, int count
,
307 enum vect_cost_for_stmt kind
,
308 struct _stmt_vec_info
*stmt_info
,
310 enum vect_cost_model_location where
);
312 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
313 bool op0_preserve_value
);
314 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
316 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
317 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
319 static section
*arm_function_section (tree
, enum node_frequency
, bool, bool);
320 static bool arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
);
321 static unsigned int arm_elf_section_type_flags (tree decl
, const char *name
,
323 static void arm_expand_divmod_libfunc (rtx
, machine_mode
, rtx
, rtx
, rtx
*, rtx
*);
324 static opt_scalar_float_mode
arm_floatn_mode (int, bool);
325 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode
);
326 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode
);
327 static bool arm_modes_tieable_p (machine_mode
, machine_mode
);
328 static HOST_WIDE_INT
arm_constant_alignment (const_tree
, HOST_WIDE_INT
);
330 /* Table of machine attributes. */
331 static const struct attribute_spec arm_attribute_table
[] =
333 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
334 affects_type_identity, handler, exclude } */
335 /* Function calls made to this symbol must be done indirectly, because
336 it may lie outside of the 26 bit addressing range of a normal function
338 { "long_call", 0, 0, false, true, true, false, NULL
, NULL
},
339 /* Whereas these functions are always known to reside within the 26 bit
341 { "short_call", 0, 0, false, true, true, false, NULL
, NULL
},
342 /* Specify the procedure call conventions for a function. */
343 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute
,
345 /* Interrupt Service Routines have special prologue and epilogue requirements. */
346 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute
,
348 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute
,
350 { "naked", 0, 0, true, false, false, false,
351 arm_handle_fndecl_attribute
, NULL
},
353 /* ARM/PE has three new attributes:
355 dllexport - for exporting a function/variable that will live in a dll
356 dllimport - for importing a function/variable from a dll
358 Microsoft allows multiple declspecs in one __declspec, separating
359 them with spaces. We do NOT support this. Instead, use __declspec
362 { "dllimport", 0, 0, true, false, false, false, NULL
, NULL
},
363 { "dllexport", 0, 0, true, false, false, false, NULL
, NULL
},
364 { "interfacearm", 0, 0, true, false, false, false,
365 arm_handle_fndecl_attribute
, NULL
},
366 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
367 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute
,
369 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute
,
371 { "notshared", 0, 0, false, true, false, false,
372 arm_handle_notshared_attribute
, NULL
},
374 /* ARMv8-M Security Extensions support. */
375 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
376 arm_handle_cmse_nonsecure_entry
, NULL
},
377 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
378 arm_handle_cmse_nonsecure_call
, NULL
},
379 { NULL
, 0, 0, false, false, false, false, NULL
, NULL
}
382 /* Initialize the GCC target structure. */
383 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
384 #undef TARGET_MERGE_DECL_ATTRIBUTES
385 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
388 #undef TARGET_LEGITIMIZE_ADDRESS
389 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
391 #undef TARGET_ATTRIBUTE_TABLE
392 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
394 #undef TARGET_INSERT_ATTRIBUTES
395 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
397 #undef TARGET_ASM_FILE_START
398 #define TARGET_ASM_FILE_START arm_file_start
399 #undef TARGET_ASM_FILE_END
400 #define TARGET_ASM_FILE_END arm_file_end
402 #undef TARGET_ASM_ALIGNED_SI_OP
403 #define TARGET_ASM_ALIGNED_SI_OP NULL
404 #undef TARGET_ASM_INTEGER
405 #define TARGET_ASM_INTEGER arm_assemble_integer
407 #undef TARGET_PRINT_OPERAND
408 #define TARGET_PRINT_OPERAND arm_print_operand
409 #undef TARGET_PRINT_OPERAND_ADDRESS
410 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
411 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
412 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
414 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
415 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
417 #undef TARGET_ASM_FUNCTION_PROLOGUE
418 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
420 #undef TARGET_ASM_FUNCTION_EPILOGUE
421 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
423 #undef TARGET_CAN_INLINE_P
424 #define TARGET_CAN_INLINE_P arm_can_inline_p
426 #undef TARGET_RELAYOUT_FUNCTION
427 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
429 #undef TARGET_OPTION_OVERRIDE
430 #define TARGET_OPTION_OVERRIDE arm_option_override
432 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
433 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
435 #undef TARGET_OPTION_SAVE
436 #define TARGET_OPTION_SAVE arm_option_save
438 #undef TARGET_OPTION_RESTORE
439 #define TARGET_OPTION_RESTORE arm_option_restore
441 #undef TARGET_OPTION_PRINT
442 #define TARGET_OPTION_PRINT arm_option_print
444 #undef TARGET_COMP_TYPE_ATTRIBUTES
445 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
447 #undef TARGET_SCHED_CAN_SPECULATE_INSN
448 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
450 #undef TARGET_SCHED_MACRO_FUSION_P
451 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
453 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
454 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
456 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
457 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
459 #undef TARGET_SCHED_ADJUST_COST
460 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
462 #undef TARGET_SET_CURRENT_FUNCTION
463 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
465 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
466 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
468 #undef TARGET_SCHED_REORDER
469 #define TARGET_SCHED_REORDER arm_sched_reorder
471 #undef TARGET_REGISTER_MOVE_COST
472 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
474 #undef TARGET_MEMORY_MOVE_COST
475 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
477 #undef TARGET_ENCODE_SECTION_INFO
479 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
481 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
484 #undef TARGET_STRIP_NAME_ENCODING
485 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
487 #undef TARGET_ASM_INTERNAL_LABEL
488 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
490 #undef TARGET_FLOATN_MODE
491 #define TARGET_FLOATN_MODE arm_floatn_mode
493 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
494 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
496 #undef TARGET_FUNCTION_VALUE
497 #define TARGET_FUNCTION_VALUE arm_function_value
499 #undef TARGET_LIBCALL_VALUE
500 #define TARGET_LIBCALL_VALUE arm_libcall_value
502 #undef TARGET_FUNCTION_VALUE_REGNO_P
503 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
505 #undef TARGET_ASM_OUTPUT_MI_THUNK
506 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
507 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
508 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
510 #undef TARGET_RTX_COSTS
511 #define TARGET_RTX_COSTS arm_rtx_costs
512 #undef TARGET_ADDRESS_COST
513 #define TARGET_ADDRESS_COST arm_address_cost
514 #undef TARGET_INSN_COST
515 #define TARGET_INSN_COST arm_insn_cost
517 #undef TARGET_SHIFT_TRUNCATION_MASK
518 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
519 #undef TARGET_VECTOR_MODE_SUPPORTED_P
520 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
521 #undef TARGET_ARRAY_MODE_SUPPORTED_P
522 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
523 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
524 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
525 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
526 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
527 arm_autovectorize_vector_sizes
529 #undef TARGET_MACHINE_DEPENDENT_REORG
530 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
532 #undef TARGET_INIT_BUILTINS
533 #define TARGET_INIT_BUILTINS arm_init_builtins
534 #undef TARGET_EXPAND_BUILTIN
535 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
536 #undef TARGET_BUILTIN_DECL
537 #define TARGET_BUILTIN_DECL arm_builtin_decl
539 #undef TARGET_INIT_LIBFUNCS
540 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
542 #undef TARGET_PROMOTE_FUNCTION_MODE
543 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
544 #undef TARGET_PROMOTE_PROTOTYPES
545 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
546 #undef TARGET_PASS_BY_REFERENCE
547 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
548 #undef TARGET_ARG_PARTIAL_BYTES
549 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
550 #undef TARGET_FUNCTION_ARG
551 #define TARGET_FUNCTION_ARG arm_function_arg
552 #undef TARGET_FUNCTION_ARG_ADVANCE
553 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
554 #undef TARGET_FUNCTION_ARG_PADDING
555 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
556 #undef TARGET_FUNCTION_ARG_BOUNDARY
557 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
559 #undef TARGET_SETUP_INCOMING_VARARGS
560 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
562 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
563 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
565 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
566 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
567 #undef TARGET_TRAMPOLINE_INIT
568 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
569 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
570 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
572 #undef TARGET_WARN_FUNC_RETURN
573 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
575 #undef TARGET_DEFAULT_SHORT_ENUMS
576 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
578 #undef TARGET_ALIGN_ANON_BITFIELD
579 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
581 #undef TARGET_NARROW_VOLATILE_BITFIELD
582 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
584 #undef TARGET_CXX_GUARD_TYPE
585 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
587 #undef TARGET_CXX_GUARD_MASK_BIT
588 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
590 #undef TARGET_CXX_GET_COOKIE_SIZE
591 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
593 #undef TARGET_CXX_COOKIE_HAS_SIZE
594 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
596 #undef TARGET_CXX_CDTOR_RETURNS_THIS
597 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
599 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
600 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
602 #undef TARGET_CXX_USE_AEABI_ATEXIT
603 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
605 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
606 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
607 arm_cxx_determine_class_data_visibility
609 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
610 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
612 #undef TARGET_RETURN_IN_MSB
613 #define TARGET_RETURN_IN_MSB arm_return_in_msb
615 #undef TARGET_RETURN_IN_MEMORY
616 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
618 #undef TARGET_MUST_PASS_IN_STACK
619 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
622 #undef TARGET_ASM_UNWIND_EMIT
623 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
625 /* EABI unwinding tables use a different format for the typeinfo tables. */
626 #undef TARGET_ASM_TTYPE
627 #define TARGET_ASM_TTYPE arm_output_ttype
629 #undef TARGET_ARM_EABI_UNWINDER
630 #define TARGET_ARM_EABI_UNWINDER true
632 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
633 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
635 #endif /* ARM_UNWIND_INFO */
637 #undef TARGET_ASM_INIT_SECTIONS
638 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
640 #undef TARGET_DWARF_REGISTER_SPAN
641 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
643 #undef TARGET_CANNOT_COPY_INSN_P
644 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
647 #undef TARGET_HAVE_TLS
648 #define TARGET_HAVE_TLS true
651 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
652 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
654 #undef TARGET_LEGITIMATE_CONSTANT_P
655 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
657 #undef TARGET_CANNOT_FORCE_CONST_MEM
658 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
660 #undef TARGET_MAX_ANCHOR_OFFSET
661 #define TARGET_MAX_ANCHOR_OFFSET 4095
663 /* The minimum is set such that the total size of the block
664 for a particular anchor is -4088 + 1 + 4095 bytes, which is
665 divisible by eight, ensuring natural spacing of anchors. */
666 #undef TARGET_MIN_ANCHOR_OFFSET
667 #define TARGET_MIN_ANCHOR_OFFSET -4088
669 #undef TARGET_SCHED_ISSUE_RATE
670 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
672 #undef TARGET_SCHED_VARIABLE_ISSUE
673 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
675 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
676 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
677 arm_first_cycle_multipass_dfa_lookahead
679 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
680 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
681 arm_first_cycle_multipass_dfa_lookahead_guard
683 #undef TARGET_MANGLE_TYPE
684 #define TARGET_MANGLE_TYPE arm_mangle_type
686 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
687 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
689 #undef TARGET_BUILD_BUILTIN_VA_LIST
690 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
691 #undef TARGET_EXPAND_BUILTIN_VA_START
692 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
693 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
694 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
697 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
698 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
701 #undef TARGET_LEGITIMATE_ADDRESS_P
702 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
704 #undef TARGET_PREFERRED_RELOAD_CLASS
705 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
707 #undef TARGET_PROMOTED_TYPE
708 #define TARGET_PROMOTED_TYPE arm_promoted_type
710 #undef TARGET_SCALAR_MODE_SUPPORTED_P
711 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
713 #undef TARGET_COMPUTE_FRAME_LAYOUT
714 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
716 #undef TARGET_FRAME_POINTER_REQUIRED
717 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
719 #undef TARGET_CAN_ELIMINATE
720 #define TARGET_CAN_ELIMINATE arm_can_eliminate
722 #undef TARGET_CONDITIONAL_REGISTER_USAGE
723 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
725 #undef TARGET_CLASS_LIKELY_SPILLED_P
726 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
728 #undef TARGET_VECTORIZE_BUILTINS
729 #define TARGET_VECTORIZE_BUILTINS
731 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
732 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
733 arm_builtin_vectorized_function
735 #undef TARGET_VECTOR_ALIGNMENT
736 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
738 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
739 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
740 arm_vector_alignment_reachable
742 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
743 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
744 arm_builtin_support_vector_misalignment
746 #undef TARGET_PREFERRED_RENAME_CLASS
747 #define TARGET_PREFERRED_RENAME_CLASS \
748 arm_preferred_rename_class
750 #undef TARGET_VECTORIZE_VEC_PERM_CONST
751 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
753 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
754 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
755 arm_builtin_vectorization_cost
756 #undef TARGET_VECTORIZE_ADD_STMT_COST
757 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
759 #undef TARGET_CANONICALIZE_COMPARISON
760 #define TARGET_CANONICALIZE_COMPARISON \
761 arm_canonicalize_comparison
763 #undef TARGET_ASAN_SHADOW_OFFSET
764 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
766 #undef MAX_INSN_PER_IT_BLOCK
767 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
769 #undef TARGET_CAN_USE_DOLOOP_P
770 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
772 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
773 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
775 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
776 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
778 #undef TARGET_SCHED_FUSION_PRIORITY
779 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
781 #undef TARGET_ASM_FUNCTION_SECTION
782 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
784 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
785 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
787 #undef TARGET_SECTION_TYPE_FLAGS
788 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
790 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
791 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
793 #undef TARGET_C_EXCESS_PRECISION
794 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
796 /* Although the architecture reserves bits 0 and 1, only the former is
797 used for ARM/Thumb ISA selection in v7 and earlier versions. */
798 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
799 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
801 #undef TARGET_FIXED_CONDITION_CODE_REGS
802 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
804 #undef TARGET_HARD_REGNO_NREGS
805 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
806 #undef TARGET_HARD_REGNO_MODE_OK
807 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
809 #undef TARGET_MODES_TIEABLE_P
810 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
812 #undef TARGET_CAN_CHANGE_MODE_CLASS
813 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
815 #undef TARGET_CONSTANT_ALIGNMENT
816 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
818 /* Obstack for minipool constant handling. */
819 static struct obstack minipool_obstack
;
820 static char * minipool_startobj
;
822 /* The maximum number of insns skipped which
823 will be conditionalised if possible. */
824 static int max_insns_skipped
= 5;
826 extern FILE * asm_out_file
;
828 /* True if we are currently building a constant table. */
829 int making_const_table
;
831 /* The processor for which instructions should be scheduled. */
832 enum processor_type arm_tune
= TARGET_CPU_arm_none
;
834 /* The current tuning set. */
835 const struct tune_params
*current_tune
;
837 /* Which floating point hardware to schedule for. */
840 /* Used for Thumb call_via trampolines. */
841 rtx thumb_call_via_label
[14];
842 static int thumb_call_reg_needed
;
844 /* The bits in this mask specify which instruction scheduling options should
846 unsigned int tune_flags
= 0;
848 /* The highest ARM architecture version supported by the
850 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
852 /* Active target architecture and tuning. */
854 struct arm_build_target arm_active_target
;
856 /* The following are used in the arm.md file as equivalents to bits
857 in the above two flag variables. */
859 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
862 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
865 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
868 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
871 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
874 /* Nonzero if this chip supports the ARM 6K extensions. */
877 /* Nonzero if this chip supports the ARM 6KZ extensions. */
880 /* Nonzero if instructions present in ARMv6-M can be used. */
883 /* Nonzero if this chip supports the ARM 7 extensions. */
886 /* Nonzero if this chip supports the Large Physical Address Extension. */
887 int arm_arch_lpae
= 0;
889 /* Nonzero if instructions not present in the 'M' profile can be used. */
890 int arm_arch_notm
= 0;
892 /* Nonzero if instructions present in ARMv7E-M can be used. */
895 /* Nonzero if instructions present in ARMv8 can be used. */
898 /* Nonzero if this chip supports the ARMv8.1 extensions. */
901 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
904 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
907 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
910 /* Nonzero if this chip supports the FP16 instructions extension of ARM
912 int arm_fp16_inst
= 0;
914 /* Nonzero if this chip can benefit from load scheduling. */
915 int arm_ld_sched
= 0;
917 /* Nonzero if this chip is a StrongARM. */
918 int arm_tune_strongarm
= 0;
920 /* Nonzero if this chip supports Intel Wireless MMX technology. */
921 int arm_arch_iwmmxt
= 0;
923 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
924 int arm_arch_iwmmxt2
= 0;
926 /* Nonzero if this chip is an XScale. */
927 int arm_arch_xscale
= 0;
929 /* Nonzero if tuning for XScale */
930 int arm_tune_xscale
= 0;
932 /* Nonzero if we want to tune for stores that access the write-buffer.
933 This typically means an ARM6 or ARM7 with MMU or MPU. */
934 int arm_tune_wbuf
= 0;
936 /* Nonzero if tuning for Cortex-A9. */
937 int arm_tune_cortex_a9
= 0;
939 /* Nonzero if we should define __THUMB_INTERWORK__ in the
941 XXX This is a bit of a hack, it's intended to help work around
942 problems in GLD which doesn't understand that armv5t code is
943 interworking clean. */
944 int arm_cpp_interwork
= 0;
946 /* Nonzero if chip supports Thumb 1. */
949 /* Nonzero if chip supports Thumb 2. */
952 /* Nonzero if chip supports integer division instruction. */
953 int arm_arch_arm_hwdiv
;
954 int arm_arch_thumb_hwdiv
;
956 /* Nonzero if chip disallows volatile memory access in IT block. */
957 int arm_arch_no_volatile_ce
;
959 /* Nonzero if we shouldn't use literal pools. */
960 bool arm_disable_literal_pool
= false;
962 /* The register number to be used for the PIC offset register. */
963 unsigned arm_pic_register
= INVALID_REGNUM
;
965 enum arm_pcs arm_pcs_default
;
967 /* For an explanation of these variables, see final_prescan_insn below. */
969 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
970 enum arm_cond_code arm_current_cc
;
973 int arm_target_label
;
974 /* The number of conditionally executed insns, including the current insn. */
975 int arm_condexec_count
= 0;
976 /* A bitmask specifying the patterns for the IT block.
977 Zero means do not output an IT block before this insn. */
978 int arm_condexec_mask
= 0;
979 /* The number of bits used in arm_condexec_mask. */
980 int arm_condexec_masklen
= 0;
982 /* Nonzero if chip supports the ARMv8 CRC instructions. */
983 int arm_arch_crc
= 0;
985 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
986 int arm_arch_dotprod
= 0;
988 /* Nonzero if chip supports the ARMv8-M security extensions. */
989 int arm_arch_cmse
= 0;
991 /* Nonzero if the core has a very small, high-latency, multiply unit. */
992 int arm_m_profile_small_mul
= 0;
994 /* The condition codes of the ARM, and the inverse function. */
995 static const char * const arm_condition_codes
[] =
997 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
998 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1001 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1002 int arm_regs_in_sequence
[] =
1004 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1007 #define ARM_LSL_NAME "lsl"
1008 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1010 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1011 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1012 | (1 << PIC_OFFSET_TABLE_REGNUM)))
1014 /* Initialization code. */
1018 enum processor_type scheduler
;
1019 unsigned int tune_flags
;
1020 const struct tune_params
*tune
;
1023 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1024 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1031 /* arm generic vectorizer costs. */
1033 struct cpu_vec_costs arm_default_vec_cost
= {
1034 1, /* scalar_stmt_cost. */
1035 1, /* scalar load_cost. */
1036 1, /* scalar_store_cost. */
1037 1, /* vec_stmt_cost. */
1038 1, /* vec_to_scalar_cost. */
1039 1, /* scalar_to_vec_cost. */
1040 1, /* vec_align_load_cost. */
1041 1, /* vec_unalign_load_cost. */
1042 1, /* vec_unalign_store_cost. */
1043 1, /* vec_store_cost. */
1044 3, /* cond_taken_branch_cost. */
1045 1, /* cond_not_taken_branch_cost. */
1048 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1049 #include "aarch-cost-tables.h"
1053 const struct cpu_cost_table cortexa9_extra_costs
=
1060 COSTS_N_INSNS (1), /* shift_reg. */
1061 COSTS_N_INSNS (1), /* arith_shift. */
1062 COSTS_N_INSNS (2), /* arith_shift_reg. */
1064 COSTS_N_INSNS (1), /* log_shift_reg. */
1065 COSTS_N_INSNS (1), /* extend. */
1066 COSTS_N_INSNS (2), /* extend_arith. */
1067 COSTS_N_INSNS (1), /* bfi. */
1068 COSTS_N_INSNS (1), /* bfx. */
1072 true /* non_exec_costs_exec. */
1077 COSTS_N_INSNS (3), /* simple. */
1078 COSTS_N_INSNS (3), /* flag_setting. */
1079 COSTS_N_INSNS (2), /* extend. */
1080 COSTS_N_INSNS (3), /* add. */
1081 COSTS_N_INSNS (2), /* extend_add. */
1082 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1086 0, /* simple (N/A). */
1087 0, /* flag_setting (N/A). */
1088 COSTS_N_INSNS (4), /* extend. */
1090 COSTS_N_INSNS (4), /* extend_add. */
1096 COSTS_N_INSNS (2), /* load. */
1097 COSTS_N_INSNS (2), /* load_sign_extend. */
1098 COSTS_N_INSNS (2), /* ldrd. */
1099 COSTS_N_INSNS (2), /* ldm_1st. */
1100 1, /* ldm_regs_per_insn_1st. */
1101 2, /* ldm_regs_per_insn_subsequent. */
1102 COSTS_N_INSNS (5), /* loadf. */
1103 COSTS_N_INSNS (5), /* loadd. */
1104 COSTS_N_INSNS (1), /* load_unaligned. */
1105 COSTS_N_INSNS (2), /* store. */
1106 COSTS_N_INSNS (2), /* strd. */
1107 COSTS_N_INSNS (2), /* stm_1st. */
1108 1, /* stm_regs_per_insn_1st. */
1109 2, /* stm_regs_per_insn_subsequent. */
1110 COSTS_N_INSNS (1), /* storef. */
1111 COSTS_N_INSNS (1), /* stored. */
1112 COSTS_N_INSNS (1), /* store_unaligned. */
1113 COSTS_N_INSNS (1), /* loadv. */
1114 COSTS_N_INSNS (1) /* storev. */
1119 COSTS_N_INSNS (14), /* div. */
1120 COSTS_N_INSNS (4), /* mult. */
1121 COSTS_N_INSNS (7), /* mult_addsub. */
1122 COSTS_N_INSNS (30), /* fma. */
1123 COSTS_N_INSNS (3), /* addsub. */
1124 COSTS_N_INSNS (1), /* fpconst. */
1125 COSTS_N_INSNS (1), /* neg. */
1126 COSTS_N_INSNS (3), /* compare. */
1127 COSTS_N_INSNS (3), /* widen. */
1128 COSTS_N_INSNS (3), /* narrow. */
1129 COSTS_N_INSNS (3), /* toint. */
1130 COSTS_N_INSNS (3), /* fromint. */
1131 COSTS_N_INSNS (3) /* roundint. */
1135 COSTS_N_INSNS (24), /* div. */
1136 COSTS_N_INSNS (5), /* mult. */
1137 COSTS_N_INSNS (8), /* mult_addsub. */
1138 COSTS_N_INSNS (30), /* fma. */
1139 COSTS_N_INSNS (3), /* addsub. */
1140 COSTS_N_INSNS (1), /* fpconst. */
1141 COSTS_N_INSNS (1), /* neg. */
1142 COSTS_N_INSNS (3), /* compare. */
1143 COSTS_N_INSNS (3), /* widen. */
1144 COSTS_N_INSNS (3), /* narrow. */
1145 COSTS_N_INSNS (3), /* toint. */
1146 COSTS_N_INSNS (3), /* fromint. */
1147 COSTS_N_INSNS (3) /* roundint. */
1152 COSTS_N_INSNS (1) /* alu. */
1156 const struct cpu_cost_table cortexa8_extra_costs
=
1162 COSTS_N_INSNS (1), /* shift. */
1164 COSTS_N_INSNS (1), /* arith_shift. */
1165 0, /* arith_shift_reg. */
1166 COSTS_N_INSNS (1), /* log_shift. */
1167 0, /* log_shift_reg. */
1169 0, /* extend_arith. */
1175 true /* non_exec_costs_exec. */
1180 COSTS_N_INSNS (1), /* simple. */
1181 COSTS_N_INSNS (1), /* flag_setting. */
1182 COSTS_N_INSNS (1), /* extend. */
1183 COSTS_N_INSNS (1), /* add. */
1184 COSTS_N_INSNS (1), /* extend_add. */
1185 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1189 0, /* simple (N/A). */
1190 0, /* flag_setting (N/A). */
1191 COSTS_N_INSNS (2), /* extend. */
1193 COSTS_N_INSNS (2), /* extend_add. */
1199 COSTS_N_INSNS (1), /* load. */
1200 COSTS_N_INSNS (1), /* load_sign_extend. */
1201 COSTS_N_INSNS (1), /* ldrd. */
1202 COSTS_N_INSNS (1), /* ldm_1st. */
1203 1, /* ldm_regs_per_insn_1st. */
1204 2, /* ldm_regs_per_insn_subsequent. */
1205 COSTS_N_INSNS (1), /* loadf. */
1206 COSTS_N_INSNS (1), /* loadd. */
1207 COSTS_N_INSNS (1), /* load_unaligned. */
1208 COSTS_N_INSNS (1), /* store. */
1209 COSTS_N_INSNS (1), /* strd. */
1210 COSTS_N_INSNS (1), /* stm_1st. */
1211 1, /* stm_regs_per_insn_1st. */
1212 2, /* stm_regs_per_insn_subsequent. */
1213 COSTS_N_INSNS (1), /* storef. */
1214 COSTS_N_INSNS (1), /* stored. */
1215 COSTS_N_INSNS (1), /* store_unaligned. */
1216 COSTS_N_INSNS (1), /* loadv. */
1217 COSTS_N_INSNS (1) /* storev. */
1222 COSTS_N_INSNS (36), /* div. */
1223 COSTS_N_INSNS (11), /* mult. */
1224 COSTS_N_INSNS (20), /* mult_addsub. */
1225 COSTS_N_INSNS (30), /* fma. */
1226 COSTS_N_INSNS (9), /* addsub. */
1227 COSTS_N_INSNS (3), /* fpconst. */
1228 COSTS_N_INSNS (3), /* neg. */
1229 COSTS_N_INSNS (6), /* compare. */
1230 COSTS_N_INSNS (4), /* widen. */
1231 COSTS_N_INSNS (4), /* narrow. */
1232 COSTS_N_INSNS (8), /* toint. */
1233 COSTS_N_INSNS (8), /* fromint. */
1234 COSTS_N_INSNS (8) /* roundint. */
1238 COSTS_N_INSNS (64), /* div. */
1239 COSTS_N_INSNS (16), /* mult. */
1240 COSTS_N_INSNS (25), /* mult_addsub. */
1241 COSTS_N_INSNS (30), /* fma. */
1242 COSTS_N_INSNS (9), /* addsub. */
1243 COSTS_N_INSNS (3), /* fpconst. */
1244 COSTS_N_INSNS (3), /* neg. */
1245 COSTS_N_INSNS (6), /* compare. */
1246 COSTS_N_INSNS (6), /* widen. */
1247 COSTS_N_INSNS (6), /* narrow. */
1248 COSTS_N_INSNS (8), /* toint. */
1249 COSTS_N_INSNS (8), /* fromint. */
1250 COSTS_N_INSNS (8) /* roundint. */
1255 COSTS_N_INSNS (1) /* alu. */
1259 const struct cpu_cost_table cortexa5_extra_costs
=
1265 COSTS_N_INSNS (1), /* shift. */
1266 COSTS_N_INSNS (1), /* shift_reg. */
1267 COSTS_N_INSNS (1), /* arith_shift. */
1268 COSTS_N_INSNS (1), /* arith_shift_reg. */
1269 COSTS_N_INSNS (1), /* log_shift. */
1270 COSTS_N_INSNS (1), /* log_shift_reg. */
1271 COSTS_N_INSNS (1), /* extend. */
1272 COSTS_N_INSNS (1), /* extend_arith. */
1273 COSTS_N_INSNS (1), /* bfi. */
1274 COSTS_N_INSNS (1), /* bfx. */
1275 COSTS_N_INSNS (1), /* clz. */
1276 COSTS_N_INSNS (1), /* rev. */
1278 true /* non_exec_costs_exec. */
1285 COSTS_N_INSNS (1), /* flag_setting. */
1286 COSTS_N_INSNS (1), /* extend. */
1287 COSTS_N_INSNS (1), /* add. */
1288 COSTS_N_INSNS (1), /* extend_add. */
1289 COSTS_N_INSNS (7) /* idiv. */
1293 0, /* simple (N/A). */
1294 0, /* flag_setting (N/A). */
1295 COSTS_N_INSNS (1), /* extend. */
1297 COSTS_N_INSNS (2), /* extend_add. */
1303 COSTS_N_INSNS (1), /* load. */
1304 COSTS_N_INSNS (1), /* load_sign_extend. */
1305 COSTS_N_INSNS (6), /* ldrd. */
1306 COSTS_N_INSNS (1), /* ldm_1st. */
1307 1, /* ldm_regs_per_insn_1st. */
1308 2, /* ldm_regs_per_insn_subsequent. */
1309 COSTS_N_INSNS (2), /* loadf. */
1310 COSTS_N_INSNS (4), /* loadd. */
1311 COSTS_N_INSNS (1), /* load_unaligned. */
1312 COSTS_N_INSNS (1), /* store. */
1313 COSTS_N_INSNS (3), /* strd. */
1314 COSTS_N_INSNS (1), /* stm_1st. */
1315 1, /* stm_regs_per_insn_1st. */
1316 2, /* stm_regs_per_insn_subsequent. */
1317 COSTS_N_INSNS (2), /* storef. */
1318 COSTS_N_INSNS (2), /* stored. */
1319 COSTS_N_INSNS (1), /* store_unaligned. */
1320 COSTS_N_INSNS (1), /* loadv. */
1321 COSTS_N_INSNS (1) /* storev. */
1326 COSTS_N_INSNS (15), /* div. */
1327 COSTS_N_INSNS (3), /* mult. */
1328 COSTS_N_INSNS (7), /* mult_addsub. */
1329 COSTS_N_INSNS (7), /* fma. */
1330 COSTS_N_INSNS (3), /* addsub. */
1331 COSTS_N_INSNS (3), /* fpconst. */
1332 COSTS_N_INSNS (3), /* neg. */
1333 COSTS_N_INSNS (3), /* compare. */
1334 COSTS_N_INSNS (3), /* widen. */
1335 COSTS_N_INSNS (3), /* narrow. */
1336 COSTS_N_INSNS (3), /* toint. */
1337 COSTS_N_INSNS (3), /* fromint. */
1338 COSTS_N_INSNS (3) /* roundint. */
1342 COSTS_N_INSNS (30), /* div. */
1343 COSTS_N_INSNS (6), /* mult. */
1344 COSTS_N_INSNS (10), /* mult_addsub. */
1345 COSTS_N_INSNS (7), /* fma. */
1346 COSTS_N_INSNS (3), /* addsub. */
1347 COSTS_N_INSNS (3), /* fpconst. */
1348 COSTS_N_INSNS (3), /* neg. */
1349 COSTS_N_INSNS (3), /* compare. */
1350 COSTS_N_INSNS (3), /* widen. */
1351 COSTS_N_INSNS (3), /* narrow. */
1352 COSTS_N_INSNS (3), /* toint. */
1353 COSTS_N_INSNS (3), /* fromint. */
1354 COSTS_N_INSNS (3) /* roundint. */
1359 COSTS_N_INSNS (1) /* alu. */
1364 const struct cpu_cost_table cortexa7_extra_costs
=
1370 COSTS_N_INSNS (1), /* shift. */
1371 COSTS_N_INSNS (1), /* shift_reg. */
1372 COSTS_N_INSNS (1), /* arith_shift. */
1373 COSTS_N_INSNS (1), /* arith_shift_reg. */
1374 COSTS_N_INSNS (1), /* log_shift. */
1375 COSTS_N_INSNS (1), /* log_shift_reg. */
1376 COSTS_N_INSNS (1), /* extend. */
1377 COSTS_N_INSNS (1), /* extend_arith. */
1378 COSTS_N_INSNS (1), /* bfi. */
1379 COSTS_N_INSNS (1), /* bfx. */
1380 COSTS_N_INSNS (1), /* clz. */
1381 COSTS_N_INSNS (1), /* rev. */
1383 true /* non_exec_costs_exec. */
1390 COSTS_N_INSNS (1), /* flag_setting. */
1391 COSTS_N_INSNS (1), /* extend. */
1392 COSTS_N_INSNS (1), /* add. */
1393 COSTS_N_INSNS (1), /* extend_add. */
1394 COSTS_N_INSNS (7) /* idiv. */
1398 0, /* simple (N/A). */
1399 0, /* flag_setting (N/A). */
1400 COSTS_N_INSNS (1), /* extend. */
1402 COSTS_N_INSNS (2), /* extend_add. */
1408 COSTS_N_INSNS (1), /* load. */
1409 COSTS_N_INSNS (1), /* load_sign_extend. */
1410 COSTS_N_INSNS (3), /* ldrd. */
1411 COSTS_N_INSNS (1), /* ldm_1st. */
1412 1, /* ldm_regs_per_insn_1st. */
1413 2, /* ldm_regs_per_insn_subsequent. */
1414 COSTS_N_INSNS (2), /* loadf. */
1415 COSTS_N_INSNS (2), /* loadd. */
1416 COSTS_N_INSNS (1), /* load_unaligned. */
1417 COSTS_N_INSNS (1), /* store. */
1418 COSTS_N_INSNS (3), /* strd. */
1419 COSTS_N_INSNS (1), /* stm_1st. */
1420 1, /* stm_regs_per_insn_1st. */
1421 2, /* stm_regs_per_insn_subsequent. */
1422 COSTS_N_INSNS (2), /* storef. */
1423 COSTS_N_INSNS (2), /* stored. */
1424 COSTS_N_INSNS (1), /* store_unaligned. */
1425 COSTS_N_INSNS (1), /* loadv. */
1426 COSTS_N_INSNS (1) /* storev. */
1431 COSTS_N_INSNS (15), /* div. */
1432 COSTS_N_INSNS (3), /* mult. */
1433 COSTS_N_INSNS (7), /* mult_addsub. */
1434 COSTS_N_INSNS (7), /* fma. */
1435 COSTS_N_INSNS (3), /* addsub. */
1436 COSTS_N_INSNS (3), /* fpconst. */
1437 COSTS_N_INSNS (3), /* neg. */
1438 COSTS_N_INSNS (3), /* compare. */
1439 COSTS_N_INSNS (3), /* widen. */
1440 COSTS_N_INSNS (3), /* narrow. */
1441 COSTS_N_INSNS (3), /* toint. */
1442 COSTS_N_INSNS (3), /* fromint. */
1443 COSTS_N_INSNS (3) /* roundint. */
1447 COSTS_N_INSNS (30), /* div. */
1448 COSTS_N_INSNS (6), /* mult. */
1449 COSTS_N_INSNS (10), /* mult_addsub. */
1450 COSTS_N_INSNS (7), /* fma. */
1451 COSTS_N_INSNS (3), /* addsub. */
1452 COSTS_N_INSNS (3), /* fpconst. */
1453 COSTS_N_INSNS (3), /* neg. */
1454 COSTS_N_INSNS (3), /* compare. */
1455 COSTS_N_INSNS (3), /* widen. */
1456 COSTS_N_INSNS (3), /* narrow. */
1457 COSTS_N_INSNS (3), /* toint. */
1458 COSTS_N_INSNS (3), /* fromint. */
1459 COSTS_N_INSNS (3) /* roundint. */
1464 COSTS_N_INSNS (1) /* alu. */
1468 const struct cpu_cost_table cortexa12_extra_costs
=
1475 COSTS_N_INSNS (1), /* shift_reg. */
1476 COSTS_N_INSNS (1), /* arith_shift. */
1477 COSTS_N_INSNS (1), /* arith_shift_reg. */
1478 COSTS_N_INSNS (1), /* log_shift. */
1479 COSTS_N_INSNS (1), /* log_shift_reg. */
1481 COSTS_N_INSNS (1), /* extend_arith. */
1483 COSTS_N_INSNS (1), /* bfx. */
1484 COSTS_N_INSNS (1), /* clz. */
1485 COSTS_N_INSNS (1), /* rev. */
1487 true /* non_exec_costs_exec. */
1492 COSTS_N_INSNS (2), /* simple. */
1493 COSTS_N_INSNS (3), /* flag_setting. */
1494 COSTS_N_INSNS (2), /* extend. */
1495 COSTS_N_INSNS (3), /* add. */
1496 COSTS_N_INSNS (2), /* extend_add. */
1497 COSTS_N_INSNS (18) /* idiv. */
1501 0, /* simple (N/A). */
1502 0, /* flag_setting (N/A). */
1503 COSTS_N_INSNS (3), /* extend. */
1505 COSTS_N_INSNS (3), /* extend_add. */
1511 COSTS_N_INSNS (3), /* load. */
1512 COSTS_N_INSNS (3), /* load_sign_extend. */
1513 COSTS_N_INSNS (3), /* ldrd. */
1514 COSTS_N_INSNS (3), /* ldm_1st. */
1515 1, /* ldm_regs_per_insn_1st. */
1516 2, /* ldm_regs_per_insn_subsequent. */
1517 COSTS_N_INSNS (3), /* loadf. */
1518 COSTS_N_INSNS (3), /* loadd. */
1519 0, /* load_unaligned. */
1523 1, /* stm_regs_per_insn_1st. */
1524 2, /* stm_regs_per_insn_subsequent. */
1525 COSTS_N_INSNS (2), /* storef. */
1526 COSTS_N_INSNS (2), /* stored. */
1527 0, /* store_unaligned. */
1528 COSTS_N_INSNS (1), /* loadv. */
1529 COSTS_N_INSNS (1) /* storev. */
1534 COSTS_N_INSNS (17), /* div. */
1535 COSTS_N_INSNS (4), /* mult. */
1536 COSTS_N_INSNS (8), /* mult_addsub. */
1537 COSTS_N_INSNS (8), /* fma. */
1538 COSTS_N_INSNS (4), /* addsub. */
1539 COSTS_N_INSNS (2), /* fpconst. */
1540 COSTS_N_INSNS (2), /* neg. */
1541 COSTS_N_INSNS (2), /* compare. */
1542 COSTS_N_INSNS (4), /* widen. */
1543 COSTS_N_INSNS (4), /* narrow. */
1544 COSTS_N_INSNS (4), /* toint. */
1545 COSTS_N_INSNS (4), /* fromint. */
1546 COSTS_N_INSNS (4) /* roundint. */
1550 COSTS_N_INSNS (31), /* div. */
1551 COSTS_N_INSNS (4), /* mult. */
1552 COSTS_N_INSNS (8), /* mult_addsub. */
1553 COSTS_N_INSNS (8), /* fma. */
1554 COSTS_N_INSNS (4), /* addsub. */
1555 COSTS_N_INSNS (2), /* fpconst. */
1556 COSTS_N_INSNS (2), /* neg. */
1557 COSTS_N_INSNS (2), /* compare. */
1558 COSTS_N_INSNS (4), /* widen. */
1559 COSTS_N_INSNS (4), /* narrow. */
1560 COSTS_N_INSNS (4), /* toint. */
1561 COSTS_N_INSNS (4), /* fromint. */
1562 COSTS_N_INSNS (4) /* roundint. */
1567 COSTS_N_INSNS (1) /* alu. */
1571 const struct cpu_cost_table cortexa15_extra_costs
=
1579 COSTS_N_INSNS (1), /* arith_shift. */
1580 COSTS_N_INSNS (1), /* arith_shift_reg. */
1581 COSTS_N_INSNS (1), /* log_shift. */
1582 COSTS_N_INSNS (1), /* log_shift_reg. */
1584 COSTS_N_INSNS (1), /* extend_arith. */
1585 COSTS_N_INSNS (1), /* bfi. */
1590 true /* non_exec_costs_exec. */
1595 COSTS_N_INSNS (2), /* simple. */
1596 COSTS_N_INSNS (3), /* flag_setting. */
1597 COSTS_N_INSNS (2), /* extend. */
1598 COSTS_N_INSNS (2), /* add. */
1599 COSTS_N_INSNS (2), /* extend_add. */
1600 COSTS_N_INSNS (18) /* idiv. */
1604 0, /* simple (N/A). */
1605 0, /* flag_setting (N/A). */
1606 COSTS_N_INSNS (3), /* extend. */
1608 COSTS_N_INSNS (3), /* extend_add. */
1614 COSTS_N_INSNS (3), /* load. */
1615 COSTS_N_INSNS (3), /* load_sign_extend. */
1616 COSTS_N_INSNS (3), /* ldrd. */
1617 COSTS_N_INSNS (4), /* ldm_1st. */
1618 1, /* ldm_regs_per_insn_1st. */
1619 2, /* ldm_regs_per_insn_subsequent. */
1620 COSTS_N_INSNS (4), /* loadf. */
1621 COSTS_N_INSNS (4), /* loadd. */
1622 0, /* load_unaligned. */
1625 COSTS_N_INSNS (1), /* stm_1st. */
1626 1, /* stm_regs_per_insn_1st. */
1627 2, /* stm_regs_per_insn_subsequent. */
1630 0, /* store_unaligned. */
1631 COSTS_N_INSNS (1), /* loadv. */
1632 COSTS_N_INSNS (1) /* storev. */
1637 COSTS_N_INSNS (17), /* div. */
1638 COSTS_N_INSNS (4), /* mult. */
1639 COSTS_N_INSNS (8), /* mult_addsub. */
1640 COSTS_N_INSNS (8), /* fma. */
1641 COSTS_N_INSNS (4), /* addsub. */
1642 COSTS_N_INSNS (2), /* fpconst. */
1643 COSTS_N_INSNS (2), /* neg. */
1644 COSTS_N_INSNS (5), /* compare. */
1645 COSTS_N_INSNS (4), /* widen. */
1646 COSTS_N_INSNS (4), /* narrow. */
1647 COSTS_N_INSNS (4), /* toint. */
1648 COSTS_N_INSNS (4), /* fromint. */
1649 COSTS_N_INSNS (4) /* roundint. */
1653 COSTS_N_INSNS (31), /* div. */
1654 COSTS_N_INSNS (4), /* mult. */
1655 COSTS_N_INSNS (8), /* mult_addsub. */
1656 COSTS_N_INSNS (8), /* fma. */
1657 COSTS_N_INSNS (4), /* addsub. */
1658 COSTS_N_INSNS (2), /* fpconst. */
1659 COSTS_N_INSNS (2), /* neg. */
1660 COSTS_N_INSNS (2), /* compare. */
1661 COSTS_N_INSNS (4), /* widen. */
1662 COSTS_N_INSNS (4), /* narrow. */
1663 COSTS_N_INSNS (4), /* toint. */
1664 COSTS_N_INSNS (4), /* fromint. */
1665 COSTS_N_INSNS (4) /* roundint. */
1670 COSTS_N_INSNS (1) /* alu. */
1674 const struct cpu_cost_table v7m_extra_costs
=
1682 0, /* arith_shift. */
1683 COSTS_N_INSNS (1), /* arith_shift_reg. */
1685 COSTS_N_INSNS (1), /* log_shift_reg. */
1687 COSTS_N_INSNS (1), /* extend_arith. */
1692 COSTS_N_INSNS (1), /* non_exec. */
1693 false /* non_exec_costs_exec. */
1698 COSTS_N_INSNS (1), /* simple. */
1699 COSTS_N_INSNS (1), /* flag_setting. */
1700 COSTS_N_INSNS (2), /* extend. */
1701 COSTS_N_INSNS (1), /* add. */
1702 COSTS_N_INSNS (3), /* extend_add. */
1703 COSTS_N_INSNS (8) /* idiv. */
1707 0, /* simple (N/A). */
1708 0, /* flag_setting (N/A). */
1709 COSTS_N_INSNS (2), /* extend. */
1711 COSTS_N_INSNS (3), /* extend_add. */
1717 COSTS_N_INSNS (2), /* load. */
1718 0, /* load_sign_extend. */
1719 COSTS_N_INSNS (3), /* ldrd. */
1720 COSTS_N_INSNS (2), /* ldm_1st. */
1721 1, /* ldm_regs_per_insn_1st. */
1722 1, /* ldm_regs_per_insn_subsequent. */
1723 COSTS_N_INSNS (2), /* loadf. */
1724 COSTS_N_INSNS (3), /* loadd. */
1725 COSTS_N_INSNS (1), /* load_unaligned. */
1726 COSTS_N_INSNS (2), /* store. */
1727 COSTS_N_INSNS (3), /* strd. */
1728 COSTS_N_INSNS (2), /* stm_1st. */
1729 1, /* stm_regs_per_insn_1st. */
1730 1, /* stm_regs_per_insn_subsequent. */
1731 COSTS_N_INSNS (2), /* storef. */
1732 COSTS_N_INSNS (3), /* stored. */
1733 COSTS_N_INSNS (1), /* store_unaligned. */
1734 COSTS_N_INSNS (1), /* loadv. */
1735 COSTS_N_INSNS (1) /* storev. */
1740 COSTS_N_INSNS (7), /* div. */
1741 COSTS_N_INSNS (2), /* mult. */
1742 COSTS_N_INSNS (5), /* mult_addsub. */
1743 COSTS_N_INSNS (3), /* fma. */
1744 COSTS_N_INSNS (1), /* addsub. */
1756 COSTS_N_INSNS (15), /* div. */
1757 COSTS_N_INSNS (5), /* mult. */
1758 COSTS_N_INSNS (7), /* mult_addsub. */
1759 COSTS_N_INSNS (7), /* fma. */
1760 COSTS_N_INSNS (3), /* addsub. */
1773 COSTS_N_INSNS (1) /* alu. */
1777 const struct addr_mode_cost_table generic_addr_mode_costs
=
1781 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1782 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1783 COSTS_N_INSNS (0) /* AMO_WB. */
1787 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1788 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1789 COSTS_N_INSNS (0) /* AMO_WB. */
1793 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1794 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1795 COSTS_N_INSNS (0) /* AMO_WB. */
1799 const struct tune_params arm_slowmul_tune
=
1801 &generic_extra_costs
, /* Insn extra costs. */
1802 &generic_addr_mode_costs
, /* Addressing mode costs. */
1803 NULL
, /* Sched adj cost. */
1804 arm_default_branch_cost
,
1805 &arm_default_vec_cost
,
1806 3, /* Constant limit. */
1807 5, /* Max cond insns. */
1808 8, /* Memset max inline. */
1809 1, /* Issue rate. */
1810 ARM_PREFETCH_NOT_BENEFICIAL
,
1811 tune_params::PREF_CONST_POOL_TRUE
,
1812 tune_params::PREF_LDRD_FALSE
,
1813 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1814 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1815 tune_params::DISPARAGE_FLAGS_NEITHER
,
1816 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1817 tune_params::FUSE_NOTHING
,
1818 tune_params::SCHED_AUTOPREF_OFF
1821 const struct tune_params arm_fastmul_tune
=
1823 &generic_extra_costs
, /* Insn extra costs. */
1824 &generic_addr_mode_costs
, /* Addressing mode costs. */
1825 NULL
, /* Sched adj cost. */
1826 arm_default_branch_cost
,
1827 &arm_default_vec_cost
,
1828 1, /* Constant limit. */
1829 5, /* Max cond insns. */
1830 8, /* Memset max inline. */
1831 1, /* Issue rate. */
1832 ARM_PREFETCH_NOT_BENEFICIAL
,
1833 tune_params::PREF_CONST_POOL_TRUE
,
1834 tune_params::PREF_LDRD_FALSE
,
1835 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1836 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1837 tune_params::DISPARAGE_FLAGS_NEITHER
,
1838 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1839 tune_params::FUSE_NOTHING
,
1840 tune_params::SCHED_AUTOPREF_OFF
1843 /* StrongARM has early execution of branches, so a sequence that is worth
1844 skipping is shorter. Set max_insns_skipped to a lower value. */
1846 const struct tune_params arm_strongarm_tune
=
1848 &generic_extra_costs
, /* Insn extra costs. */
1849 &generic_addr_mode_costs
, /* Addressing mode costs. */
1850 NULL
, /* Sched adj cost. */
1851 arm_default_branch_cost
,
1852 &arm_default_vec_cost
,
1853 1, /* Constant limit. */
1854 3, /* Max cond insns. */
1855 8, /* Memset max inline. */
1856 1, /* Issue rate. */
1857 ARM_PREFETCH_NOT_BENEFICIAL
,
1858 tune_params::PREF_CONST_POOL_TRUE
,
1859 tune_params::PREF_LDRD_FALSE
,
1860 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1861 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1862 tune_params::DISPARAGE_FLAGS_NEITHER
,
1863 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1864 tune_params::FUSE_NOTHING
,
1865 tune_params::SCHED_AUTOPREF_OFF
1868 const struct tune_params arm_xscale_tune
=
1870 &generic_extra_costs
, /* Insn extra costs. */
1871 &generic_addr_mode_costs
, /* Addressing mode costs. */
1872 xscale_sched_adjust_cost
,
1873 arm_default_branch_cost
,
1874 &arm_default_vec_cost
,
1875 2, /* Constant limit. */
1876 3, /* Max cond insns. */
1877 8, /* Memset max inline. */
1878 1, /* Issue rate. */
1879 ARM_PREFETCH_NOT_BENEFICIAL
,
1880 tune_params::PREF_CONST_POOL_TRUE
,
1881 tune_params::PREF_LDRD_FALSE
,
1882 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1884 tune_params::DISPARAGE_FLAGS_NEITHER
,
1885 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1886 tune_params::FUSE_NOTHING
,
1887 tune_params::SCHED_AUTOPREF_OFF
1890 const struct tune_params arm_9e_tune
=
1892 &generic_extra_costs
, /* Insn extra costs. */
1893 &generic_addr_mode_costs
, /* Addressing mode costs. */
1894 NULL
, /* Sched adj cost. */
1895 arm_default_branch_cost
,
1896 &arm_default_vec_cost
,
1897 1, /* Constant limit. */
1898 5, /* Max cond insns. */
1899 8, /* Memset max inline. */
1900 1, /* Issue rate. */
1901 ARM_PREFETCH_NOT_BENEFICIAL
,
1902 tune_params::PREF_CONST_POOL_TRUE
,
1903 tune_params::PREF_LDRD_FALSE
,
1904 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1905 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1906 tune_params::DISPARAGE_FLAGS_NEITHER
,
1907 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1908 tune_params::FUSE_NOTHING
,
1909 tune_params::SCHED_AUTOPREF_OFF
1912 const struct tune_params arm_marvell_pj4_tune
=
1914 &generic_extra_costs
, /* Insn extra costs. */
1915 &generic_addr_mode_costs
, /* Addressing mode costs. */
1916 NULL
, /* Sched adj cost. */
1917 arm_default_branch_cost
,
1918 &arm_default_vec_cost
,
1919 1, /* Constant limit. */
1920 5, /* Max cond insns. */
1921 8, /* Memset max inline. */
1922 2, /* Issue rate. */
1923 ARM_PREFETCH_NOT_BENEFICIAL
,
1924 tune_params::PREF_CONST_POOL_TRUE
,
1925 tune_params::PREF_LDRD_FALSE
,
1926 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1927 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1928 tune_params::DISPARAGE_FLAGS_NEITHER
,
1929 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1930 tune_params::FUSE_NOTHING
,
1931 tune_params::SCHED_AUTOPREF_OFF
1934 const struct tune_params arm_v6t2_tune
=
1936 &generic_extra_costs
, /* Insn extra costs. */
1937 &generic_addr_mode_costs
, /* Addressing mode costs. */
1938 NULL
, /* Sched adj cost. */
1939 arm_default_branch_cost
,
1940 &arm_default_vec_cost
,
1941 1, /* Constant limit. */
1942 5, /* Max cond insns. */
1943 8, /* Memset max inline. */
1944 1, /* Issue rate. */
1945 ARM_PREFETCH_NOT_BENEFICIAL
,
1946 tune_params::PREF_CONST_POOL_FALSE
,
1947 tune_params::PREF_LDRD_FALSE
,
1948 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1950 tune_params::DISPARAGE_FLAGS_NEITHER
,
1951 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1952 tune_params::FUSE_NOTHING
,
1953 tune_params::SCHED_AUTOPREF_OFF
1957 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1958 const struct tune_params arm_cortex_tune
=
1960 &generic_extra_costs
,
1961 &generic_addr_mode_costs
, /* Addressing mode costs. */
1962 NULL
, /* Sched adj cost. */
1963 arm_default_branch_cost
,
1964 &arm_default_vec_cost
,
1965 1, /* Constant limit. */
1966 5, /* Max cond insns. */
1967 8, /* Memset max inline. */
1968 2, /* Issue rate. */
1969 ARM_PREFETCH_NOT_BENEFICIAL
,
1970 tune_params::PREF_CONST_POOL_FALSE
,
1971 tune_params::PREF_LDRD_FALSE
,
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1973 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1974 tune_params::DISPARAGE_FLAGS_NEITHER
,
1975 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1976 tune_params::FUSE_NOTHING
,
1977 tune_params::SCHED_AUTOPREF_OFF
1980 const struct tune_params arm_cortex_a8_tune
=
1982 &cortexa8_extra_costs
,
1983 &generic_addr_mode_costs
, /* Addressing mode costs. */
1984 NULL
, /* Sched adj cost. */
1985 arm_default_branch_cost
,
1986 &arm_default_vec_cost
,
1987 1, /* Constant limit. */
1988 5, /* Max cond insns. */
1989 8, /* Memset max inline. */
1990 2, /* Issue rate. */
1991 ARM_PREFETCH_NOT_BENEFICIAL
,
1992 tune_params::PREF_CONST_POOL_FALSE
,
1993 tune_params::PREF_LDRD_FALSE
,
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1996 tune_params::DISPARAGE_FLAGS_NEITHER
,
1997 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1998 tune_params::FUSE_NOTHING
,
1999 tune_params::SCHED_AUTOPREF_OFF
2002 const struct tune_params arm_cortex_a7_tune
=
2004 &cortexa7_extra_costs
,
2005 &generic_addr_mode_costs
, /* Addressing mode costs. */
2006 NULL
, /* Sched adj cost. */
2007 arm_default_branch_cost
,
2008 &arm_default_vec_cost
,
2009 1, /* Constant limit. */
2010 5, /* Max cond insns. */
2011 8, /* Memset max inline. */
2012 2, /* Issue rate. */
2013 ARM_PREFETCH_NOT_BENEFICIAL
,
2014 tune_params::PREF_CONST_POOL_FALSE
,
2015 tune_params::PREF_LDRD_FALSE
,
2016 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2018 tune_params::DISPARAGE_FLAGS_NEITHER
,
2019 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2020 tune_params::FUSE_NOTHING
,
2021 tune_params::SCHED_AUTOPREF_OFF
2024 const struct tune_params arm_cortex_a15_tune
=
2026 &cortexa15_extra_costs
,
2027 &generic_addr_mode_costs
, /* Addressing mode costs. */
2028 NULL
, /* Sched adj cost. */
2029 arm_default_branch_cost
,
2030 &arm_default_vec_cost
,
2031 1, /* Constant limit. */
2032 2, /* Max cond insns. */
2033 8, /* Memset max inline. */
2034 3, /* Issue rate. */
2035 ARM_PREFETCH_NOT_BENEFICIAL
,
2036 tune_params::PREF_CONST_POOL_FALSE
,
2037 tune_params::PREF_LDRD_TRUE
,
2038 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2039 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2040 tune_params::DISPARAGE_FLAGS_ALL
,
2041 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2042 tune_params::FUSE_NOTHING
,
2043 tune_params::SCHED_AUTOPREF_FULL
2046 const struct tune_params arm_cortex_a35_tune
=
2048 &cortexa53_extra_costs
,
2049 &generic_addr_mode_costs
, /* Addressing mode costs. */
2050 NULL
, /* Sched adj cost. */
2051 arm_default_branch_cost
,
2052 &arm_default_vec_cost
,
2053 1, /* Constant limit. */
2054 5, /* Max cond insns. */
2055 8, /* Memset max inline. */
2056 1, /* Issue rate. */
2057 ARM_PREFETCH_NOT_BENEFICIAL
,
2058 tune_params::PREF_CONST_POOL_FALSE
,
2059 tune_params::PREF_LDRD_FALSE
,
2060 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2061 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2062 tune_params::DISPARAGE_FLAGS_NEITHER
,
2063 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2064 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2065 tune_params::SCHED_AUTOPREF_OFF
2068 const struct tune_params arm_cortex_a53_tune
=
2070 &cortexa53_extra_costs
,
2071 &generic_addr_mode_costs
, /* Addressing mode costs. */
2072 NULL
, /* Sched adj cost. */
2073 arm_default_branch_cost
,
2074 &arm_default_vec_cost
,
2075 1, /* Constant limit. */
2076 5, /* Max cond insns. */
2077 8, /* Memset max inline. */
2078 2, /* Issue rate. */
2079 ARM_PREFETCH_NOT_BENEFICIAL
,
2080 tune_params::PREF_CONST_POOL_FALSE
,
2081 tune_params::PREF_LDRD_FALSE
,
2082 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2083 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2084 tune_params::DISPARAGE_FLAGS_NEITHER
,
2085 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2086 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2087 tune_params::SCHED_AUTOPREF_OFF
2090 const struct tune_params arm_cortex_a57_tune
=
2092 &cortexa57_extra_costs
,
2093 &generic_addr_mode_costs
, /* addressing mode costs */
2094 NULL
, /* Sched adj cost. */
2095 arm_default_branch_cost
,
2096 &arm_default_vec_cost
,
2097 1, /* Constant limit. */
2098 2, /* Max cond insns. */
2099 8, /* Memset max inline. */
2100 3, /* Issue rate. */
2101 ARM_PREFETCH_NOT_BENEFICIAL
,
2102 tune_params::PREF_CONST_POOL_FALSE
,
2103 tune_params::PREF_LDRD_TRUE
,
2104 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2106 tune_params::DISPARAGE_FLAGS_ALL
,
2107 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2108 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2109 tune_params::SCHED_AUTOPREF_FULL
2112 const struct tune_params arm_exynosm1_tune
=
2114 &exynosm1_extra_costs
,
2115 &generic_addr_mode_costs
, /* Addressing mode costs. */
2116 NULL
, /* Sched adj cost. */
2117 arm_default_branch_cost
,
2118 &arm_default_vec_cost
,
2119 1, /* Constant limit. */
2120 2, /* Max cond insns. */
2121 8, /* Memset max inline. */
2122 3, /* Issue rate. */
2123 ARM_PREFETCH_NOT_BENEFICIAL
,
2124 tune_params::PREF_CONST_POOL_FALSE
,
2125 tune_params::PREF_LDRD_TRUE
,
2126 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2127 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2128 tune_params::DISPARAGE_FLAGS_ALL
,
2129 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2130 tune_params::FUSE_NOTHING
,
2131 tune_params::SCHED_AUTOPREF_OFF
2134 const struct tune_params arm_xgene1_tune
=
2136 &xgene1_extra_costs
,
2137 &generic_addr_mode_costs
, /* Addressing mode costs. */
2138 NULL
, /* Sched adj cost. */
2139 arm_default_branch_cost
,
2140 &arm_default_vec_cost
,
2141 1, /* Constant limit. */
2142 2, /* Max cond insns. */
2143 32, /* Memset max inline. */
2144 4, /* Issue rate. */
2145 ARM_PREFETCH_NOT_BENEFICIAL
,
2146 tune_params::PREF_CONST_POOL_FALSE
,
2147 tune_params::PREF_LDRD_TRUE
,
2148 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2149 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2150 tune_params::DISPARAGE_FLAGS_ALL
,
2151 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2152 tune_params::FUSE_NOTHING
,
2153 tune_params::SCHED_AUTOPREF_OFF
2156 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2157 less appealing. Set max_insns_skipped to a low value. */
2159 const struct tune_params arm_cortex_a5_tune
=
2161 &cortexa5_extra_costs
,
2162 &generic_addr_mode_costs
, /* Addressing mode costs. */
2163 NULL
, /* Sched adj cost. */
2164 arm_cortex_a5_branch_cost
,
2165 &arm_default_vec_cost
,
2166 1, /* Constant limit. */
2167 1, /* Max cond insns. */
2168 8, /* Memset max inline. */
2169 2, /* Issue rate. */
2170 ARM_PREFETCH_NOT_BENEFICIAL
,
2171 tune_params::PREF_CONST_POOL_FALSE
,
2172 tune_params::PREF_LDRD_FALSE
,
2173 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2174 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2175 tune_params::DISPARAGE_FLAGS_NEITHER
,
2176 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2177 tune_params::FUSE_NOTHING
,
2178 tune_params::SCHED_AUTOPREF_OFF
2181 const struct tune_params arm_cortex_a9_tune
=
2183 &cortexa9_extra_costs
,
2184 &generic_addr_mode_costs
, /* Addressing mode costs. */
2185 cortex_a9_sched_adjust_cost
,
2186 arm_default_branch_cost
,
2187 &arm_default_vec_cost
,
2188 1, /* Constant limit. */
2189 5, /* Max cond insns. */
2190 8, /* Memset max inline. */
2191 2, /* Issue rate. */
2192 ARM_PREFETCH_BENEFICIAL(4,32,32),
2193 tune_params::PREF_CONST_POOL_FALSE
,
2194 tune_params::PREF_LDRD_FALSE
,
2195 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2196 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2197 tune_params::DISPARAGE_FLAGS_NEITHER
,
2198 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2199 tune_params::FUSE_NOTHING
,
2200 tune_params::SCHED_AUTOPREF_OFF
2203 const struct tune_params arm_cortex_a12_tune
=
2205 &cortexa12_extra_costs
,
2206 &generic_addr_mode_costs
, /* Addressing mode costs. */
2207 NULL
, /* Sched adj cost. */
2208 arm_default_branch_cost
,
2209 &arm_default_vec_cost
, /* Vectorizer costs. */
2210 1, /* Constant limit. */
2211 2, /* Max cond insns. */
2212 8, /* Memset max inline. */
2213 2, /* Issue rate. */
2214 ARM_PREFETCH_NOT_BENEFICIAL
,
2215 tune_params::PREF_CONST_POOL_FALSE
,
2216 tune_params::PREF_LDRD_TRUE
,
2217 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2218 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2219 tune_params::DISPARAGE_FLAGS_ALL
,
2220 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2221 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2222 tune_params::SCHED_AUTOPREF_OFF
2225 const struct tune_params arm_cortex_a73_tune
=
2227 &cortexa57_extra_costs
,
2228 &generic_addr_mode_costs
, /* Addressing mode costs. */
2229 NULL
, /* Sched adj cost. */
2230 arm_default_branch_cost
,
2231 &arm_default_vec_cost
, /* Vectorizer costs. */
2232 1, /* Constant limit. */
2233 2, /* Max cond insns. */
2234 8, /* Memset max inline. */
2235 2, /* Issue rate. */
2236 ARM_PREFETCH_NOT_BENEFICIAL
,
2237 tune_params::PREF_CONST_POOL_FALSE
,
2238 tune_params::PREF_LDRD_TRUE
,
2239 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2240 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2241 tune_params::DISPARAGE_FLAGS_ALL
,
2242 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2243 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2244 tune_params::SCHED_AUTOPREF_FULL
2247 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2248 cycle to execute each. An LDR from the constant pool also takes two cycles
2249 to execute, but mildly increases pipelining opportunity (consecutive
2250 loads/stores can be pipelined together, saving one cycle), and may also
2251 improve icache utilisation. Hence we prefer the constant pool for such
2254 const struct tune_params arm_v7m_tune
=
2257 &generic_addr_mode_costs
, /* Addressing mode costs. */
2258 NULL
, /* Sched adj cost. */
2259 arm_cortex_m_branch_cost
,
2260 &arm_default_vec_cost
,
2261 1, /* Constant limit. */
2262 2, /* Max cond insns. */
2263 8, /* Memset max inline. */
2264 1, /* Issue rate. */
2265 ARM_PREFETCH_NOT_BENEFICIAL
,
2266 tune_params::PREF_CONST_POOL_TRUE
,
2267 tune_params::PREF_LDRD_FALSE
,
2268 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2269 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2270 tune_params::DISPARAGE_FLAGS_NEITHER
,
2271 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2272 tune_params::FUSE_NOTHING
,
2273 tune_params::SCHED_AUTOPREF_OFF
2276 /* Cortex-M7 tuning. */
2278 const struct tune_params arm_cortex_m7_tune
=
2281 &generic_addr_mode_costs
, /* Addressing mode costs. */
2282 NULL
, /* Sched adj cost. */
2283 arm_cortex_m7_branch_cost
,
2284 &arm_default_vec_cost
,
2285 0, /* Constant limit. */
2286 1, /* Max cond insns. */
2287 8, /* Memset max inline. */
2288 2, /* Issue rate. */
2289 ARM_PREFETCH_NOT_BENEFICIAL
,
2290 tune_params::PREF_CONST_POOL_TRUE
,
2291 tune_params::PREF_LDRD_FALSE
,
2292 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2293 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2294 tune_params::DISPARAGE_FLAGS_NEITHER
,
2295 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2296 tune_params::FUSE_NOTHING
,
2297 tune_params::SCHED_AUTOPREF_OFF
2300 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2301 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2303 const struct tune_params arm_v6m_tune
=
2305 &generic_extra_costs
, /* Insn extra costs. */
2306 &generic_addr_mode_costs
, /* Addressing mode costs. */
2307 NULL
, /* Sched adj cost. */
2308 arm_default_branch_cost
,
2309 &arm_default_vec_cost
, /* Vectorizer costs. */
2310 1, /* Constant limit. */
2311 5, /* Max cond insns. */
2312 8, /* Memset max inline. */
2313 1, /* Issue rate. */
2314 ARM_PREFETCH_NOT_BENEFICIAL
,
2315 tune_params::PREF_CONST_POOL_FALSE
,
2316 tune_params::PREF_LDRD_FALSE
,
2317 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2318 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2319 tune_params::DISPARAGE_FLAGS_NEITHER
,
2320 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2321 tune_params::FUSE_NOTHING
,
2322 tune_params::SCHED_AUTOPREF_OFF
2325 const struct tune_params arm_fa726te_tune
=
2327 &generic_extra_costs
, /* Insn extra costs. */
2328 &generic_addr_mode_costs
, /* Addressing mode costs. */
2329 fa726te_sched_adjust_cost
,
2330 arm_default_branch_cost
,
2331 &arm_default_vec_cost
,
2332 1, /* Constant limit. */
2333 5, /* Max cond insns. */
2334 8, /* Memset max inline. */
2335 2, /* Issue rate. */
2336 ARM_PREFETCH_NOT_BENEFICIAL
,
2337 tune_params::PREF_CONST_POOL_TRUE
,
2338 tune_params::PREF_LDRD_FALSE
,
2339 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2340 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2341 tune_params::DISPARAGE_FLAGS_NEITHER
,
2342 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2343 tune_params::FUSE_NOTHING
,
2344 tune_params::SCHED_AUTOPREF_OFF
2347 /* Auto-generated CPU, FPU and architecture tables. */
2348 #include "arm-cpu-data.h"
/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */
char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2356 /* Supported TLS relocations. */
2367 TLS_DESCSEQ
/* GNU scheme */
2370 /* The maximum number of insns to be used when loading a constant. */
2372 arm_constant_limit (bool size_p
)
2374 return size_p
? 1 : current_tune
->constant_limit
;
2377 /* Emit an insn that's a simple single-set. Both the operands must be known
2379 inline static rtx_insn
*
2380 emit_set_insn (rtx x
, rtx y
)
2382 return emit_insn (gen_rtx_SET (x
, y
));
/* Return the number of bits set in VALUE.  Uses Kernighan's trick:
   each iteration clears the lowest set bit, so the loop runs once
   per set bit rather than once per bit position.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
2400 /* Return the number of bits set in BMAP. */
2402 bitmap_popcount (const sbitmap bmap
)
2404 unsigned int count
= 0;
2406 sbitmap_iterator sbi
;
2408 EXECUTE_IF_SET_IN_BITMAP (bmap
, 0, n
, sbi
)
2417 } arm_fixed_mode_set
;
2419 /* A small helper for setting fixed-point library libfuncs. */
2422 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2423 const char *funcname
, const char *modename
,
2428 if (num_suffix
== 0)
2429 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2431 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2433 set_optab_libfunc (optable
, mode
, buffer
);
2437 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2438 machine_mode from
, const char *funcname
,
2439 const char *toname
, const char *fromname
)
2442 const char *maybe_suffix_2
= "";
2444 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2445 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2446 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2447 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2448 maybe_suffix_2
= "2";
2450 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2453 set_conv_libfunc (optable
, to
, from
, buffer
);
2456 static GTY(()) rtx speculation_barrier_libfunc
;
2458 /* Set up library functions unique to ARM. */
2460 arm_init_libfuncs (void)
2462 /* For Linux, we have access to kernel support for atomic operations. */
2463 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2464 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2466 /* There are no special library functions unless we are using the
2471 /* The functions below are described in Section 4 of the "Run-Time
2472 ABI for the ARM architecture", Version 1.0. */
2474 /* Double-precision floating-point arithmetic. Table 2. */
2475 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2476 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2477 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2478 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2479 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2481 /* Double-precision comparisons. Table 3. */
2482 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2483 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2484 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2485 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2486 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2487 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2488 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2490 /* Single-precision floating-point arithmetic. Table 4. */
2491 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2492 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2493 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2494 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2495 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2497 /* Single-precision comparisons. Table 5. */
2498 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2499 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2500 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2501 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2502 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2503 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2504 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2506 /* Floating-point to integer conversions. Table 6. */
2507 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2508 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2509 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2510 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2511 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2512 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2513 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2514 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2516 /* Conversions between floating types. Table 7. */
2517 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2518 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2520 /* Integer to floating-point conversions. Table 8. */
2521 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2522 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2523 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2524 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2525 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2526 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2527 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2528 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2530 /* Long long. Table 9. */
2531 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2532 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2533 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2534 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2535 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2536 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2537 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2538 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2540 /* Integer (32/32->32) division. \S 4.3.1. */
2541 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2542 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2544 /* The divmod functions are designed so that they can be used for
2545 plain division, even though they return both the quotient and the
2546 remainder. The quotient is returned in the usual location (i.e.,
2547 r0 for SImode, {r0, r1} for DImode), just as would be expected
2548 for an ordinary division routine. Because the AAPCS calling
2549 conventions specify that all of { r0, r1, r2, r3 } are
2550 callee-saved registers, there is no need to tell the compiler
2551 explicitly that those registers are clobbered by these
2553 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2554 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2556 /* For SImode division the ABI provides div-without-mod routines,
2557 which are faster. */
2558 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2559 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2561 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2562 divmod libcalls instead. */
2563 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2564 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2565 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2566 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2568 /* Half-precision float operations. The compiler handles all operations
2569 with NULL libfuncs by converting the SFmode. */
2570 switch (arm_fp16_format
)
2572 case ARM_FP16_FORMAT_IEEE
:
2573 case ARM_FP16_FORMAT_ALTERNATIVE
:
2576 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2577 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2579 : "__gnu_f2h_alternative"));
2580 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2581 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2583 : "__gnu_h2f_alternative"));
2585 set_conv_libfunc (trunc_optab
, HFmode
, DFmode
,
2586 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2588 : "__gnu_d2h_alternative"));
2591 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2592 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2593 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2594 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2595 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2598 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2599 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2600 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2601 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2602 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2603 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2604 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2611 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2613 const arm_fixed_mode_set fixed_arith_modes
[] =
2616 { E_UQQmode
, "uqq" },
2618 { E_UHQmode
, "uhq" },
2620 { E_USQmode
, "usq" },
2622 { E_UDQmode
, "udq" },
2624 { E_UTQmode
, "utq" },
2626 { E_UHAmode
, "uha" },
2628 { E_USAmode
, "usa" },
2630 { E_UDAmode
, "uda" },
2632 { E_UTAmode
, "uta" }
2634 const arm_fixed_mode_set fixed_conv_modes
[] =
2637 { E_UQQmode
, "uqq" },
2639 { E_UHQmode
, "uhq" },
2641 { E_USQmode
, "usq" },
2643 { E_UDQmode
, "udq" },
2645 { E_UTQmode
, "utq" },
2647 { E_UHAmode
, "uha" },
2649 { E_USAmode
, "usa" },
2651 { E_UDAmode
, "uda" },
2653 { E_UTAmode
, "uta" },
2664 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2666 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2667 "add", fixed_arith_modes
[i
].name
, 3);
2668 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2669 "ssadd", fixed_arith_modes
[i
].name
, 3);
2670 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2671 "usadd", fixed_arith_modes
[i
].name
, 3);
2672 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2673 "sub", fixed_arith_modes
[i
].name
, 3);
2674 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2675 "sssub", fixed_arith_modes
[i
].name
, 3);
2676 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2677 "ussub", fixed_arith_modes
[i
].name
, 3);
2678 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2679 "mul", fixed_arith_modes
[i
].name
, 3);
2680 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2681 "ssmul", fixed_arith_modes
[i
].name
, 3);
2682 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2683 "usmul", fixed_arith_modes
[i
].name
, 3);
2684 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2685 "div", fixed_arith_modes
[i
].name
, 3);
2686 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2687 "udiv", fixed_arith_modes
[i
].name
, 3);
2688 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2689 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2690 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2691 "usdiv", fixed_arith_modes
[i
].name
, 3);
2692 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2693 "neg", fixed_arith_modes
[i
].name
, 2);
2694 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2695 "ssneg", fixed_arith_modes
[i
].name
, 2);
2696 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2697 "usneg", fixed_arith_modes
[i
].name
, 2);
2698 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2699 "ashl", fixed_arith_modes
[i
].name
, 3);
2700 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2701 "ashr", fixed_arith_modes
[i
].name
, 3);
2702 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2703 "lshr", fixed_arith_modes
[i
].name
, 3);
2704 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2705 "ssashl", fixed_arith_modes
[i
].name
, 3);
2706 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2707 "usashl", fixed_arith_modes
[i
].name
, 3);
2708 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2709 "cmp", fixed_arith_modes
[i
].name
, 2);
2712 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2713 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2716 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2717 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2720 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2721 fixed_conv_modes
[j
].mode
, "fract",
2722 fixed_conv_modes
[i
].name
,
2723 fixed_conv_modes
[j
].name
);
2724 arm_set_fixed_conv_libfunc (satfract_optab
,
2725 fixed_conv_modes
[i
].mode
,
2726 fixed_conv_modes
[j
].mode
, "satfract",
2727 fixed_conv_modes
[i
].name
,
2728 fixed_conv_modes
[j
].name
);
2729 arm_set_fixed_conv_libfunc (fractuns_optab
,
2730 fixed_conv_modes
[i
].mode
,
2731 fixed_conv_modes
[j
].mode
, "fractuns",
2732 fixed_conv_modes
[i
].name
,
2733 fixed_conv_modes
[j
].name
);
2734 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2735 fixed_conv_modes
[i
].mode
,
2736 fixed_conv_modes
[j
].mode
, "satfractuns",
2737 fixed_conv_modes
[i
].name
,
2738 fixed_conv_modes
[j
].name
);
2742 if (TARGET_AAPCS_BASED
)
2743 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2745 speculation_barrier_libfunc
= init_one_libfunc ("__speculation_barrier");
2748 /* On AAPCS systems, this is the "struct __va_list". */
2749 static GTY(()) tree va_list_type
;
2751 /* Return the type to use as __builtin_va_list. */
2753 arm_build_builtin_va_list (void)
2758 if (!TARGET_AAPCS_BASED
)
2759 return std_build_builtin_va_list ();
2761 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2769 The C Library ABI further reinforces this definition in \S
2772 We must follow this definition exactly. The structure tag
2773 name is visible in C++ mangled names, and thus forms a part
2774 of the ABI. The field name may be used by people who
2775 #include <stdarg.h>. */
2776 /* Create the type. */
2777 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2778 /* Give it the required name. */
2779 va_list_name
= build_decl (BUILTINS_LOCATION
,
2781 get_identifier ("__va_list"),
2783 DECL_ARTIFICIAL (va_list_name
) = 1;
2784 TYPE_NAME (va_list_type
) = va_list_name
;
2785 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2786 /* Create the __ap field. */
2787 ap_field
= build_decl (BUILTINS_LOCATION
,
2789 get_identifier ("__ap"),
2791 DECL_ARTIFICIAL (ap_field
) = 1;
2792 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2793 TYPE_FIELDS (va_list_type
) = ap_field
;
2794 /* Compute its layout. */
2795 layout_type (va_list_type
);
2797 return va_list_type
;
2800 /* Return an expression of type "void *" pointing to the next
2801 available argument in a variable-argument list. VALIST is the
2802 user-level va_list object, of type __builtin_va_list. */
2804 arm_extract_valist_ptr (tree valist
)
2806 if (TREE_TYPE (valist
) == error_mark_node
)
2807 return error_mark_node
;
2809 /* On an AAPCS target, the pointer is stored within "struct
2811 if (TARGET_AAPCS_BASED
)
2813 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2814 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2815 valist
, ap_field
, NULL_TREE
);
2821 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2823 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2825 valist
= arm_extract_valist_ptr (valist
);
2826 std_expand_builtin_va_start (valist
, nextarg
);
2829 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2831 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2834 valist
= arm_extract_valist_ptr (valist
);
2835 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2838 /* Check any incompatible options that the user has specified. */
2840 arm_option_check_internal (struct gcc_options
*opts
)
2842 int flags
= opts
->x_target_flags
;
2844 /* iWMMXt and NEON are incompatible. */
2846 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_neon
))
2847 error ("iWMMXt and NEON are incompatible");
2849 /* Make sure that the processor choice does not conflict with any of the
2850 other command line choices. */
2851 if (TARGET_ARM_P (flags
)
2852 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
))
2853 error ("target CPU does not support ARM mode");
2855 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2856 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2857 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2859 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2860 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2862 /* If this target is normally configured to use APCS frames, warn if they
2863 are turned off and debugging is turned on. */
2864 if (TARGET_ARM_P (flags
)
2865 && write_symbols
!= NO_DEBUG
2866 && !TARGET_APCS_FRAME
2867 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2868 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2871 /* iWMMXt unsupported under Thumb mode. */
2872 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2873 error ("iWMMXt unsupported under Thumb mode");
2875 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2876 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2878 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2880 error ("RTP PIC is incompatible with Thumb");
2884 if (target_pure_code
|| target_slow_flash_data
)
2886 const char *flag
= (target_pure_code
? "-mpure-code" :
2887 "-mslow-flash-data");
2889 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2891 if (!TARGET_HAVE_MOVT
|| arm_arch_notm
|| flag_pic
|| TARGET_NEON
)
2892 error ("%s only supports non-pic code on M-profile targets with the "
2893 "MOVT instruction", flag
);
2895 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2896 -mword-relocations forbids relocation of MOVT/MOVW. */
2897 if (target_word_relocations
)
2898 error ("%s incompatible with %<-mword-relocations%>", flag
);
2902 /* Recompute the global settings depending on target attribute options. */
2905 arm_option_params_internal (void)
2907 /* If we are not using the default (ARM mode) section anchor offset
2908 ranges, then set the correct ranges now. */
2911 /* Thumb-1 LDR instructions cannot have negative offsets.
2912 Permissible positive offset ranges are 5-bit (for byte loads),
2913 6-bit (for halfword loads), or 7-bit (for word loads).
2914 Empirical results suggest a 7-bit anchor range gives the best
2915 overall code size. */
2916 targetm
.min_anchor_offset
= 0;
2917 targetm
.max_anchor_offset
= 127;
2919 else if (TARGET_THUMB2
)
2921 /* The minimum is set such that the total size of the block
2922 for a particular anchor is 248 + 1 + 4095 bytes, which is
2923 divisible by eight, ensuring natural spacing of anchors. */
2924 targetm
.min_anchor_offset
= -248;
2925 targetm
.max_anchor_offset
= 4095;
2929 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
2930 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
2933 /* Increase the number of conditional instructions with -Os. */
2934 max_insns_skipped
= optimize_size
? 4 : current_tune
->max_insns_skipped
;
2936 /* For THUMB2, we limit the conditional sequence to one IT block. */
2938 max_insns_skipped
= MIN (max_insns_skipped
, MAX_INSN_PER_IT_BLOCK
);
2941 /* True if -mflip-thumb should next add an attribute for the default
2942 mode, false if it should next add an attribute for the opposite mode. */
2943 static GTY(()) bool thumb_flipper
;
2945 /* Options after initial target override. */
2946 static GTY(()) tree init_optimize
;
2949 arm_override_options_after_change_1 (struct gcc_options
*opts
)
2951 /* -falign-functions without argument: supply one. */
2952 if (opts
->x_flag_align_functions
&& !opts
->x_str_align_functions
)
2953 opts
->x_str_align_functions
= TARGET_THUMB_P (opts
->x_target_flags
)
2954 && opts
->x_optimize_size
? "2" : "4";
2957 /* Implement targetm.override_options_after_change. */
/* Re-derive the active build target from the default target options, then
   re-apply the per-mode option fixups on the global options.  */
2960 arm_override_options_after_change (void)
2962 arm_configure_build_target (&arm_active_target
,
2963 TREE_TARGET_OPTION (target_option_default_node
),
2964 &global_options_set
, false);
2966 arm_override_options_after_change_1 (&global_options
);
2969 /* Implement TARGET_OPTION_SAVE. */
/* Copy the -march/-mcpu/-mtune strings from the live options OPTS into the
   saved-options record PTR so they survive target-attribute switches.
   Mirrors arm_option_restore below.  */
2971 arm_option_save (struct cl_target_option
*ptr
, struct gcc_options
*opts
)
2973 ptr
->x_arm_arch_string
= opts
->x_arm_arch_string
;
2974 ptr
->x_arm_cpu_string
= opts
->x_arm_cpu_string
;
2975 ptr
->x_arm_tune_string
= opts
->x_arm_tune_string
;
2978 /* Implement TARGET_OPTION_RESTORE. */
/* Inverse of arm_option_save: copy the saved -march/-mcpu/-mtune strings
   from PTR back into OPTS, then reconfigure the active build target.
   NOTE(review): the trailing arguments of the arm_configure_build_target
   call were dropped by this extraction.  */
2980 arm_option_restore (struct gcc_options
*opts
, struct cl_target_option
*ptr
)
2982 opts
->x_arm_arch_string
= ptr
->x_arm_arch_string
;
2983 opts
->x_arm_cpu_string
= ptr
->x_arm_cpu_string
;
2984 opts
->x_arm_tune_string
= ptr
->x_arm_tune_string
;
2985 arm_configure_build_target (&arm_active_target
, ptr
, &global_options_set
,
2989 /* Reset options between modes that the user has specified. */
/* Re-apply mode-dependent option defaults/fixups to OPTS, consulting
   OPTS_SET to avoid overriding anything the user set explicitly.  Called
   both at startup and when switching between ARM/Thumb via attributes.
   NOTE(review): extraction gaps — the function header line, braces and
   some else-branches are missing from this view.  */
2991 arm_option_override_internal (struct gcc_options
*opts
,
2992 struct gcc_options
*opts_set
)
2994 arm_override_options_after_change_1 (opts
);
/* Interworking needs Thumb in the ISA; silently drop it otherwise.  */
2996 if (TARGET_INTERWORK
&& !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
2998 /* The default is to enable interworking, so this warning message would
2999 be confusing to users who have just compiled with
3000 eg, -march=armv4. */
3001 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3002 opts
->x_target_flags
&= ~MASK_INTERWORK
;
/* -mthumb on a core without Thumb: warn and fall back to ARM state.  */
3005 if (TARGET_THUMB_P (opts
->x_target_flags
)
3006 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3008 warning (0, "target CPU does not support THUMB instructions");
3009 opts
->x_target_flags
&= ~MASK_THUMB
;
3012 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
3014 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3015 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
3018 /* Callee super interworking implies thumb interworking. Adding
3019 this to the flags here simplifies the logic elsewhere. */
3020 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
3021 opts
->x_target_flags
|= MASK_INTERWORK
;
3023 /* need to remember initial values so combinations of options like
3024 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3025 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
)
;
/* -mrestrict-it defaults to on for ARMv8 unless the user chose.  */
3027 if (! opts_set
->x_arm_restrict_it
)
3028 opts
->x_arm_restrict_it
= arm_arch8
;
3030 /* ARM execution state and M profile don't have [restrict] IT. */
3031 if (!TARGET_THUMB2_P (opts
->x_target_flags
) || !arm_arch_notm
)
3032 opts
->x_arm_restrict_it
= 0;
3034 /* Enable -munaligned-access by default for
3035 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3036 i.e. Thumb2 and ARM state only.
3037 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3038 - ARMv8 architecture-base processors.
3040 Disable -munaligned-access by default for
3041 - all pre-ARMv6 architecture-based processors
3042 - ARMv6-M architecture-based processors
3043 - ARMv8-M Baseline processors. */
3045 if (! opts_set
->x_unaligned_access
)
3047 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
3048 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
3050 else if (opts
->x_unaligned_access
== 1
3051 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
3053 warning (0, "target CPU does not support unaligned accesses");
3054 opts
->x_unaligned_access
= 0;
3057 /* Don't warn since it's on by default in -O2. */
/* Thumb-1 cannot schedule insns; otherwise restore the saved setting.  */
3058 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3059 opts
->x_flag_schedule_insns
= 0;
3061 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
3063 /* Disable shrink-wrap when optimizing function for size, since it tends to
3064 generate additional returns. */
3065 if (optimize_function_for_size_p (cfun
)
3066 && TARGET_THUMB2_P (opts
->x_target_flags
))
3067 opts
->x_flag_shrink_wrap
= false;
3069 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
3071 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3072 - epilogue_insns - does not accurately model the corresponding insns
3073 emitted in the asm file. In particular, see the comment in thumb_exit
3074 'Find out how many of the (return) argument registers we can corrupt'.
3075 As a consequence, the epilogue may clobber registers without fipa-ra
3076 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3077 TODO: Accurately model clobbers for epilogue_insns and reenable
3079 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3080 opts
->x_flag_ipa_ra
= 0;
3082 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
3084 /* Thumb2 inline assembly code should always use unified syntax.
3085 This will apply to ARM and Thumb1 eventually. */
3086 if (TARGET_THUMB2_P (opts
->x_target_flags
))
3087 opts
->x_inline_asm_unified
= true;
3089 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3090 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
/* Bitmap of every ISA feature bit that an -mfpu option can set; used to
   mask FPU bits in/out when reconciling -mcpu/-march/-mfpu.  Allocated and
   filled in arm_option_override.  */
3094 static sbitmap isa_all_fpubits
;
/* Bitmap of the "quirk" ISA bits, ignored when comparing CPU vs arch.  */
3095 static sbitmap isa_quirkbits
;
3097 /* Configure a build target TARGET from the user-specified options OPTS and
3098 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3099 architecture have been specified, but the two are not identical. */
/* Resolution order: parse -march/-mcpu/-mtune (plus any '+feature'
   suffixes), reconcile conflicts (-march wins for code generation, -mcpu
   for tuning), fall back to TARGET_CPU_DEFAULT when neither is given,
   overlay the -mfpu bits, then fill in TARGET's name/arch/tune fields.
   NOTE(review): extraction gaps — braces, some arguments and else-branches
   are missing from this view.  */
3101 arm_configure_build_target (struct arm_build_target
*target
,
3102 struct cl_target_option
*opts
,
3103 struct gcc_options
*opts_set
,
3104 bool warn_compatible
)
3106 const cpu_option
*arm_selected_tune
= NULL
;
3107 const arch_option
*arm_selected_arch
= NULL
;
3108 const cpu_option
*arm_selected_cpu
= NULL
;
3109 const arm_fpu_desc
*arm_selected_fpu
= NULL
;
3110 const char *tune_opts
= NULL
;
3111 const char *arch_opts
= NULL
;
3112 const char *cpu_opts
= NULL
;
3114 bitmap_clear (target
->isa
);
3115 target
->core_name
= NULL
;
3116 target
->arch_name
= NULL
;
/* Parse -march, splitting off any '+extension' suffix.  */
3118 if (opts_set
->x_arm_arch_string
)
3120 arm_selected_arch
= arm_parse_arch_option_name (all_architectures
,
3122 opts
->x_arm_arch_string
);
3123 arch_opts
= strchr (opts
->x_arm_arch_string
, '+');
/* Parse -mcpu likewise; it also supplies the default tuning.  */
3126 if (opts_set
->x_arm_cpu_string
)
3128 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "-mcpu",
3129 opts
->x_arm_cpu_string
);
3130 cpu_opts
= strchr (opts
->x_arm_cpu_string
, '+');
3131 arm_selected_tune
= arm_selected_cpu
;
3132 /* If taking the tuning from -mcpu, we don't need to rescan the
3133 options for tuning. */
3136 if (opts_set
->x_arm_tune_string
)
3138 arm_selected_tune
= arm_parse_cpu_option_name (all_cores
, "-mtune",
3139 opts
->x_arm_tune_string
);
3140 tune_opts
= strchr (opts
->x_arm_tune_string
, '+');
3143 if (arm_selected_arch
)
3145 arm_initialize_isa (target
->isa
, arm_selected_arch
->common
.isa_bits
);
3146 arm_parse_option_features (target
->isa
, &arm_selected_arch
->common
,
/* Both -march and -mcpu given: diff their ISA bits (minus quirk and
   FPU bits) to decide whether they genuinely conflict.  */
3149 if (arm_selected_cpu
)
3151 auto_sbitmap
cpu_isa (isa_num_bits
);
3152 auto_sbitmap
isa_delta (isa_num_bits
);
3154 arm_initialize_isa (cpu_isa
, arm_selected_cpu
->common
.isa_bits
);
3155 arm_parse_option_features (cpu_isa
, &arm_selected_cpu
->common
,
3157 bitmap_xor (isa_delta
, cpu_isa
, target
->isa
);
3158 /* Ignore any bits that are quirk bits. */
3159 bitmap_and_compl (isa_delta
, isa_delta
, isa_quirkbits
);
3160 /* Ignore (for now) any bits that might be set by -mfpu. */
3161 bitmap_and_compl (isa_delta
, isa_delta
, isa_all_fpubits
);
3163 if (!bitmap_empty_p (isa_delta
))
3165 if (warn_compatible
)
3166 warning (0, "switch %<-mcpu=%s%> conflicts "
3167 "with %<-march=%s%> switch",
3168 arm_selected_cpu
->common
.name
,
3169 arm_selected_arch
->common
.name
);
3170 /* -march wins for code generation.
3171 -mcpu wins for default tuning. */
3172 if (!arm_selected_tune
)
3173 arm_selected_tune
= arm_selected_cpu
;
3175 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3176 target
->arch_name
= arm_selected_arch
->common
.name
;
3180 /* Architecture and CPU are essentially the same.
3181 Prefer the CPU setting. */
3182 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3183 target
->core_name
= arm_selected_cpu
->common
.name
;
3184 /* Copy the CPU's capabilities, so that we inherit the
3185 appropriate extensions and quirks. */
3186 bitmap_copy (target
->isa
, cpu_isa
);
3191 /* Pick a CPU based on the architecture. */
3192 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3193 target
->arch_name
= arm_selected_arch
->common
.name
;
3194 /* Note: target->core_name is left unset in this path. */
3197 else if (arm_selected_cpu
)
3199 target
->core_name
= arm_selected_cpu
->common
.name
;
3200 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3201 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3203 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3205 /* If the user did not specify a processor or architecture, choose
3209 const cpu_option
*sel
;
3210 auto_sbitmap
sought_isa (isa_num_bits
);
3211 bitmap_clear (sought_isa
);
3212 auto_sbitmap
default_isa (isa_num_bits
);
3214 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "default CPU",
3215 TARGET_CPU_DEFAULT
);
3216 cpu_opts
= strchr (TARGET_CPU_DEFAULT
, '+');
3217 gcc_assert (arm_selected_cpu
->common
.name
);
3219 /* RWE: All of the selection logic below (to the end of this
3220 'if' clause) looks somewhat suspect. It appears to be mostly
3221 there to support forcing thumb support when the default CPU
3222 does not have thumb (somewhat dubious in terms of what the
3223 user might be expecting). I think it should be removed once
3224 support for the pre-thumb era cores is removed. */
3225 sel
= arm_selected_cpu
;
3226 arm_initialize_isa (default_isa
, sel
->common
.isa_bits
);
3227 arm_parse_option_features (default_isa
, &arm_selected_cpu
->common
,
3230 /* Now check to see if the user has specified any command line
3231 switches that require certain abilities from the cpu. */
3233 if (TARGET_INTERWORK
|| TARGET_THUMB
)
3234 bitmap_set_bit (sought_isa
, isa_bit_thumb
);
3236 /* If there are such requirements and the default CPU does not
3237 satisfy them, we need to run over the complete list of
3238 cores looking for one that is satisfactory. */
3239 if (!bitmap_empty_p (sought_isa
)
3240 && !bitmap_subset_p (sought_isa
, default_isa
))
3242 auto_sbitmap
candidate_isa (isa_num_bits
);
3243 /* We're only interested in a CPU with at least the
3244 capabilities of the default CPU and the required
3245 additional features. */
3246 bitmap_ior (default_isa
, default_isa
, sought_isa
);
3248 /* Try to locate a CPU type that supports all of the abilities
3249 of the default CPU, plus the extra abilities requested by
3251 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3253 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3254 /* An exact match? */
3255 if (bitmap_equal_p (default_isa
, candidate_isa
))
/* No exact match found — fall back to a best-fit scan.  */
3259 if (sel
->common
.name
== NULL
)
3261 unsigned current_bit_count
= isa_num_bits
;
3262 const cpu_option
*best_fit
= NULL
;
3264 /* Ideally we would like to issue an error message here
3265 saying that it was not possible to find a CPU compatible
3266 with the default CPU, but which also supports the command
3267 line options specified by the programmer, and so they
3268 ought to use the -mcpu=<name> command line option to
3269 override the default CPU type.
3271 If we cannot find a CPU that has exactly the
3272 characteristics of the default CPU and the given
3273 command line options we scan the array again looking
3274 for a best match. The best match must have at least
3275 the capabilities of the perfect match. */
3276 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3278 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3280 if (bitmap_subset_p (default_isa
, candidate_isa
))
/* Fewest surplus bits beyond what was sought wins.  */
3284 bitmap_and_compl (candidate_isa
, candidate_isa
,
3286 count
= bitmap_popcount (candidate_isa
);
3288 if (count
< current_bit_count
)
3291 current_bit_count
= count
;
3295 gcc_assert (best_fit
);
3299 arm_selected_cpu
= sel
;
3302 /* Now we know the CPU, we can finally initialize the target
3304 target
->core_name
= arm_selected_cpu
->common
.name
;
3305 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3306 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3308 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3311 gcc_assert (arm_selected_cpu
);
3312 gcc_assert (arm_selected_arch
);
/* An explicit -mfpu replaces whatever FPU bits the CPU/arch implied.  */
3314 if (opts
->x_arm_fpu_index
!= TARGET_FPU_auto
)
3316 arm_selected_fpu
= &all_fpus
[opts
->x_arm_fpu_index
];
3317 auto_sbitmap
fpu_bits (isa_num_bits
);
3319 arm_initialize_isa (fpu_bits
, arm_selected_fpu
->isa_bits
);
3320 bitmap_and_compl (target
->isa
, target
->isa
, isa_all_fpubits
);
3321 bitmap_ior (target
->isa
, target
->isa
, fpu_bits
);
3324 if (!arm_selected_tune
)
3325 arm_selected_tune
= arm_selected_cpu
;
3326 else /* Validate the features passed to -mtune. */
3327 arm_parse_option_features (NULL
, &arm_selected_tune
->common
, tune_opts
);
3329 const cpu_tune
*tune_data
= &all_tunes
[arm_selected_tune
- all_cores
];
3331 /* Finish initializing the target structure. */
3332 target
->arch_pp_name
= arm_selected_arch
->arch
;
3333 target
->base_arch
= arm_selected_arch
->base_arch
;
3334 target
->profile
= arm_selected_arch
->profile
;
3336 target
->tune_flags
= tune_data
->tune_flags
;
3337 target
->tune
= tune_data
->tune
;
3338 target
->tune_core
= tune_data
->scheduler
;
3339 arm_option_reconfigure_globals ();
3342 /* Fix up any incompatible options that the user has specified. */
/* Implementation of TARGET_OPTION_OVERRIDE for ARM: builds the quirk and
   FPU bit sets, configures the active build target, then validates and
   fixes up interdependent command-line options and tuning parameters.
   NOTE(review): extraction gaps — braces, some declarations (e.g. ok,
   fpu_index) and else-branches are missing from this view.  */
3344 arm_option_override (void)
3346 static const enum isa_feature fpu_bitlist
[]
3347 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
3348 static const enum isa_feature quirk_bitlist
[] = { ISA_ALL_QUIRKS
, isa_nobit
};
3349 cl_target_option opts
;
3351 isa_quirkbits
= sbitmap_alloc (isa_num_bits
);
3352 arm_initialize_isa (isa_quirkbits
, quirk_bitlist
);
3354 isa_all_fpubits
= sbitmap_alloc (isa_num_bits
);
3355 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
3357 arm_active_target
.isa
= sbitmap_alloc (isa_num_bits
);
/* No explicit -mfpu: resolve the configured default FPU name.  */
3359 if (!global_options_set
.x_arm_fpu_index
)
3364 ok
= opt_enum_arg_to_value (OPT_mfpu_
, FPUTYPE_AUTO
, &fpu_index
,
3367 arm_fpu_index
= (enum fpu_type
) fpu_index
;
3370 cl_target_option_save (&opts
, &global_options
);
3371 arm_configure_build_target (&arm_active_target
, &opts
, &global_options_set
,
3374 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3375 SUBTARGET_OVERRIDE_OPTIONS
;
3378 /* Initialize boolean versions of the architectural flags, for use
3379 in the arm.md file and for enabling feature flags. */
3380 arm_option_reconfigure_globals ();
3382 arm_tune
= arm_active_target
.tune_core
;
3383 tune_flags
= arm_active_target
.tune_flags
;
3384 current_tune
= arm_active_target
.tune
;
3386 /* TBD: Dwarf info for apcs frame is not handled yet. */
3387 if (TARGET_APCS_FRAME
)
3388 flag_shrink_wrap
= false;
3390 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3392 warning (0, "%<-mapcs-stack-check%> incompatible with "
3393 "%<-mno-apcs-frame%>");
3394 target_flags
|= MASK_APCS_FRAME
;
3397 if (TARGET_POKE_FUNCTION_NAME
)
3398 target_flags
|= MASK_APCS_FRAME
;
3400 if (TARGET_APCS_REENT
&& flag_pic
)
3401 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3403 if (TARGET_APCS_REENT
)
3404 warning (0, "APCS reentrant code not supported. Ignored");
3406 /* Set up some tuning parameters. */
3407 arm_ld_sched
= (tune_flags
& TF_LDSCHED
) != 0;
3408 arm_tune_strongarm
= (tune_flags
& TF_STRONG
) != 0;
3409 arm_tune_wbuf
= (tune_flags
& TF_WBUF
) != 0;
3410 arm_tune_xscale
= (tune_flags
& TF_XSCALE
) != 0;
3411 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3412 arm_m_profile_small_mul
= (tune_flags
& TF_SMALLMUL
) != 0;
3414 /* For arm2/3 there is no need to do any scheduling if we are doing
3415 software floating-point. */
3416 if (TARGET_SOFT_FLOAT
&& (tune_flags
& TF_NO_MODE32
))
3417 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3419 /* Override the default structure alignment for AAPCS ABI. */
3420 if (!global_options_set
.x_arm_structure_size_boundary
)
3422 if (TARGET_AAPCS_BASED
)
3423 arm_structure_size_boundary
= 8;
3427 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
/* Validate an explicit -mstructure-size-boundary value.  */
3429 if (arm_structure_size_boundary
!= 8
3430 && arm_structure_size_boundary
!= 32
3431 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3433 if (ARM_DOUBLEWORD_ALIGN
)
3435 "structure size boundary can only be set to 8, 32 or 64");
3437 warning (0, "structure size boundary can only be set to 8 or 32");
3438 arm_structure_size_boundary
3439 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3443 if (TARGET_VXWORKS_RTP
)
3445 if (!global_options_set
.x_arm_pic_data_is_text_relative
)
3446 arm_pic_data_is_text_relative
= 0;
3449 && !arm_pic_data_is_text_relative
3450 && !(global_options_set
.x_target_flags
& MASK_SINGLE_PIC_BASE
))
3451 /* When text & data segments don't have a fixed displacement, the
3452 intended use is with a single, read only, pic base register.
3453 Unless the user explicitly requested not to do that, set
3455 target_flags
|= MASK_SINGLE_PIC_BASE
;
3457 /* If stack checking is disabled, we can use r10 as the PIC register,
3458 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3459 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3461 if (TARGET_VXWORKS_RTP
)
3462 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3463 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3466 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3467 arm_pic_register
= 9;
3469 /* If in FDPIC mode then force arm_pic_register to be r9. */
3472 arm_pic_register
= FDPIC_REGNUM
;
3474 sorry ("FDPIC mode is not supported in Thumb-1 mode");
/* Honour an explicit -mpic-register=, after sanity checks.  */
3477 if (arm_pic_register_string
!= NULL
)
3479 int pic_register
= decode_reg_name (arm_pic_register_string
);
3482 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3484 /* Prevent the user from choosing an obviously stupid PIC register. */
3485 else if (pic_register
< 0 || call_used_or_fixed_reg_p (pic_register
)
3486 || pic_register
== HARD_FRAME_POINTER_REGNUM
3487 || pic_register
== STACK_POINTER_REGNUM
3488 || pic_register
>= PC_REGNUM
3489 || (TARGET_VXWORKS_RTP
3490 && (unsigned int) pic_register
!= arm_pic_register
))
3491 error ("unable to use %qs for PIC register", arm_pic_register_string
);
3493 arm_pic_register
= pic_register
;
3497 target_word_relocations
= 1;
3499 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3500 if (fix_cm3_ldrd
== 2)
3502 if (bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_cm3_ldrd
))
3508 /* Hot/Cold partitioning is not currently supported, since we can't
3509 handle literal pool placement in that case. */
3510 if (flag_reorder_blocks_and_partition
)
3512 inform (input_location
,
3513 "%<-freorder-blocks-and-partition%> not supported "
3514 "on this architecture");
3515 flag_reorder_blocks_and_partition
= 0;
3516 flag_reorder_blocks
= 1;
3520 /* Hoisting PIC address calculations more aggressively provides a small,
3521 but measurable, size reduction for PIC code. Therefore, we decrease
3522 the bar for unrestricted expression hoisting to the cost of PIC address
3523 calculation, which is 2 instructions. */
3524 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3525 global_options
.x_param_values
,
3526 global_options_set
.x_param_values
);
3528 /* ARM EABI defaults to strict volatile bitfields. */
3529 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3530 && abi_version_at_least(2))
3531 flag_strict_volatile_bitfields
= 1;
3533 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3534 have deemed it beneficial (signified by setting
3535 prefetch.num_slots to 1 or more). */
3536 if (flag_prefetch_loop_arrays
< 0
3539 && current_tune
->prefetch
.num_slots
> 0)
3540 flag_prefetch_loop_arrays
= 1;
3542 /* Set up parameters to be used in prefetching algorithm. Do not
3543 override the defaults unless we are tuning for a core we have
3544 researched values for. */
3545 if (current_tune
->prefetch
.num_slots
> 0)
3546 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3547 current_tune
->prefetch
.num_slots
,
3548 global_options
.x_param_values
,
3549 global_options_set
.x_param_values
);
3550 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3551 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3552 current_tune
->prefetch
.l1_cache_line_size
,
3553 global_options
.x_param_values
,
3554 global_options_set
.x_param_values
);
3555 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3556 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3557 current_tune
->prefetch
.l1_cache_size
,
3558 global_options
.x_param_values
,
3559 global_options_set
.x_param_values
);
3561 /* Look through ready list and all of queue for instructions
3562 relevant for L2 auto-prefetcher. */
3563 int param_sched_autopref_queue_depth
;
3565 switch (current_tune
->sched_autopref
)
3567 case tune_params::SCHED_AUTOPREF_OFF
:
3568 param_sched_autopref_queue_depth
= -1;
3571 case tune_params::SCHED_AUTOPREF_RANK
:
3572 param_sched_autopref_queue_depth
= 0;
3575 case tune_params::SCHED_AUTOPREF_FULL
:
3576 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3583 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3584 param_sched_autopref_queue_depth
,
3585 global_options
.x_param_values
,
3586 global_options_set
.x_param_values
);
3588 /* Currently, for slow flash data, we just disable literal pools. We also
3589 disable it for pure-code. */
3590 if (target_slow_flash_data
|| target_pure_code
)
3591 arm_disable_literal_pool
= true;
3593 /* Disable scheduling fusion by default if it's not armv7 processor
3594 or doesn't prefer ldrd/strd. */
3595 if (flag_schedule_fusion
== 2
3596 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3597 flag_schedule_fusion
= 0;
3599 /* Need to remember initial options before they are overridden. */
3600 init_optimize
= build_optimization_node (&global_options
);
3602 arm_options_perform_arch_sanity_checks ();
3603 arm_option_override_internal (&global_options
, &global_options_set
);
3604 arm_option_check_internal (&global_options
);
3605 arm_option_params_internal ();
3607 /* Create the default target_options structure. */
3608 target_option_default_node
= target_option_current_node
3609 = build_target_option_node (&global_options
);
3611 /* Register global variables with the garbage collector. */
3612 arm_add_gc_roots ();
3614 /* Init initial mode for testing. */
3615 thumb_flipper
= TARGET_THUMB
;
3619 /* Reconfigure global status flags from the active_target.isa. */
/* Derive the arm_archN / arm_arch_* boolean globals, the preprocessor
   arch name, fp16 format and thread-pointer method from the ISA bitmap
   in arm_active_target.  */
3621 arm_option_reconfigure_globals (void)
3623 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_active_target
.arch_pp_name
);
3624 arm_base_arch
= arm_active_target
.base_arch
;
3626 /* Initialize boolean versions of the architectural flags, for use
3627 in the arm.md file. */
3628 arm_arch4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv4
);
3629 arm_arch4t
= arm_arch4
&& bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3630 arm_arch5t
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5t
);
3631 arm_arch5te
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5te
);
3632 arm_arch6
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6
);
3633 arm_arch6k
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6k
);
3634 arm_arch_notm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
);
3635 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3636 arm_arch7
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7
);
3637 arm_arch7em
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7em
);
3638 arm_arch8
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8
);
3639 arm_arch8_1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_1
);
3640 arm_arch8_2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_2
);
3641 arm_arch8_3
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_3
);
3642 arm_arch8_4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_4
);
3643 arm_arch_thumb1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3644 arm_arch_thumb2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb2
);
3645 arm_arch_xscale
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_xscale
);
3646 arm_arch_iwmmxt
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt
);
3647 arm_arch_iwmmxt2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt2
);
3648 arm_arch_thumb_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_tdiv
);
3649 arm_arch_arm_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_adiv
);
3650 arm_arch_crc
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_crc32
);
3651 arm_arch_cmse
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_cmse
);
3652 arm_fp16_inst
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_fp16
);
3653 arm_arch_lpae
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_lpae
);
/* fp16 instructions imply the IEEE format; alternative format conflicts.
   NOTE(review): the guarding condition line for this was dropped by the
   extraction.  */
3656 if (arm_fp16_format
== ARM_FP16_FORMAT_ALTERNATIVE
)
3657 error ("selected fp16 options are incompatible");
3658 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3661 /* And finally, set up some quirks. */
3662 arm_arch_no_volatile_ce
3663 = bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_volatile_ce
);
3664 arm_arch6kz
= arm_arch6k
&& bitmap_bit_p (arm_active_target
.isa
,
3665 isa_bit_quirk_armv6kz
);
3667 /* Use the cp15 method if it is available. */
3668 if (target_thread_pointer
== TP_AUTO
)
3670 if (arm_arch6k
&& !TARGET_THUMB1
)
3671 target_thread_pointer
= TP_CP15
;
3673 target_thread_pointer
= TP_SOFT
;
3677 /* Perform some validation between the desired architecture and the rest of the
/* Cross-check the selected architecture/FPU/ABI combination, emitting
   errors, warnings or sorry() for unsupported mixes, and pick the default
   procedure calling standard (arm_pcs_default).
   NOTE(review): extraction gaps — braces and some else-branches are
   missing from this view.  */
3680 arm_options_perform_arch_sanity_checks (void)
3682 /* V5T code we generate is completely interworking capable, so we turn off
3683 TARGET_INTERWORK here to avoid many tests later on. */
3685 /* XXX However, we must pass the right pre-processor defines to CPP
3686 or GLD can get confused. This is a hack. */
3687 if (TARGET_INTERWORK
)
3688 arm_cpp_interwork
= 1;
3691 target_flags
&= ~MASK_INTERWORK
;
3693 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3694 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3696 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3697 error ("iwmmxt abi requires an iwmmxt capable cpu");
3699 /* BPABI targets use linker tricks to allow interworking on cores
3700 without thumb support. */
3701 if (TARGET_INTERWORK
3703 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3705 warning (0, "target CPU does not support interworking" );
3706 target_flags
&= ~MASK_INTERWORK
;
3709 /* If soft-float is specified then don't use FPU. */
3710 if (TARGET_SOFT_FLOAT
)
3711 arm_fpu_attr
= FPU_NONE
;
3713 arm_fpu_attr
= FPU_VFP
;
3715 if (TARGET_AAPCS_BASED
)
3717 if (TARGET_CALLER_INTERWORKING
)
3718 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3720 if (TARGET_CALLEE_INTERWORKING
)
3721 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3724 /* __fp16 support currently assumes the core has ldrh. */
3725 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3726 sorry ("__fp16 and no ldrh");
3728 if (use_cmse
&& !arm_arch_cmse
)
3729 error ("target CPU does not support ARMv8-M Security Extensions");
3731 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3732 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3733 if (use_cmse
&& LAST_VFP_REGNUM
> LAST_LO_VFP_REGNUM
)
3734 error ("ARMv8-M Security Extensions incompatible with selected FPU");
/* Select the default calling convention for the chosen ABI.  */
3737 if (TARGET_AAPCS_BASED
)
3739 if (arm_abi
== ARM_ABI_IWMMXT
)
3740 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3741 else if (TARGET_HARD_FLOAT_ABI
)
3743 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3744 if (!bitmap_bit_p (arm_active_target
.isa
, isa_bit_vfpv2
))
3745 error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
3748 arm_pcs_default
= ARM_PCS_AAPCS
;
3752 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
)
3753 sorry ("%<-mfloat-abi=hard%> and VFP");
3755 if (arm_abi
== ARM_ABI_APCS
)
3756 arm_pcs_default
= ARM_PCS_APCS
;
3758 arm_pcs_default
= ARM_PCS_ATPCS
;
3762 /* Test whether a local function descriptor is canonical, i.e.,
3763 whether we can use GOTOFFFUNCDESC to compute the address of the
/* FNX is a SYMBOL_REF for the function.  Temporarily forces protected
   visibility to default around the default_binds_local_p_1 query, then
   restores it.  NOTE(review): the extraction dropped several lines here,
   including the early returns and the final return of `ret`.  */
3766 arm_fdpic_local_funcdesc_p (rtx fnx
)
3769 enum symbol_visibility vis
;
3775 if (! SYMBOL_REF_LOCAL_P (fnx
))
3778 fn
= SYMBOL_REF_DECL (fnx
);
3783 vis
= DECL_VISIBILITY (fn
);
3785 if (vis
== VISIBILITY_PROTECTED
)
3786 /* Private function descriptors for protected functions are not
3787 canonical. Temporarily change the visibility to global so that
3788 we can ensure uniqueness of funcdesc pointers. */
3789 DECL_VISIBILITY (fn
) = VISIBILITY_DEFAULT
;
3791 ret
= default_binds_local_p_1 (fn
, flag_pic
);
/* Restore the original visibility.  */
3793 DECL_VISIBILITY (fn
) = vis
;
/* One-time setup called from arm_option_override: initialize the obstack
   used for minipool (constant pool) bookkeeping and record its base.  */
3799 arm_add_gc_roots (void)
3801 gcc_obstack_init(&minipool_obstack
);
3802 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3805 /* A table of known ARM exception types.
3806 For use with the interrupt function attribute. */
/* Each entry maps an attribute-argument string to the ARM_FT_* function
   type bits; the table is scanned linearly by arm_isr_value and is
   NULL-terminated.  NOTE(review): "ABORT"/"abort" appear twice in this
   extraction — confirm against the original source.  */
3810 const char *const arg
;
3811 const unsigned long return_value
;
3815 static const isr_attribute_arg isr_attribute_args
[] =
3817 { "IRQ", ARM_FT_ISR
},
3818 { "irq", ARM_FT_ISR
},
3819 { "FIQ", ARM_FT_FIQ
},
3820 { "fiq", ARM_FT_FIQ
},
3821 { "ABORT", ARM_FT_ISR
},
3822 { "abort", ARM_FT_ISR
},
3823 { "ABORT", ARM_FT_ISR
},
3824 { "abort", ARM_FT_ISR
},
3825 { "UNDEF", ARM_FT_EXCEPTION
},
3826 { "undef", ARM_FT_EXCEPTION
},
3827 { "SWI", ARM_FT_EXCEPTION
},
3828 { "swi", ARM_FT_EXCEPTION
},
3829 { NULL
, ARM_FT_NORMAL
}
3832 /* Returns the (interrupt) function type of the current
3833 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
/* ARGUMENT is the argument list of an "isr"/"interrupt" attribute; looks
   the string argument up in isr_attribute_args.  */
3835 static unsigned long
3836 arm_isr_value (tree argument
)
3838 const isr_attribute_arg
* ptr
;
/* NOTE(review): an enclosing condition for this early return was dropped
   by the extraction.  */
3842 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3844 /* No argument - default to IRQ. */
3845 if (argument
== NULL_TREE
)
3848 /* Get the value of the argument. */
3849 if (TREE_VALUE (argument
) == NULL_TREE
3850 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3851 return ARM_FT_UNKNOWN
;
3853 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3855 /* Check it against the list of known arguments. */
3856 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3857 if (streq (arg
, ptr
->arg
))
3858 return ptr
->return_value
;
3860 /* An unrecognized interrupt type. */
3861 return ARM_FT_UNKNOWN
;
3864 /* Computes the type of the current function. */
/* Builds an ARM_FT_* bitmask for current_function_decl from its tree
   attributes ("naked", "isr"/"interrupt", "cmse_nonsecure_entry"), its
   volatility/nothrow status and whether it uses a static chain.
   NOTE(review): extraction gaps — declarations of `attr`/`a`, some
   conditions and the final return were dropped from this view.  */
3866 static unsigned long
3867 arm_compute_func_type (void)
3869 unsigned long type
= ARM_FT_UNKNOWN
;
3873 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3875 /* Decide if the current function is volatile. Such functions
3876 never return, and many memory cycles can be saved by not storing
3877 register values that will never be needed again. This optimization
3878 was added to speed up context switching in a kernel application. */
3880 && (TREE_NOTHROW (current_function_decl
)
3881 || !(flag_unwind_tables
3883 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3884 && TREE_THIS_VOLATILE (current_function_decl
))
3885 type
|= ARM_FT_VOLATILE
;
3887 if (cfun
->static_chain_decl
!= NULL
)
3888 type
|= ARM_FT_NESTED
;
3890 attr
= DECL_ATTRIBUTES (current_function_decl
);
3892 a
= lookup_attribute ("naked", attr
);
3894 type
|= ARM_FT_NAKED
;
3896 a
= lookup_attribute ("isr", attr
);
/* Fall back to the "interrupt" spelling of the attribute.  */
3898 a
= lookup_attribute ("interrupt", attr
);
3901 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3903 type
|= arm_isr_value (TREE_VALUE (a
));
3905 if (lookup_attribute ("cmse_nonsecure_entry", attr
))
3906 type
|= ARM_FT_CMSE_ENTRY
;
3911 /* Returns the type of the current function. */
/* Lazily computes and caches the ARM_FT_* mask in cfun->machine->func_type
   on first use.  */
3914 arm_current_func_type (void)
3916 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3917 cfun
->machine
->func_type
= arm_compute_func_type ();
3919 return cfun
->machine
->func_type
;
/* Implement TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS: true unless the current
   function is "naked".  */
3923 arm_allocate_stack_slots_for_args (void)
3925 /* Naked functions should not allocate stack slots for arguments. */
3926 return !IS_NAKED (arm_current_func_type ());
/* Implement TARGET_WARN_FUNC_RETURN: suppress missing-return warnings for
   "naked" functions, whose bodies are pure assembly.  */
3930 arm_warn_func_return (tree decl
)
3932 /* Naked functions are implemented entirely in assembly, including the
3933 return sequence, so suppress warnings about this. */
3934 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3938 /* Output assembler code for a block containing the constant parts
3939 of a trampoline, leaving space for the variable parts.
3941 On the ARM, (if r8 is the static chain regnum, and remembering that
3942 referencing pc adds an offset of 8) the trampoline looks like:
3945 .word static chain value
3946 .word function's address
3947 XXX FIXME: When the trampoline returns, r8 will be clobbered.
3949 In FDPIC mode, the trampoline looks like:
3950 .word trampoline address
3951 .word trampoline GOT address
3952 ldr r12, [pc, #8] ; #4 for Arm mode
3953 ldr r9, [pc, #8] ; #4 for Arm mode
3954 ldr pc, [pc, #8] ; #4 for Arm mode
3955 .word static chain value
3957 .word function's address
/* NOTE(review): fragmentary extraction -- the return-type line, braces
   and the `if (TARGET_FDPIC)' / `else' headers that select between the
   FDPIC, ARM, Thumb-2 and Thumb-1 bodies below are missing from this
   view (the embedded numbering jumps, e.g. 3963 -> 3967).  */
3961 arm_asm_trampoline_template (FILE *f
)
3963 fprintf (f
, "\t.syntax unified\n");
/* --- FDPIC variant (guard missing from extraction) --- */
3967 /* The first two words are a function descriptor pointing to the
3968 trampoline code just below. */
3970 fprintf (f
, "\t.arm\n");
3971 else if (TARGET_THUMB2
)
3972 fprintf (f
, "\t.thumb\n");
3974 /* Only ARM and Thumb-2 are supported. */
/* Reserve the two descriptor words.  */
3977 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3978 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3979 /* Trampoline code which sets the static chain register but also
3980 PIC register before jumping into real code. */
3981 asm_fprintf (f
, "\tldr\t%r, [%r, #%d]\n",
3982 STATIC_CHAIN_REGNUM
, PC_REGNUM
,
3983 TARGET_THUMB2
? 8 : 4);
3984 asm_fprintf (f
, "\tldr\t%r, [%r, #%d]\n",
3985 PIC_OFFSET_TABLE_REGNUM
, PC_REGNUM
,
3986 TARGET_THUMB2
? 8 : 4);
3987 asm_fprintf (f
, "\tldr\t%r, [%r, #%d]\n",
3988 PC_REGNUM
, PC_REGNUM
,
3989 TARGET_THUMB2
? 8 : 4);
/* Slot for the static chain value (filled in by arm_trampoline_init).  */
3990 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
/* --- Plain ARM variant --- */
3992 else if (TARGET_ARM
)
3994 fprintf (f
, "\t.arm\n");
3995 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3996 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
/* --- Thumb-2 variant --- */
3998 else if (TARGET_THUMB2
)
4000 fprintf (f
, "\t.thumb\n");
4001 /* The Thumb-2 trampoline is similar to the arm implementation.
4002 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4003 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
4004 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
4005 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
/* --- 16-bit Thumb variant: must go through r0 since pc-relative ldr
   into high registers is not available; r0/r1 are saved and restored
   around the sequence (else-branch header missing from extraction).  */
4009 ASM_OUTPUT_ALIGN (f
, 2);
4010 fprintf (f
, "\t.code\t16\n");
4011 fprintf (f
, ".Ltrampoline_start:\n");
4012 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
4013 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
4014 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
4015 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
4016 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
4017 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
/* Slots for the static chain value and target address.  */
4019 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4020 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4023 /* Emit RTL insns to initialize the variable parts of a trampoline. */
/* M_TRAMP is the memory block for the trampoline, FNDECL the target
   function, CHAIN_VALUE the static chain to install.
   NOTE(review): fragmentary extraction -- the return-type line, braces
   and the `if (TARGET_FDPIC)' / `else' split between the two layouts
   below are missing from this view (numbering jumps 4031 -> 4035).  */
4026 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
4028 rtx fnaddr
, mem
, a_tramp
;
/* Copy the constant template emitted by arm_asm_trampoline_template.  */
4030 emit_block_move (m_tramp
, assemble_trampoline_template (),
4031 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
/* --- FDPIC layout: DECL_RTL is a function descriptor; its first word
   is the entry address, second word the GOT address.  */
4035 rtx funcdesc
= XEXP (DECL_RTL (fndecl
), 0);
4036 rtx fnaddr
= gen_rtx_MEM (Pmode
, funcdesc
);
4037 rtx gotaddr
= gen_rtx_MEM (Pmode
, plus_constant (Pmode
, funcdesc
, 4));
4038 /* The function start address is at offset 8, but in Thumb mode
4039 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4041 rtx trampoline_code_start
4042 = plus_constant (Pmode
, XEXP (m_tramp
, 0), TARGET_THUMB2
? 9 : 8);
4044 /* Write initial funcdesc which points to the trampoline. */
4045 mem
= adjust_address (m_tramp
, SImode
, 0);
4046 emit_move_insn (mem
, trampoline_code_start
);
4047 mem
= adjust_address (m_tramp
, SImode
, 4);
4048 emit_move_insn (mem
, gen_rtx_REG (Pmode
, PIC_OFFSET_TABLE_REGNUM
));
4049 /* Setup static chain. */
4050 mem
= adjust_address (m_tramp
, SImode
, 20);
4051 emit_move_insn (mem
, chain_value
);
4052 /* GOT + real function entry point. */
4053 mem
= adjust_address (m_tramp
, SImode
, 24);
4054 emit_move_insn (mem
, gotaddr
);
4055 mem
= adjust_address (m_tramp
, SImode
, 28);
4056 emit_move_insn (mem
, fnaddr
);
/* --- Non-FDPIC layout: chain and target address live at fixed offsets
   that differ between 32-bit (ARM/Thumb-2) and 16-bit Thumb templates.  */
4060 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
4061 emit_move_insn (mem
, chain_value
);
4063 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
4064 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4065 emit_move_insn (mem
, fnaddr
);
/* Flush the instruction cache over the freshly written trampoline.  */
4068 a_tramp
= XEXP (m_tramp
, 0);
4069 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
4070 LCT_NORMAL
, VOIDmode
, a_tramp
, Pmode
,
4071 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
4074 /* Thumb trampolines should be entered in thumb mode, so set
4075 the bottom bit of the address. */
/* NOTE(review): fragmentary extraction -- return-type line, braces and
   the final `return addr;' (original line ~4086) are missing here.  */
4078 arm_trampoline_adjust_address (rtx addr
)
4080 /* For FDPIC don't fix trampoline address since it's a function
4081 descriptor and not a function address. */
4082 if (TARGET_THUMB
&& !TARGET_FDPIC
)
/* OR in bit 0 so a BX to this address switches to Thumb state.  */
4083 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
4084 NULL
, 0, OPTAB_LIB_WIDEN
);
4088 /* Return 1 if it is possible to return using a single instruction.
4089 If SIBLING is non-null, this is a test for a return before a sibling
4090 call. SIBLING is the call insn, so we can examine its register usage. */
/* NOTE(review): fragmentary extraction -- the return-type line, braces,
   every `return 0;'/`return 1;' statement and several condition headers
   are missing from this view (the embedded numbering jumps throughout).
   Each visible `if' below is a disqualifying test; the missing lines
   presumably return 0 when a test fires -- TODO confirm against the
   unmangled source.  */
4093 use_return_insn (int iscond
, rtx sibling
)
4096 unsigned int func_type
;
4097 unsigned long saved_int_regs
;
4098 unsigned HOST_WIDE_INT stack_adjust
;
4099 arm_stack_offsets
*offsets
;
4101 /* Never use a return instruction before reload has run. */
4102 if (!reload_completed
)
4105 func_type
= arm_current_func_type ();
4107 /* Naked, volatile and stack alignment functions need special
4109 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
4112 /* So do interrupt functions that use the frame pointer and Thumb
4113 interrupt functions. */
4114 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
/* Tuning preference: with LDRD/STRD epilogues and not optimizing for
   size, a single-insn return is declined here.  */
4117 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
4118 && !optimize_function_for_size_p (cfun
))
4121 offsets
= arm_get_frame_offsets ();
4122 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
4124 /* As do variadic functions. */
4125 if (crtl
->args
.pretend_args_size
4126 || cfun
->machine
->uses_anonymous_args
4127 /* Or if the function calls __builtin_eh_return () */
4128 || crtl
->calls_eh_return
4129 /* Or if the function calls alloca */
4130 || cfun
->calls_alloca
4131 /* Or if there is a stack adjustment. However, if the stack pointer
4132 is saved on the stack, we can use a pre-incrementing stack load. */
4133 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
4134 && stack_adjust
== 4))
4135 /* Or if the static chain register was saved above the frame, under the
4136 assumption that the stack pointer isn't saved on the stack. */
4137 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
4138 && arm_compute_static_chain_stack_bytes() != 0))
4141 saved_int_regs
= offsets
->saved_regs_mask
;
4143 /* Unfortunately, the insn
4145 ldmib sp, {..., sp, ...}
4147 triggers a bug on most SA-110 based devices, such that the stack
4148 pointer won't be correctly restored if the instruction takes a
4149 page fault. We work around this problem by popping r3 along with
4150 the other registers, since that is never slower than executing
4151 another instruction.
4153 We test for !arm_arch5t here, because code for any architecture
4154 less than this could potentially be run on one of the buggy
4156 if (stack_adjust
== 4 && !arm_arch5t
&& TARGET_ARM
)
4158 /* Validate that r3 is a call-clobbered register (always true in
4159 the default abi) ... */
4160 if (!call_used_or_fixed_reg_p (3))
4163 /* ... that it isn't being used for a return value ... */
4164 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
4167 /* ... or for a tail-call argument ... */
4170 gcc_assert (CALL_P (sibling
));
4172 if (find_regno_fusage (sibling
, USE
, 3))
4176 /* ... and that there are no call-saved registers in r0-r2
4177 (always true in the default ABI). */
4178 if (saved_int_regs
& 0x7)
4182 /* Can't be done if interworking with Thumb, and any registers have been
4184 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
4187 /* On StrongARM, conditional returns are expensive if they aren't
4188 taken and multiple registers have been stacked. */
4189 if (iscond
&& arm_tune_strongarm
)
4191 /* Conditional return when just the LR is stored is a simple
4192 conditional-load instruction, that's not expensive. */
4193 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
/* NOTE(review): the first clause of this PIC-register condition
   (original lines ~4195-4196) is missing from the extraction.  */
4197 && arm_pic_register
!= INVALID_REGNUM
4198 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
4202 /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
4203 several instructions if anything needs to be popped. */
4204 if (saved_int_regs
&& IS_CMSE_ENTRY (func_type
))
4207 /* If there are saved registers but the LR isn't saved, then we need
4208 two instructions for the return. */
4209 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
4212 /* Can't be done if any of the VFP regs are pushed,
4213 since this also requires an insn. */
4214 if (TARGET_HARD_FLOAT
)
4215 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
4216 if (df_regs_ever_live_p (regno
) && !call_used_or_fixed_reg_p (regno
))
/* Same restriction for iWMMXt registers.  */
4219 if (TARGET_REALLY_IWMMXT
)
4220 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
4221 if (df_regs_ever_live_p (regno
) && ! call_used_or_fixed_reg_p (regno
))
4227 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4228 shrink-wrapping if possible. This is the case if we need to emit a
4229 prologue, which we can test by looking at the offsets. */
/* NOTE(review): return-type line and braces are missing from this
   fragmentary extraction.  */
4231 use_simple_return_p (void)
4233 arm_stack_offsets
*offsets
;
4235 /* Note this function can be called before or after reload. */
4236 if (!reload_completed
)
4237 arm_compute_frame_layout ();
/* Nonzero outgoing-args size implies a prologue is needed.  */
4239 offsets
= arm_get_frame_offsets ();
4240 return offsets
->outgoing_args
!= 0;
4243 /* Return TRUE if int I is a valid immediate ARM constant. */
/* A valid A32 data-processing immediate is an 8-bit value rotated right
   by an even amount; Thumb-2 additionally allows replicated byte
   patterns and arbitrary shifts.
   NOTE(review): fragmentary extraction -- return-type line, braces,
   several `return' statements and some condition headers are missing
   from this view.  */
4246 const_ok_for_arm (HOST_WIDE_INT i
)
4250 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4251 be all zero, or all one. */
4252 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
4253 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
4254 != ((~(unsigned HOST_WIDE_INT
) 0)
4255 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
/* From here on, work on the low 32 bits only.  */
4258 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
4260 /* Fast return for 0 and small values. We must do this for zero, since
4261 the code below can't handle that one case. */
4262 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
4265 /* Get the number of trailing zeros. */
4266 lowbit
= ffs((int) i
) - 1;
4268 /* Only even shifts are allowed in ARM mode so round down to the
4269 nearest even number. */
/* Accept an 8-bit value shifted up by LOWBIT.  */
4273 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
4278 /* Allow rotated constants in ARM mode. */
/* These masks cover 8-bit fields that wrap around bit 31.  */
4280 && ((i
& ~0xc000003f) == 0
4281 || (i
& ~0xf000000f) == 0
4282 || (i
& ~0xfc000003) == 0))
4285 else if (TARGET_THUMB2
)
4289 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4292 if (i
== v
|| i
== (v
| (v
<< 8)))
4295 /* Allow repeated pattern 0xXY00XY00. */
4301 else if (TARGET_HAVE_MOVT
)
4303 /* Thumb-1 Targets with MOVT. */
4313 /* Return true if I is a valid constant for the operation CODE. */
/* NOTE(review): fragmentary extraction -- the `switch (code)' header,
   most `case' labels and braces are missing from this view; the visible
   `return' statements correspond to the SET/PLUS/MINUS/IOR-XOR/AND
   arms of that switch -- TODO confirm against the unmangled source.  */
4315 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
/* Directly encodable immediates are fine for every operation.  */
4317 if (const_ok_for_arm (i
))
4323 /* See if we can use movw. */
4324 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
4327 /* Otherwise, try mvn. */
4328 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4331 /* See if we can use addw or subw. */
4333 && ((i
& 0xfffff000) == 0
4334 || ((-i
) & 0xfffff000) == 0))
/* PLUS falls back to encoding the negated constant (SUB).  */
4355 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
4357 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
/* IOR/XOR and AND try the bitwise complement (ORN/BIC forms).  */
4363 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4367 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4374 /* Return true if I is a valid di mode constant for the operation CODE. */
/* Splits I into its high and low 32-bit halves and tests each against
   the SImode predicates.
   NOTE(review): fragmentary extraction -- the return-type line, braces
   and the `switch (code)' / `case' structure selecting between the two
   visible `return' statements are missing from this view.  */
4376 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
4378 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4379 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4380 rtx hi
= GEN_INT (hi_val
);
4381 rtx lo
= GEN_INT (lo_val
);
/* All-ones halves are acceptable too (can be handled with MVN).  */
4391 return const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF
4392 || const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF;
4394 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4401 /* Emit a sequence of insns to handle a large constant.
4402 CODE is the code of the operation required, it can be any of SET, PLUS,
4403 IOR, AND, XOR, MINUS;
4404 MODE is the mode in which the operation is being performed;
4405 VAL is the integer to operate on;
4406 SOURCE is the other operand (a register, or a null-pointer for SET);
4407 SUBTARGETS means it is safe to create scratch registers if that will
4408 either produce a simpler sequence, or we will want to cse the values.
4409 Return value is the number of insns emitted. */
4411 /* ??? Tweak this for thumb2. */
/* NOTE(review): fragmentary extraction -- return-type line, braces, the
   declaration of COND and several condition headers are missing from
   this view (numbering jumps, e.g. 4414 -> 4418).  */
4413 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4414 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
/* If the insn is conditionally executed, propagate its condition to
   every insn we emit.  */
4418 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4419 cond
= COND_EXEC_TEST (PATTERN (insn
));
4423 if (subtargets
|| code
== SET
4424 || (REG_P (target
) && REG_P (source
)
4425 && REGNO (target
) != REGNO (source
)))
4427 /* After arm_reorg has been called, we can't fix up expensive
4428 constants by pushing them into memory so we must synthesize
4429 them in-line, regardless of the cost. This is only likely to
4430 be more costly on chips that have load delay slots and we are
4431 compiling without running the scheduler (so no splitting
4432 occurred before the final instruction emission).
4434 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
/* Cost check: only go through a constant-pool load / MOVW+MOVT pair
   when synthesizing in-line would exceed the per-function limit.  */
4436 if (!cfun
->machine
->after_arm_reorg
4438 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4440 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4445 /* Currently SET is the only monadic value for CODE, all
4446 the rest are diadic. */
4447 if (TARGET_USE_MOVT
)
4448 arm_emit_movpair (target
, GEN_INT (val
));
4450 emit_set_insn (target
, GEN_INT (val
));
/* Diadic case: materialize VAL in a temporary, then apply CODE.  */
4456 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4458 if (TARGET_USE_MOVT
)
4459 arm_emit_movpair (temp
, GEN_INT (val
));
4461 emit_set_insn (temp
, GEN_INT (val
));
4463 /* For MINUS, the value is subtracted from, since we never
4464 have subtraction of a constant. */
4466 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4468 emit_set_insn (target
,
4469 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
/* Normal path: synthesize the constant with data-processing insns.  */
4475 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4479 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4480 ARM/THUMB2 immediates, and add up to VAL.
4481 Thr function return value gives the number of insns required. */
/* NOTE(review): fragmentary extraction -- return-type line, braces and
   the declarations of i / best_start / insns1 / insns2 are missing
   from this view.  */
4483 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4484 struct four_ints
*return_sequence
)
4486 int best_consecutive_zeros
= 0;
4490 struct four_ints tmp_sequence
;
4492 /* If we aren't targeting ARM, the best place to start is always at
4493 the bottom, otherwise look more closely. */
/* Scan 2-bit-aligned positions for the longest run of zero bits; the
   8-bit rotated immediates are most effective just past such a run.  */
4496 for (i
= 0; i
< 32; i
+= 2)
4498 int consecutive_zeros
= 0;
4500 if (!(val
& (3 << i
)))
4502 while ((i
< 32) && !(val
& (3 << i
)))
4504 consecutive_zeros
+= 2;
4507 if (consecutive_zeros
> best_consecutive_zeros
)
4509 best_consecutive_zeros
= consecutive_zeros
;
4510 best_start
= i
- consecutive_zeros
;
4517 /* So long as it won't require any more insns to do so, it's
4518 desirable to emit a small constant (in bits 0...9) in the last
4519 insn. This way there is more chance that it can be combined with
4520 a later addressing insn to form a pre-indexed load or store
4521 operation. Consider:
4523 *((volatile int *)0xe0000100) = 1;
4524 *((volatile int *)0xe0000110) = 2;
4526 We want this to wind up as:
4530 str rB, [rA, #0x100]
4532 str rB, [rA, #0x110]
4534 rather than having to synthesize both large constants from scratch.
4536 Therefore, we calculate how many insns would be required to emit
4537 the constant starting from `best_start', and also starting from
4538 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4539 yield a shorter sequence, we may as well use zero. */
4540 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
/* NOTE(review): the first clause of this condition (original line
   ~4541, presumably `if (best_start != 0') is missing here.  */
4542 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4544 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4545 if (insns2
<= insns1
)
4547 *return_sequence
= tmp_sequence
;
4555 /* As for optimal_immediate_sequence, but starting at bit-position I. */
/* NOTE(review): fragmentary extraction -- return-type line, braces, the
   declarations of insns / end and the outer do/while loop structure are
   missing from this view (numbering jumps throughout).  */
4557 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4558 struct four_ints
*return_sequence
, int i
)
4560 int remainder
= val
& 0xffffffff;
4563 /* Try and find a way of doing the job in either two or three
4566 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4567 location. We start at position I. This may be the MSB, or
4568 optimial_immediate_sequence may have positioned it at the largest block
4569 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4570 wrapping around to the top of the word when we drop off the bottom.
4571 In the worst case this code should produce no more than four insns.
4573 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4574 constants, shifted to any arbitrary location. We should always start
4579 unsigned int b1
, b2
, b3
, b4
;
4580 unsigned HOST_WIDE_INT result
;
/* At most four immediates are ever needed for a 32-bit constant.  */
4583 gcc_assert (insns
< 4);
4588 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4589 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4592 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4593 /* We can use addw/subw for the last 12 bits. */
4597 /* Use an 8-bit shifted/rotated immediate. */
/* RESULT picks out the 8-bit field at END, wrapping past bit 0 when
   I < END (ARM rotations wrap around the word).  */
4601 result
= remainder
& ((0x0ff << end
)
4602 | ((i
< end
) ? (0xff >> (32 - end
))
4609 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4610 arbitrary shifts. */
4611 i
-= TARGET_ARM
? 2 : 1;
4615 /* Next, see if we can do a better job with a thumb2 replicated
4618 We do it this way around to catch the cases like 0x01F001E0 where
4619 two 8-bit immediates would work, but a replicated constant would
4622 TODO: 16-bit constants that don't clear all the bits, but still win.
4623 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
/* Split the remainder into its four bytes.  */
4626 b1
= (remainder
& 0xff000000) >> 24;
4627 b2
= (remainder
& 0x00ff0000) >> 16;
4628 b3
= (remainder
& 0x0000ff00) >> 8;
4629 b4
= remainder
& 0xff;
4633 /* The 8-bit immediate already found clears b1 (and maybe b2),
4634 but must leave b3 and b4 alone. */
4636 /* First try to find a 32-bit replicated constant that clears
4637 almost everything. We can assume that we can't do it in one,
4638 or else we wouldn't be here. */
4639 unsigned int tmp
= b1
& b2
& b3
& b4
;
4640 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4642 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4643 + (tmp
== b3
) + (tmp
== b4
);
4645 && (matching_bytes
>= 3
4646 || (matching_bytes
== 2
4647 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4649 /* At least 3 of the bytes match, and the fourth has at
4650 least as many bits set, or two of the bytes match
4651 and it will only require one more insn to finish. */
4659 /* Second, try to find a 16-bit replicated constant that can
4660 leave three of the bytes clear. If b2 or b4 is already
4661 zero, then we can. If the 8-bit from above would not
4662 clear b2 anyway, then we still win. */
4663 else if (b1
== b3
&& (!b2
|| !b4
4664 || (remainder
& 0x00ff0000 & ~result
)))
4666 result
= remainder
& 0xff00ff00;
4672 /* The 8-bit immediate already found clears b2 (and maybe b3)
4673 and we don't get here unless b1 is alredy clear, but it will
4674 leave b4 unchanged. */
4676 /* If we can clear b2 and b4 at once, then we win, since the
4677 8-bits couldn't possibly reach that far. */
4680 result
= remainder
& 0x00ff00ff;
/* Record the chosen immediate and strip its bits from the work left.  */
4686 return_sequence
->i
[insns
++] = result
;
4687 remainder
&= ~result
;
4689 if (code
== SET
|| code
== MINUS
)
4697 /* Emit an instruction with the indicated PATTERN. If COND is
4698 non-NULL, conditionalize the execution of the instruction on COND
/* NOTE(review): return-type line, braces and the `if (cond)' header
   guarding the COND_EXEC wrap are missing from this fragmentary
   extraction.  */
4702 emit_constant_insn (rtx cond
, rtx pattern
)
/* Wrap PATTERN in (cond_exec COND PATTERN); COND is copied because the
   caller may reuse the same rtx for several emitted insns.  */
4705 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4706 emit_insn (pattern
);
4709 /* As above, but extra parameter GENERATE which, if clear, suppresses
/* Synthesize (CODE SOURCE VAL) into TARGET using data-processing insns,
   returning the insn count; when GENERATE is clear only the count is
   computed and nothing is emitted.
   NOTE(review): fragmentary extraction -- the return-type line, braces,
   the `switch (code)' / `case' labels, many `if'/`else' headers and all
   `return' statements are missing from this view (the embedded original
   numbering jumps constantly).  Comments below annotate only the
   visible fragments.  */
4713 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4714 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4715 int subtargets
, int generate
)
4719 int final_invert
= 0;
4721 int set_sign_bit_copies
= 0;
4722 int clear_sign_bit_copies
= 0;
4723 int clear_zero_bit_copies
= 0;
4724 int set_zero_bit_copies
= 0;
4725 int insns
= 0, neg_insns
, inv_insns
;
4726 unsigned HOST_WIDE_INT temp1
, temp2
;
4727 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4728 struct four_ints
*immediates
;
4729 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4731 /* Find out which operations are safe for a given CODE. Also do a quick
4732 check for degenerate cases; these can occur when DImode operations
/* --- Degenerate-case handling (the enclosing switch on CODE is not
   visible in this extraction) --- */
4745 if (remainder
== 0xffffffff)
4748 emit_constant_insn (cond
,
4749 gen_rtx_SET (target
,
4750 GEN_INT (ARM_SIGN_EXTEND (val
))));
4756 if (reload_completed
&& rtx_equal_p (target
, source
))
4760 emit_constant_insn (cond
, gen_rtx_SET (target
, source
))
;
4769 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4772 if (remainder
== 0xffffffff)
4774 if (reload_completed
&& rtx_equal_p (target
, source
))
4777 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4786 if (reload_completed
&& rtx_equal_p (target
, source
))
4789 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4793 if (remainder
== 0xffffffff)
4796 emit_constant_insn (cond
,
4797 gen_rtx_SET (target
,
4798 gen_rtx_NOT (mode
, source
)));
4805 /* We treat MINUS as (val - source), since (source - val) is always
4806 passed as (source + (-val)). */
4810 emit_constant_insn (cond
,
4811 gen_rtx_SET (target
,
4812 gen_rtx_NEG (mode
, source
)));
4815 if (const_ok_for_arm (val
))
4818 emit_constant_insn (cond
,
4819 gen_rtx_SET (target
,
4820 gen_rtx_MINUS (mode
, GEN_INT (val
),
4831 /* If we can do it in one insn get out quickly. */
4832 if (const_ok_for_op (val
, code
))
4835 emit_constant_insn (cond
,
4836 gen_rtx_SET (target
,
4838 ? gen_rtx_fmt_ee (code
, mode
, source
,
4844 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4846 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4847 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4851 if (mode
== SImode
&& i
== 16)
4852 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4854 emit_constant_insn (cond
,
4855 gen_zero_extendhisi2
4856 (target
, gen_lowpart (HImode
, source
)));
4858 /* Extz only supports SImode, but we can coerce the operands
4860 emit_constant_insn (cond
,
4861 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4862 gen_lowpart (SImode
, source
),
4863 GEN_INT (i
), const0_rtx
));
4869 /* Calculate a few attributes that may be useful for specific
4871 /* Count number of leading zeros. */
4872 for (i
= 31; i
>= 0; i
--)
4874 if ((remainder
& (1 << i
)) == 0)
4875 clear_sign_bit_copies
++;
4880 /* Count number of leading 1's. */
4881 for (i
= 31; i
>= 0; i
--)
4883 if ((remainder
& (1 << i
)) != 0)
4884 set_sign_bit_copies
++;
4889 /* Count number of trailing zero's. */
4890 for (i
= 0; i
<= 31; i
++)
4892 if ((remainder
& (1 << i
)) == 0)
4893 clear_zero_bit_copies
++;
4898 /* Count number of trailing 1's. */
4899 for (i
= 0; i
<= 31; i
++)
4901 if ((remainder
& (1 << i
)) != 0)
4902 set_zero_bit_copies
++;
/* --- CODE-specific two-insn tricks (switch headers not visible) --- */
4910 /* See if we can do this by sign_extending a constant that is known
4911 to be negative. This is a good, way of doing it, since the shift
4912 may well merge into a subsequent insn. */
4913 if (set_sign_bit_copies
> 1)
4915 if (const_ok_for_arm
4916 (temp1
= ARM_SIGN_EXTEND (remainder
4917 << (set_sign_bit_copies
- 1))))
4921 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4922 emit_constant_insn (cond
,
4923 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4924 emit_constant_insn (cond
,
4925 gen_ashrsi3 (target
, new_src
,
4926 GEN_INT (set_sign_bit_copies
- 1)));
4930 /* For an inverted constant, we will need to set the low bits,
4931 these will be shifted out of harm's way. */
4932 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4933 if (const_ok_for_arm (~temp1
))
4937 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4938 emit_constant_insn (cond
,
4939 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4940 emit_constant_insn (cond
,
4941 gen_ashrsi3 (target
, new_src
,
4942 GEN_INT (set_sign_bit_copies
- 1)));
4948 /* See if we can calculate the value as the difference between two
4949 valid immediates. */
4950 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4952 int topshift
= clear_sign_bit_copies
& ~1;
4954 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4955 & (0xff000000 >> topshift
));
4957 /* If temp1 is zero, then that means the 9 most significant
4958 bits of remainder were 1 and we've caused it to overflow.
4959 When topshift is 0 we don't need to do anything since we
4960 can borrow from 'bit 32'. */
4961 if (temp1
== 0 && topshift
!= 0)
4962 temp1
= 0x80000000 >> (topshift
- 1);
4964 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4966 if (const_ok_for_arm (temp2
))
4970 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4971 emit_constant_insn (cond
,
4972 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4973 emit_constant_insn (cond
,
4974 gen_addsi3 (target
, new_src
,
4982 /* See if we can generate this by setting the bottom (or the top)
4983 16 bits, and then shifting these into the other half of the
4984 word. We only look for the simplest cases, to do more would cost
4985 too much. Be careful, however, not to generate this when the
4986 alternative would take fewer insns. */
4987 if (val
& 0xffff0000)
4989 temp1
= remainder
& 0xffff0000;
4990 temp2
= remainder
& 0x0000ffff;
4992 /* Overlaps outside this range are best done using other methods. */
4993 for (i
= 9; i
< 24; i
++)
4995 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4996 && !const_ok_for_arm (temp2
))
4998 rtx new_src
= (subtargets
4999 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
5001 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
5002 source
, subtargets
, generate
);
5010 gen_rtx_ASHIFT (mode
, source
,
5017 /* Don't duplicate cases already considered. */
5018 for (i
= 17; i
< 24; i
++)
5020 if (((temp1
| (temp1
>> i
)) == remainder
)
5021 && !const_ok_for_arm (temp1
))
5023 rtx new_src
= (subtargets
5024 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
5026 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
5027 source
, subtargets
, generate
);
5032 gen_rtx_SET (target
,
5035 gen_rtx_LSHIFTRT (mode
, source
,
5046 /* If we have IOR or XOR, and the constant can be loaded in a
5047 single instruction, and we can find a temporary to put it in,
5048 then this can be done in two instructions instead of 3-4. */
5050 /* TARGET can't be NULL if SUBTARGETS is 0 */
5051 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
5053 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
5057 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5059 emit_constant_insn (cond
,
5060 gen_rtx_SET (sub
, GEN_INT (val
)));
5061 emit_constant_insn (cond
,
5062 gen_rtx_SET (target
,
5063 gen_rtx_fmt_ee (code
, mode
,
5074 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
5075 and the remainder 0s for e.g. 0xfff00000)
5076 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5078 This can be done in 2 instructions by using shifts with mov or mvn.
5083 mvn r0, r0, lsr #12 */
5084 if (set_sign_bit_copies
> 8
5085 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
5089 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5090 rtx shift
= GEN_INT (set_sign_bit_copies
);
5096 gen_rtx_ASHIFT (mode
,
5101 gen_rtx_SET (target
,
5103 gen_rtx_LSHIFTRT (mode
, sub
,
5110 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5112 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5114 For eg. r0 = r0 | 0xfff
5119 if (set_zero_bit_copies
> 8
5120 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
5124 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5125 rtx shift
= GEN_INT (set_zero_bit_copies
);
5131 gen_rtx_LSHIFTRT (mode
,
5136 gen_rtx_SET (target
,
5138 gen_rtx_ASHIFT (mode
, sub
,
5144 /* This will never be reached for Thumb2 because orn is a valid
5145 instruction. This is for Thumb1 and the ARM 32 bit cases.
5147 x = y | constant (such that ~constant is a valid constant)
5149 x = ~(~y & ~constant).
5151 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
5155 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5156 emit_constant_insn (cond
,
5158 gen_rtx_NOT (mode
, source
)));
5161 sub
= gen_reg_rtx (mode
);
5162 emit_constant_insn (cond
,
5164 gen_rtx_AND (mode
, source
,
5166 emit_constant_insn (cond
,
5167 gen_rtx_SET (target
,
5168 gen_rtx_NOT (mode
, sub
)));
5175 /* See if two shifts will do 2 or more insn's worth of work. */
5176 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
5178 HOST_WIDE_INT shift_mask
= ((0xffffffff
5179 << (32 - clear_sign_bit_copies
))
5182 if ((remainder
| shift_mask
) != 0xffffffff)
5184 HOST_WIDE_INT new_val
5185 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5189 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5190 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
5191 new_src
, source
, subtargets
, 1);
5196 rtx targ
= subtargets
? NULL_RTX
: target
;
5197 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5198 targ
, source
, subtargets
, 0);
5204 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5205 rtx shift
= GEN_INT (clear_sign_bit_copies
);
5207 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
5208 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
5214 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
5216 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
5218 if ((remainder
| shift_mask
) != 0xffffffff)
5220 HOST_WIDE_INT new_val
5221 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5224 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5226 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5227 new_src
, source
, subtargets
, 1);
5232 rtx targ
= subtargets
? NULL_RTX
: target
;
5234 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5235 targ
, source
, subtargets
, 0);
5241 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5242 rtx shift
= GEN_INT (clear_zero_bit_copies
);
5244 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
5245 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
5257 /* Calculate what the instruction sequences would be if we generated it
5258 normally, negated, or inverted. */
5260 /* AND cannot be split into multiple insns, so invert and use BIC. */
5263 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
5266 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
5271 if (can_invert
|| final_invert
)
5272 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
5277 immediates
= &pos_immediates
;
5279 /* Is the negated immediate sequence more efficient? */
5280 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
5283 immediates
= &neg_immediates
;
5288 /* Is the inverted immediate sequence more efficient?
5289 We must allow for an extra NOT instruction for XOR operations, although
5290 there is some chance that the final 'mvn' will get optimized later. */
5291 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
5294 immediates
= &inv_immediates
;
5302 /* Now output the chosen sequence as instructions. */
5305 for (i
= 0; i
< insns
; i
++)
5307 rtx new_src
, temp1_rtx
;
5309 temp1
= immediates
->i
[i
];
5311 if (code
== SET
|| code
== MINUS
)
5312 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
5313 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
5314 new_src
= gen_reg_rtx (mode
);
5320 else if (can_negate
)
5323 temp1
= trunc_int_for_mode (temp1
, mode
);
5324 temp1_rtx
= GEN_INT (temp1
);
5328 else if (code
== MINUS
)
5329 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
5331 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
5333 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
5338 can_negate
= can_invert
;
5342 else if (code
== MINUS
)
5350 emit_constant_insn (cond
, gen_rtx_SET (target
,
5351 gen_rtx_NOT (mode
, source
)));
5358 /* Return TRUE if op is a constant where both the low and top words are
5359 suitable for RSB/RSC instructions. This is never true for Thumb, since
5360 we do not have RSC in that case. */
5362 arm_const_double_prefer_rsbs_rsc (rtx op
)
5364 /* Thumb lacks RSC, so we never prefer that sequence. */
5365 if (TARGET_THUMB
|| !CONST_INT_P (op
))
5367 HOST_WIDE_INT hi
, lo
;
5368 lo
= UINTVAL (op
) & 0xffffffffULL
;
5369 hi
= UINTVAL (op
) >> 32;
5370 return const_ok_for_arm (lo
) && const_ok_for_arm (hi
);
5373 /* Canonicalize a comparison so that we are more likely to recognize it.
5374 This can be done for a few constant compares, where we can make the
5375 immediate value easier to load. */
5378 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
5379 bool op0_preserve_value
)
5382 unsigned HOST_WIDE_INT i
, maxval
;
5384 mode
= GET_MODE (*op0
);
5385 if (mode
== VOIDmode
)
5386 mode
= GET_MODE (*op1
);
5388 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5390 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5391 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5392 either reversed or (for constant OP1) adjusted to GE/LT.
5393 Similarly for GTU/LEU in Thumb mode. */
5397 if (*code
== GT
|| *code
== LE
5398 || *code
== GTU
|| *code
== LEU
)
5400 /* Missing comparison. First try to use an available
5402 if (CONST_INT_P (*op1
))
5411 /* Try to convert to GE/LT, unless that would be more
5413 if (!arm_const_double_by_immediates (GEN_INT (i
+ 1))
5414 && arm_const_double_prefer_rsbs_rsc (*op1
))
5416 *op1
= GEN_INT (i
+ 1);
5417 *code
= *code
== GT
? GE
: LT
;
5424 if (i
!= ~((unsigned HOST_WIDE_INT
) 0))
5426 /* Try to convert to GEU/LTU, unless that would
5427 be more expensive. */
5428 if (!arm_const_double_by_immediates (GEN_INT (i
+ 1))
5429 && arm_const_double_prefer_rsbs_rsc (*op1
))
5431 *op1
= GEN_INT (i
+ 1);
5432 *code
= *code
== GTU
? GEU
: LTU
;
5442 if (!op0_preserve_value
)
5444 std::swap (*op0
, *op1
);
5445 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5451 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5452 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5453 to facilitate possible combining with a cmp into 'ands'. */
5455 && GET_CODE (*op0
) == ZERO_EXTEND
5456 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5457 && GET_MODE (XEXP (*op0
, 0)) == QImode
5458 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5459 && subreg_lowpart_p (XEXP (*op0
, 0))
5460 && *op1
== const0_rtx
)
5461 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5464 /* Comparisons smaller than DImode. Only adjust comparisons against
5465 an out-of-range constant. */
5466 if (!CONST_INT_P (*op1
)
5467 || const_ok_for_arm (INTVAL (*op1
))
5468 || const_ok_for_arm (- INTVAL (*op1
)))
5482 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5484 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5485 *code
= *code
== GT
? GE
: LT
;
5493 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5495 *op1
= GEN_INT (i
- 1);
5496 *code
= *code
== GE
? GT
: LE
;
5503 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5504 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5506 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5507 *code
= *code
== GTU
? GEU
: LTU
;
5515 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5517 *op1
= GEN_INT (i
- 1);
5518 *code
= *code
== GEU
? GTU
: LEU
;
5529 /* Define how to find the value returned by a function. */
5532 arm_function_value(const_tree type
, const_tree func
,
5533 bool outgoing ATTRIBUTE_UNUSED
)
5536 int unsignedp ATTRIBUTE_UNUSED
;
5537 rtx r ATTRIBUTE_UNUSED
;
5539 mode
= TYPE_MODE (type
);
5541 if (TARGET_AAPCS_BASED
)
5542 return aapcs_allocate_return_reg (mode
, type
, func
);
5544 /* Promote integer types. */
5545 if (INTEGRAL_TYPE_P (type
))
5546 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5548 /* Promotes small structs returned in a register to full-word size
5549 for big-endian AAPCS. */
5550 if (arm_return_in_msb (type
))
5552 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5553 if (size
% UNITS_PER_WORD
!= 0)
5555 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5556 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
5560 return arm_libcall_value_1 (mode
);
5563 /* libcall hashtable helpers. */
5565 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5567 static inline hashval_t
hash (const rtx_def
*);
5568 static inline bool equal (const rtx_def
*, const rtx_def
*);
5569 static inline void remove (rtx_def
*);
5573 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5575 return rtx_equal_p (p1
, p2
);
5579 libcall_hasher::hash (const rtx_def
*p1
)
5581 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5584 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5587 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5589 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5593 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5595 static bool init_done
= false;
5596 static libcall_table_type
*libcall_htab
= NULL
;
5602 libcall_htab
= new libcall_table_type (31);
5603 add_libcall (libcall_htab
,
5604 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5605 add_libcall (libcall_htab
,
5606 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5607 add_libcall (libcall_htab
,
5608 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5609 add_libcall (libcall_htab
,
5610 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5612 add_libcall (libcall_htab
,
5613 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5614 add_libcall (libcall_htab
,
5615 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5616 add_libcall (libcall_htab
,
5617 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5618 add_libcall (libcall_htab
,
5619 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5621 add_libcall (libcall_htab
,
5622 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5623 add_libcall (libcall_htab
,
5624 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5625 add_libcall (libcall_htab
,
5626 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5627 add_libcall (libcall_htab
,
5628 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5629 add_libcall (libcall_htab
,
5630 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5631 add_libcall (libcall_htab
,
5632 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5633 add_libcall (libcall_htab
,
5634 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5635 add_libcall (libcall_htab
,
5636 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5638 /* Values from double-precision helper functions are returned in core
5639 registers if the selected core only supports single-precision
5640 arithmetic, even if we are using the hard-float ABI. The same is
5641 true for single-precision helpers, but we will never be using the
5642 hard-float ABI on a CPU which doesn't support single-precision
5643 operations in hardware. */
5644 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5645 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5646 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5647 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5648 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5649 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5650 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5651 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5652 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5653 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5654 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5655 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5657 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5659 add_libcall (libcall_htab
,
5660 convert_optab_libfunc (trunc_optab
, HFmode
, DFmode
));
5663 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5667 arm_libcall_value_1 (machine_mode mode
)
5669 if (TARGET_AAPCS_BASED
)
5670 return aapcs_libcall_value (mode
);
5671 else if (TARGET_IWMMXT_ABI
5672 && arm_vector_mode_supported_p (mode
))
5673 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5675 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5678 /* Define how to find the value returned by a library function
5679 assuming the value has mode MODE. */
5682 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5684 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5685 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5687 /* The following libcalls return their result in integer registers,
5688 even though they return a floating point value. */
5689 if (arm_libcall_uses_aapcs_base (libcall
))
5690 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5694 return arm_libcall_value_1 (mode
);
5697 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5700 arm_function_value_regno_p (const unsigned int regno
)
5702 if (regno
== ARG_REGISTER (1)
5704 && TARGET_AAPCS_BASED
5705 && TARGET_HARD_FLOAT
5706 && regno
== FIRST_VFP_REGNUM
)
5707 || (TARGET_IWMMXT_ABI
5708 && regno
== FIRST_IWMMXT_REGNUM
))
5714 /* Determine the amount of memory needed to store the possible return
5715 registers of an untyped call. */
5717 arm_apply_result_size (void)
5723 if (TARGET_HARD_FLOAT_ABI
)
5725 if (TARGET_IWMMXT_ABI
)
5732 /* Decide whether TYPE should be returned in memory (true)
5733 or in a register (false). FNTYPE is the type of the function making
5736 arm_return_in_memory (const_tree type
, const_tree fntype
)
5740 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5742 if (TARGET_AAPCS_BASED
)
5744 /* Simple, non-aggregate types (ie not including vectors and
5745 complex) are always returned in a register (or registers).
5746 We don't care about which register here, so we can short-cut
5747 some of the detail. */
5748 if (!AGGREGATE_TYPE_P (type
)
5749 && TREE_CODE (type
) != VECTOR_TYPE
5750 && TREE_CODE (type
) != COMPLEX_TYPE
)
5753 /* Any return value that is no larger than one word can be
5755 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5758 /* Check any available co-processors to see if they accept the
5759 type as a register candidate (VFP, for example, can return
5760 some aggregates in consecutive registers). These aren't
5761 available if the call is variadic. */
5762 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5765 /* Vector values should be returned using ARM registers, not
5766 memory (unless they're over 16 bytes, which will break since
5767 we only have four call-clobbered registers to play with). */
5768 if (TREE_CODE (type
) == VECTOR_TYPE
)
5769 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5771 /* The rest go in memory. */
5775 if (TREE_CODE (type
) == VECTOR_TYPE
)
5776 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5778 if (!AGGREGATE_TYPE_P (type
) &&
5779 (TREE_CODE (type
) != VECTOR_TYPE
))
5780 /* All simple types are returned in registers. */
5783 if (arm_abi
!= ARM_ABI_APCS
)
5785 /* ATPCS and later return aggregate types in memory only if they are
5786 larger than a word (or are variable size). */
5787 return (size
< 0 || size
> UNITS_PER_WORD
);
5790 /* For the arm-wince targets we choose to be compatible with Microsoft's
5791 ARM and Thumb compilers, which always return aggregates in memory. */
5793 /* All structures/unions bigger than one word are returned in memory.
5794 Also catch the case where int_size_in_bytes returns -1. In this case
5795 the aggregate is either huge or of variable size, and in either case
5796 we will want to return it via memory and not in a register. */
5797 if (size
< 0 || size
> UNITS_PER_WORD
)
5800 if (TREE_CODE (type
) == RECORD_TYPE
)
5804 /* For a struct the APCS says that we only return in a register
5805 if the type is 'integer like' and every addressable element
5806 has an offset of zero. For practical purposes this means
5807 that the structure can have at most one non bit-field element
5808 and that this element must be the first one in the structure. */
5810 /* Find the first field, ignoring non FIELD_DECL things which will
5811 have been created by C++. */
5812 for (field
= TYPE_FIELDS (type
);
5813 field
&& TREE_CODE (field
) != FIELD_DECL
;
5814 field
= DECL_CHAIN (field
))
5818 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5820 /* Check that the first field is valid for returning in a register. */
5822 /* ... Floats are not allowed */
5823 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5826 /* ... Aggregates that are not themselves valid for returning in
5827 a register are not allowed. */
5828 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5831 /* Now check the remaining fields, if any. Only bitfields are allowed,
5832 since they are not addressable. */
5833 for (field
= DECL_CHAIN (field
);
5835 field
= DECL_CHAIN (field
))
5837 if (TREE_CODE (field
) != FIELD_DECL
)
5840 if (!DECL_BIT_FIELD_TYPE (field
))
5847 if (TREE_CODE (type
) == UNION_TYPE
)
5851 /* Unions can be returned in registers if every element is
5852 integral, or can be returned in an integer register. */
5853 for (field
= TYPE_FIELDS (type
);
5855 field
= DECL_CHAIN (field
))
5857 if (TREE_CODE (field
) != FIELD_DECL
)
5860 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5863 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5869 #endif /* not ARM_WINCE */
5871 /* Return all other types in memory. */
5875 const struct pcs_attribute_arg
5879 } pcs_attribute_args
[] =
5881 {"aapcs", ARM_PCS_AAPCS
},
5882 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5884 /* We could recognize these, but changes would be needed elsewhere
5885 * to implement them. */
5886 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5887 {"atpcs", ARM_PCS_ATPCS
},
5888 {"apcs", ARM_PCS_APCS
},
5890 {NULL
, ARM_PCS_UNKNOWN
}
5894 arm_pcs_from_attribute (tree attr
)
5896 const struct pcs_attribute_arg
*ptr
;
5899 /* Get the value of the argument. */
5900 if (TREE_VALUE (attr
) == NULL_TREE
5901 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5902 return ARM_PCS_UNKNOWN
;
5904 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5906 /* Check it against the list of known arguments. */
5907 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5908 if (streq (arg
, ptr
->arg
))
5911 /* An unrecognized interrupt type. */
5912 return ARM_PCS_UNKNOWN
;
5915 /* Get the PCS variant to use for this call. TYPE is the function's type
5916 specification, DECL is the specific declartion. DECL may be null if
5917 the call could be indirect or if this is a library call. */
5919 arm_get_pcs_model (const_tree type
, const_tree decl
)
5921 bool user_convention
= false;
5922 enum arm_pcs user_pcs
= arm_pcs_default
;
5927 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5930 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5931 user_convention
= true;
5934 if (TARGET_AAPCS_BASED
)
5936 /* Detect varargs functions. These always use the base rules
5937 (no argument is ever a candidate for a co-processor
5939 bool base_rules
= stdarg_p (type
);
5941 if (user_convention
)
5943 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5944 sorry ("non-AAPCS derived PCS variant");
5945 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5946 error ("variadic functions must use the base AAPCS variant");
5950 return ARM_PCS_AAPCS
;
5951 else if (user_convention
)
5953 else if (decl
&& flag_unit_at_a_time
)
5955 /* Local functions never leak outside this compilation unit,
5956 so we are free to use whatever conventions are
5958 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5959 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5961 return ARM_PCS_AAPCS_LOCAL
;
5964 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5965 sorry ("PCS variant");
5967 /* For everything else we use the target's default. */
5968 return arm_pcs_default
;
5973 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5974 const_tree fntype ATTRIBUTE_UNUSED
,
5975 rtx libcall ATTRIBUTE_UNUSED
,
5976 const_tree fndecl ATTRIBUTE_UNUSED
)
5978 /* Record the unallocated VFP registers. */
5979 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5980 pcum
->aapcs_vfp_reg_alloc
= 0;
5983 /* Walk down the type tree of TYPE counting consecutive base elements.
5984 If *MODEP is VOIDmode, then set it to the first valid floating point
5985 type. If a non-floating point type is found, or if a floating point
5986 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5987 otherwise return the count in the sub-tree. */
5989 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5994 switch (TREE_CODE (type
))
5997 mode
= TYPE_MODE (type
);
5998 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
)
6001 if (*modep
== VOIDmode
)
6010 mode
= TYPE_MODE (TREE_TYPE (type
));
6011 if (mode
!= DFmode
&& mode
!= SFmode
)
6014 if (*modep
== VOIDmode
)
6023 /* Use V2SImode and V4SImode as representatives of all 64-bit
6024 and 128-bit vector types, whether or not those modes are
6025 supported with the present options. */
6026 size
= int_size_in_bytes (type
);
6039 if (*modep
== VOIDmode
)
6042 /* Vector modes are considered to be opaque: two vectors are
6043 equivalent for the purposes of being homogeneous aggregates
6044 if they are the same size. */
6053 tree index
= TYPE_DOMAIN (type
);
6055 /* Can't handle incomplete types nor sizes that are not
6057 if (!COMPLETE_TYPE_P (type
)
6058 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6061 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
6064 || !TYPE_MAX_VALUE (index
)
6065 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
6066 || !TYPE_MIN_VALUE (index
)
6067 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
6071 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
6072 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
6074 /* There must be no padding. */
6075 if (wi::to_wide (TYPE_SIZE (type
))
6076 != count
* GET_MODE_BITSIZE (*modep
))
6088 /* Can't handle incomplete types nor sizes that are not
6090 if (!COMPLETE_TYPE_P (type
)
6091 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6094 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6096 if (TREE_CODE (field
) != FIELD_DECL
)
6099 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
6105 /* There must be no padding. */
6106 if (wi::to_wide (TYPE_SIZE (type
))
6107 != count
* GET_MODE_BITSIZE (*modep
))
6114 case QUAL_UNION_TYPE
:
6116 /* These aren't very interesting except in a degenerate case. */
6121 /* Can't handle incomplete types nor sizes that are not
6123 if (!COMPLETE_TYPE_P (type
)
6124 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6127 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6129 if (TREE_CODE (field
) != FIELD_DECL
)
6132 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
6135 count
= count
> sub_count
? count
: sub_count
;
6138 /* There must be no padding. */
6139 if (wi::to_wide (TYPE_SIZE (type
))
6140 != count
* GET_MODE_BITSIZE (*modep
))
6153 /* Return true if PCS_VARIANT should use VFP registers. */
6155 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
6157 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
6159 static bool seen_thumb1_vfp
= false;
6161 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
6163 sorry ("Thumb-1 hard-float VFP ABI");
6164 /* sorry() is not immediately fatal, so only display this once. */
6165 seen_thumb1_vfp
= true;
6171 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
6174 return (TARGET_32BIT
&& TARGET_HARD_FLOAT
&&
6175 (TARGET_VFP_DOUBLE
|| !is_double
));
6178 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6179 suitable for passing or returning in VFP registers for the PCS
6180 variant selected. If it is, then *BASE_MODE is updated to contain
6181 a machine mode describing each element of the argument's type and
6182 *COUNT to hold the number of such elements. */
6184 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
6185 machine_mode mode
, const_tree type
,
6186 machine_mode
*base_mode
, int *count
)
6188 machine_mode new_mode
= VOIDmode
;
6190 /* If we have the type information, prefer that to working things
6191 out from the mode. */
6194 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
6196 if (ag_count
> 0 && ag_count
<= 4)
6201 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
6202 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
6203 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
6208 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
6211 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
6217 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
6220 *base_mode
= new_mode
;
6222 if (TARGET_GENERAL_REGS_ONLY
)
6223 error ("argument of type %qT not permitted with -mgeneral-regs-only",
6230 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
6231 machine_mode mode
, const_tree type
)
6233 int count ATTRIBUTE_UNUSED
;
6234 machine_mode ag_mode ATTRIBUTE_UNUSED
;
6236 if (!use_vfp_abi (pcs_variant
, false))
6238 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6243 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6246 if (!use_vfp_abi (pcum
->pcs_variant
, false))
6249 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
6250 &pcum
->aapcs_vfp_rmode
,
6251 &pcum
->aapcs_vfp_rcount
);
6254 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6255 for the behaviour of this function. */
6258 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6259 const_tree type ATTRIBUTE_UNUSED
)
6262 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
6263 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
6264 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
6267 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
6268 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
6270 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
6272 || (mode
== TImode
&& ! TARGET_NEON
)
6273 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
6276 int rcount
= pcum
->aapcs_vfp_rcount
;
6278 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
6282 /* Avoid using unsupported vector modes. */
6283 if (rmode
== V2SImode
)
6285 else if (rmode
== V4SImode
)
6292 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
6293 for (i
= 0; i
< rcount
; i
++)
6295 rtx tmp
= gen_rtx_REG (rmode
,
6296 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
6297 tmp
= gen_rtx_EXPR_LIST
6299 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
6300 XVECEXP (par
, 0, i
) = tmp
;
6303 pcum
->aapcs_reg
= par
;
6306 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
6312 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6313 comment there for the behaviour of this function. */
6316 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
6318 const_tree type ATTRIBUTE_UNUSED
)
6320 if (!use_vfp_abi (pcs_variant
, false))
6324 || (GET_MODE_CLASS (mode
) == MODE_INT
6325 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
6329 machine_mode ag_mode
;
6334 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6339 if (ag_mode
== V2SImode
)
6341 else if (ag_mode
== V4SImode
)
6347 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
6348 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
6349 for (i
= 0; i
< count
; i
++)
6351 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
6352 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
6353 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
6354 XVECEXP (par
, 0, i
) = tmp
;
6360 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
6364 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
6365 machine_mode mode ATTRIBUTE_UNUSED
,
6366 const_tree type ATTRIBUTE_UNUSED
)
6368 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
6369 pcum
->aapcs_vfp_reg_alloc
= 0;
6373 #define AAPCS_CP(X) \
6375 aapcs_ ## X ## _cum_init, \
6376 aapcs_ ## X ## _is_call_candidate, \
6377 aapcs_ ## X ## _allocate, \
6378 aapcs_ ## X ## _is_return_candidate, \
6379 aapcs_ ## X ## _allocate_return_reg, \
6380 aapcs_ ## X ## _advance \
6383 /* Table of co-processors that can be used to pass arguments in
6384 registers. Idealy no arugment should be a candidate for more than
6385 one co-processor table entry, but the table is processed in order
6386 and stops after the first match. If that entry then fails to put
6387 the argument into a co-processor register, the argument will go on
6391 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6392 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
6394 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6395 BLKmode) is a candidate for this co-processor's registers; this
6396 function should ignore any position-dependent state in
6397 CUMULATIVE_ARGS and only use call-type dependent information. */
6398 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6400 /* Return true if the argument does get a co-processor register; it
6401 should set aapcs_reg to an RTX of the register allocated as is
6402 required for a return from FUNCTION_ARG. */
6403 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6405 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6406 be returned in this co-processor's registers. */
6407 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6409 /* Allocate and return an RTX element to hold the return type of a call. This
6410 routine must not fail and will only be called if is_return_candidate
6411 returned true with the same parameters. */
6412 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6414 /* Finish processing this argument and prepare to start processing
6416 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6417 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6425 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6430 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6431 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6438 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6440 /* We aren't passed a decl, so we can't check that a call is local.
6441 However, it isn't clear that that would be a win anyway, since it
6442 might limit some tail-calling opportunities. */
6443 enum arm_pcs pcs_variant
;
6447 const_tree fndecl
= NULL_TREE
;
6449 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6452 fntype
= TREE_TYPE (fntype
);
6455 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6458 pcs_variant
= arm_pcs_default
;
6460 if (pcs_variant
!= ARM_PCS_AAPCS
)
6464 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6465 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6474 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6477 /* We aren't passed a decl, so we can't check that a call is local.
6478 However, it isn't clear that that would be a win anyway, since it
6479 might limit some tail-calling opportunities. */
6480 enum arm_pcs pcs_variant
;
6481 int unsignedp ATTRIBUTE_UNUSED
;
6485 const_tree fndecl
= NULL_TREE
;
6487 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6490 fntype
= TREE_TYPE (fntype
);
6493 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6496 pcs_variant
= arm_pcs_default
;
6498 /* Promote integer types. */
6499 if (type
&& INTEGRAL_TYPE_P (type
))
6500 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6502 if (pcs_variant
!= ARM_PCS_AAPCS
)
6506 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6507 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6509 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6513 /* Promotes small structs returned in a register to full-word size
6514 for big-endian AAPCS. */
6515 if (type
&& arm_return_in_msb (type
))
6517 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6518 if (size
% UNITS_PER_WORD
!= 0)
6520 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6521 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
6525 return gen_rtx_REG (mode
, R0_REGNUM
);
6529 aapcs_libcall_value (machine_mode mode
)
6531 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6532 && GET_MODE_SIZE (mode
) <= 4)
6535 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6538 /* Lay out a function argument using the AAPCS rules. The rule
6539 numbers referred to here are those in the AAPCS. */
6541 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6542 const_tree type
, bool named
)
6547 /* We only need to do this once per argument. */
6548 if (pcum
->aapcs_arg_processed
)
6551 pcum
->aapcs_arg_processed
= true;
6553 /* Special case: if named is false then we are handling an incoming
6554 anonymous argument which is on the stack. */
6558 /* Is this a potential co-processor register candidate? */
6559 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6561 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6562 pcum
->aapcs_cprc_slot
= slot
;
6564 /* We don't have to apply any of the rules from part B of the
6565 preparation phase, these are handled elsewhere in the
6570 /* A Co-processor register candidate goes either in its own
6571 class of registers or on the stack. */
6572 if (!pcum
->aapcs_cprc_failed
[slot
])
6574 /* C1.cp - Try to allocate the argument to co-processor
6576 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6579 /* C2.cp - Put the argument on the stack and note that we
6580 can't assign any more candidates in this slot. We also
6581 need to note that we have allocated stack space, so that
6582 we won't later try to split a non-cprc candidate between
6583 core registers and the stack. */
6584 pcum
->aapcs_cprc_failed
[slot
] = true;
6585 pcum
->can_split
= false;
6588 /* We didn't get a register, so this argument goes on the
6590 gcc_assert (pcum
->can_split
== false);
6595 /* C3 - For double-word aligned arguments, round the NCRN up to the
6596 next even number. */
6597 ncrn
= pcum
->aapcs_ncrn
;
6600 int res
= arm_needs_doubleword_align (mode
, type
);
6601 /* Only warn during RTL expansion of call stmts, otherwise we would
6602 warn e.g. during gimplification even on functions that will be
6603 always inlined, and we'd warn multiple times. Don't warn when
6604 called in expand_function_start either, as we warn instead in
6605 arm_function_arg_boundary in that case. */
6606 if (res
< 0 && warn_psabi
&& currently_expanding_gimple_stmt
)
6607 inform (input_location
, "parameter passing for argument of type "
6608 "%qT changed in GCC 7.1", type
);
6613 nregs
= ARM_NUM_REGS2(mode
, type
);
6615 /* Sigh, this test should really assert that nregs > 0, but a GCC
6616 extension allows empty structs and then gives them empty size; it
6617 then allows such a structure to be passed by value. For some of
6618 the code below we have to pretend that such an argument has
6619 non-zero size so that we 'locate' it correctly either in
6620 registers or on the stack. */
6621 gcc_assert (nregs
>= 0);
6623 nregs2
= nregs
? nregs
: 1;
6625 /* C4 - Argument fits entirely in core registers. */
6626 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6628 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6629 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6633 /* C5 - Some core registers left and there are no arguments already
6634 on the stack: split this argument between the remaining core
6635 registers and the stack. */
6636 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6638 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6639 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6640 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6644 /* C6 - NCRN is set to 4. */
6645 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6647   /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
6651 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6652 for a call to a function whose data type is FNTYPE.
6653 For a library call, FNTYPE is NULL. */
6655 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6657 tree fndecl ATTRIBUTE_UNUSED
)
6659 /* Long call handling. */
6661 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6663 pcum
->pcs_variant
= arm_pcs_default
;
6665 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6667 if (arm_libcall_uses_aapcs_base (libname
))
6668 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6670 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6671 pcum
->aapcs_reg
= NULL_RTX
;
6672 pcum
->aapcs_partial
= 0;
6673 pcum
->aapcs_arg_processed
= false;
6674 pcum
->aapcs_cprc_slot
= -1;
6675 pcum
->can_split
= true;
6677 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6681 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6683 pcum
->aapcs_cprc_failed
[i
] = false;
6684 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6692 /* On the ARM, the offset starts at 0. */
6694 pcum
->iwmmxt_nregs
= 0;
6695 pcum
->can_split
= true;
6697 /* Varargs vectors are treated the same as long long.
6698 named_count avoids having to change the way arm handles 'named' */
6699 pcum
->named_count
= 0;
6702 if (TARGET_REALLY_IWMMXT
&& fntype
)
6706 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6708 fn_arg
= TREE_CHAIN (fn_arg
))
6709 pcum
->named_count
+= 1;
6711 if (! pcum
->named_count
)
6712 pcum
->named_count
= INT_MAX
;
6716 /* Return 2 if double word alignment is required for argument passing,
6717 but wasn't required before the fix for PR88469.
6718 Return 1 if double word alignment is required for argument passing.
6719 Return -1 if double word alignment used to be required for argument
6720 passing before PR77728 ABI fix, but is not required anymore.
6721 Return 0 if double word alignment is not required and wasn't requried
6724 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6727 return GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
;
6729 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6730 if (!AGGREGATE_TYPE_P (type
))
6731 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6733 /* Array types: Use member alignment of element type. */
6734 if (TREE_CODE (type
) == ARRAY_TYPE
)
6735 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6739 /* Record/aggregate types: Use greatest member alignment of any member. */
6740 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6741 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6743 if (TREE_CODE (field
) == FIELD_DECL
)
6746 /* Before PR77728 fix, we were incorrectly considering also
6747 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6748 Make sure we can warn about that with -Wpsabi. */
6751 else if (TREE_CODE (field
) == FIELD_DECL
6752 && DECL_BIT_FIELD_TYPE (field
)
6753 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field
)) > PARM_BOUNDARY
)
6763 /* Determine where to put an argument to a function.
6764 Value is zero to push the argument on the stack,
6765 or a hard register in which to store the argument.
6767 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6768 the preceding args and about the function being called.
6769 ARG is a description of the argument.
6771 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6772 other arguments are passed on the stack. If (NAMED == 0) (which happens
6773 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6774 defined), say it is passed in the stack (function_prologue will
6775 indeed make it pass in the stack if necessary). */
6778 arm_function_arg (cumulative_args_t pcum_v
, const function_arg_info
&arg
)
6780 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6783 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6784 a call insn (op3 of a call_value insn). */
6785 if (arg
.end_marker_p ())
6788 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6790 aapcs_layout_arg (pcum
, arg
.mode
, arg
.type
, arg
.named
);
6791 return pcum
->aapcs_reg
;
6794 /* Varargs vectors are treated the same as long long.
6795 named_count avoids having to change the way arm handles 'named' */
6796 if (TARGET_IWMMXT_ABI
6797 && arm_vector_mode_supported_p (arg
.mode
)
6798 && pcum
->named_count
> pcum
->nargs
+ 1)
6800 if (pcum
->iwmmxt_nregs
<= 9)
6801 return gen_rtx_REG (arg
.mode
,
6802 pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6805 pcum
->can_split
= false;
6810 /* Put doubleword aligned quantities in even register pairs. */
6811 if ((pcum
->nregs
& 1) && ARM_DOUBLEWORD_ALIGN
)
6813 int res
= arm_needs_doubleword_align (arg
.mode
, arg
.type
);
6814 if (res
< 0 && warn_psabi
)
6815 inform (input_location
, "parameter passing for argument of type "
6816 "%qT changed in GCC 7.1", arg
.type
);
6820 if (res
> 1 && warn_psabi
)
6821 inform (input_location
, "parameter passing for argument of type "
6822 "%qT changed in GCC 9.1", arg
.type
);
6826 /* Only allow splitting an arg between regs and memory if all preceding
6827 args were allocated to regs. For args passed by reference we only count
6828 the reference pointer. */
6829 if (pcum
->can_split
)
6832 nregs
= ARM_NUM_REGS2 (arg
.mode
, arg
.type
);
6834 if (!arg
.named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6837 return gen_rtx_REG (arg
.mode
, pcum
->nregs
);
6841 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6843 if (!ARM_DOUBLEWORD_ALIGN
)
6844 return PARM_BOUNDARY
;
6846 int res
= arm_needs_doubleword_align (mode
, type
);
6847 if (res
< 0 && warn_psabi
)
6848 inform (input_location
, "parameter passing for argument of type %qT "
6849 "changed in GCC 7.1", type
);
6850 if (res
> 1 && warn_psabi
)
6851 inform (input_location
, "parameter passing for argument of type "
6852 "%qT changed in GCC 9.1", type
);
6854 return res
> 0 ? DOUBLEWORD_ALIGNMENT
: PARM_BOUNDARY
;
6858 arm_arg_partial_bytes (cumulative_args_t pcum_v
, const function_arg_info
&arg
)
6860 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6861 int nregs
= pcum
->nregs
;
6863 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6865 aapcs_layout_arg (pcum
, arg
.mode
, arg
.type
, arg
.named
);
6866 return pcum
->aapcs_partial
;
6869 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (arg
.mode
))
6872 if (NUM_ARG_REGS
> nregs
6873 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (arg
.mode
, arg
.type
))
6875 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6880 /* Update the data in PCUM to advance over argument ARG. */
6883 arm_function_arg_advance (cumulative_args_t pcum_v
,
6884 const function_arg_info
&arg
)
6886 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6888 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6890 aapcs_layout_arg (pcum
, arg
.mode
, arg
.type
, arg
.named
);
6892 if (pcum
->aapcs_cprc_slot
>= 0)
6894 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, arg
.mode
,
6896 pcum
->aapcs_cprc_slot
= -1;
6899 /* Generic stuff. */
6900 pcum
->aapcs_arg_processed
= false;
6901 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6902 pcum
->aapcs_reg
= NULL_RTX
;
6903 pcum
->aapcs_partial
= 0;
6908 if (arm_vector_mode_supported_p (arg
.mode
)
6909 && pcum
->named_count
> pcum
->nargs
6910 && TARGET_IWMMXT_ABI
)
6911 pcum
->iwmmxt_nregs
+= 1;
6913 pcum
->nregs
+= ARM_NUM_REGS2 (arg
.mode
, arg
.type
);
6917 /* Variable sized types are passed by reference. This is a GCC
6918 extension to the ARM ABI. */
6921 arm_pass_by_reference (cumulative_args_t
, const function_arg_info
&arg
)
6923 return arg
.type
&& TREE_CODE (TYPE_SIZE (arg
.type
)) != INTEGER_CST
;
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;
6937 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6939 arm_pragma_long_calls
= LONG
;
6943 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6945 arm_pragma_long_calls
= SHORT
;
6949 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6951 arm_pragma_long_calls
= OFF
;
6954 /* Handle an attribute requiring a FUNCTION_DECL;
6955 arguments as in struct attribute_spec.handler. */
6957 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6958 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6960 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6962 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6964 *no_add_attrs
= true;
6970 /* Handle an "interrupt" or "isr" attribute;
6971 arguments as in struct attribute_spec.handler. */
6973 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6978 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6980 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6982 *no_add_attrs
= true;
6984 /* FIXME: the argument if any is checked for type attributes;
6985 should it be checked for decl ones? */
6989 if (TREE_CODE (*node
) == FUNCTION_TYPE
6990 || TREE_CODE (*node
) == METHOD_TYPE
)
6992 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6994 warning (OPT_Wattributes
, "%qE attribute ignored",
6996 *no_add_attrs
= true;
6999 else if (TREE_CODE (*node
) == POINTER_TYPE
7000 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
7001 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
7002 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
7004 *node
= build_variant_type_copy (*node
);
7005 TREE_TYPE (*node
) = build_type_attribute_variant
7007 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
7008 *no_add_attrs
= true;
7012 /* Possibly pass this attribute on from the type to a decl. */
7013 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
7014 | (int) ATTR_FLAG_FUNCTION_NEXT
7015 | (int) ATTR_FLAG_ARRAY_NEXT
))
7017 *no_add_attrs
= true;
7018 return tree_cons (name
, args
, NULL_TREE
);
7022 warning (OPT_Wattributes
, "%qE attribute ignored",
7031 /* Handle a "pcs" attribute; arguments as in struct
7032 attribute_spec.handler. */
7034 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
7035 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
7037 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
7039 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
7040 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */
static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      /* Allow the attribute itself to be recorded on the type.  */
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
7070 /* This function returns true if a function with declaration FNDECL and type
7071 FNTYPE uses the stack to pass arguments or return variables and false
7072 otherwise. This is used for functions with the attributes
7073 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7074 diagnostic messages if the stack is used. NAME is the name of the attribute
7078 cmse_func_args_or_return_in_stack (tree fndecl
, tree name
, tree fntype
)
7080 function_args_iterator args_iter
;
7081 CUMULATIVE_ARGS args_so_far_v
;
7082 cumulative_args_t args_so_far
;
7083 bool first_param
= true;
7084 tree arg_type
, prev_arg_type
= NULL_TREE
, ret_type
;
7086 /* Error out if any argument is passed on the stack. */
7087 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
7088 args_so_far
= pack_cumulative_args (&args_so_far_v
);
7089 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
7093 prev_arg_type
= arg_type
;
7094 if (VOID_TYPE_P (arg_type
))
7097 function_arg_info
arg (arg_type
, /*named=*/true);
7099 /* ??? We should advance after processing the argument and pass
7100 the argument we're advancing past. */
7101 arm_function_arg_advance (args_so_far
, arg
);
7102 arg_rtx
= arm_function_arg (args_so_far
, arg
);
7103 if (!arg_rtx
|| arm_arg_partial_bytes (args_so_far
, arg
))
7105 error ("%qE attribute not available to functions with arguments "
7106 "passed on the stack", name
);
7109 first_param
= false;
7112 /* Error out for variadic functions since we cannot control how many
7113 arguments will be passed and thus stack could be used. stdarg_p () is not
7114 used for the checking to avoid browsing arguments twice. */
7115 if (prev_arg_type
!= NULL_TREE
&& !VOID_TYPE_P (prev_arg_type
))
7117 error ("%qE attribute not available to functions with variable number "
7118 "of arguments", name
);
7122 /* Error out if return value is passed on the stack. */
7123 ret_type
= TREE_TYPE (fntype
);
7124 if (arm_return_in_memory (ret_type
, fntype
))
7126 error ("%qE attribute not available to functions that return value on "
7133 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7134 function will check whether the attribute is allowed here and will add the
7135 attribute to the function declaration tree or otherwise issue a warning. */
7138 arm_handle_cmse_nonsecure_entry (tree
*node
, tree name
,
7147 *no_add_attrs
= true;
7148 warning (OPT_Wattributes
, "%qE attribute ignored without %<-mcmse%> "
7153 /* Ignore attribute for function types. */
7154 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7156 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
7158 *no_add_attrs
= true;
7164 /* Warn for static linkage functions. */
7165 if (!TREE_PUBLIC (fndecl
))
7167 warning (OPT_Wattributes
, "%qE attribute has no effect on functions "
7168 "with static linkage", name
);
7169 *no_add_attrs
= true;
7173 *no_add_attrs
|= cmse_func_args_or_return_in_stack (fndecl
, name
,
7174 TREE_TYPE (fndecl
));
7179 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7180 function will check whether the attribute is allowed here and will add the
7181 attribute to the function type tree or otherwise issue a diagnostic. The
7182 reason we check this at declaration time is to only allow the use of the
7183 attribute with declarations of function pointers and not function
7184 declarations. This function checks NODE is of the expected type and issues
7185 diagnostics otherwise using NAME. If it is not of the expected type
7186 *NO_ADD_ATTRS will be set to true. */
7189 arm_handle_cmse_nonsecure_call (tree
*node
, tree name
,
7194 tree decl
= NULL_TREE
, fntype
= NULL_TREE
;
7199 *no_add_attrs
= true;
7200 warning (OPT_Wattributes
, "%qE attribute ignored without %<-mcmse%> "
7205 if (TREE_CODE (*node
) == VAR_DECL
|| TREE_CODE (*node
) == TYPE_DECL
)
7208 fntype
= TREE_TYPE (decl
);
7211 while (fntype
!= NULL_TREE
&& TREE_CODE (fntype
) == POINTER_TYPE
)
7212 fntype
= TREE_TYPE (fntype
);
7214 if (!decl
|| TREE_CODE (fntype
) != FUNCTION_TYPE
)
7216 warning (OPT_Wattributes
, "%qE attribute only applies to base type of a "
7217 "function pointer", name
);
7218 *no_add_attrs
= true;
7222 *no_add_attrs
|= cmse_func_args_or_return_in_stack (NULL
, name
, fntype
);
7227 /* Prevent trees being shared among function types with and without
7228 cmse_nonsecure_call attribute. */
7229 type
= TREE_TYPE (decl
);
7231 type
= build_distinct_type_copy (type
);
7232 TREE_TYPE (decl
) = type
;
7235 while (TREE_CODE (fntype
) != FUNCTION_TYPE
)
7238 fntype
= TREE_TYPE (fntype
);
7239 fntype
= build_distinct_type_copy (fntype
);
7240 TREE_TYPE (type
) = fntype
;
7243 /* Construct a type attribute and add it to the function type. */
7244 tree attrs
= tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE
,
7245 TYPE_ATTRIBUTES (fntype
));
7246 TYPE_ATTRIBUTES (fntype
) = attrs
;
7250 /* Return 0 if the attributes for two types are incompatible, 1 if they
7251 are compatible, and 2 if they are nearly compatible (which causes a
7252 warning to be generated). */
7254 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
7258 /* Check for mismatch of non-default calling convention. */
7259 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
7262 /* Check for mismatched call attributes. */
7263 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7264 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7265 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7266 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7268 /* Only bother to check if an attribute is defined. */
7269 if (l1
| l2
| s1
| s2
)
7271 /* If one type has an attribute, the other must have the same attribute. */
7272 if ((l1
!= l2
) || (s1
!= s2
))
7275 /* Disallow mixed attributes. */
7276 if ((l1
& s2
) || (l2
& s1
))
7280 /* Check for mismatched ISR attribute. */
7281 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
7283 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
7284 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
7286 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
7290 l1
= lookup_attribute ("cmse_nonsecure_call",
7291 TYPE_ATTRIBUTES (type1
)) != NULL
;
7292 l2
= lookup_attribute ("cmse_nonsecure_call",
7293 TYPE_ATTRIBUTES (type2
)) != NULL
;
7301 /* Assigns default attributes to newly defined type. This is used to
7302 set short_call/long_call attributes for function types of
7303 functions defined inside corresponding #pragma scopes. */
7305 arm_set_default_type_attributes (tree type
)
7307 /* Add __attribute__ ((long_call)) to all functions, when
7308 inside #pragma long_calls or __attribute__ ((short_call)),
7309 when inside #pragma no_long_calls. */
7310 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
7312 tree type_attr_list
, attr_name
;
7313 type_attr_list
= TYPE_ATTRIBUTES (type
);
7315 if (arm_pragma_long_calls
== LONG
)
7316 attr_name
= get_identifier ("long_call");
7317 else if (arm_pragma_long_calls
== SHORT
)
7318 attr_name
= get_identifier ("short_call");
7322 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
7323 TYPE_ATTRIBUTES (type
) = type_attr_list
;
7327 /* Return true if DECL is known to be linked into section SECTION. */
7330 arm_function_in_section_p (tree decl
, section
*section
)
7332 /* We can only be certain about the prevailing symbol definition. */
7333 if (!decl_binds_to_current_def_p (decl
))
7336 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7337 if (!DECL_SECTION_NAME (decl
))
7339 /* Make sure that we will not create a unique section for DECL. */
7340 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
7344 return function_section (decl
) == section
;
7347 /* Return nonzero if a 32-bit "long_call" should be generated for
7348 a call from the current function to DECL. We generate a long_call
7351 a. has an __attribute__((long call))
7352 or b. is within the scope of a #pragma long_calls
7353 or c. the -mlong-calls command line switch has been specified
7355 However we do not generate a long call if the function:
7357 d. has an __attribute__ ((short_call))
7358 or e. is inside the scope of a #pragma no_long_calls
7359 or f. is defined in the same section as the current function. */
7362 arm_is_long_call_p (tree decl
)
7367 return TARGET_LONG_CALLS
;
7369 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
7370 if (lookup_attribute ("short_call", attrs
))
7373 /* For "f", be conservative, and only cater for cases in which the
7374 whole of the current function is placed in the same section. */
7375 if (!flag_reorder_blocks_and_partition
7376 && TREE_CODE (decl
) == FUNCTION_DECL
7377 && arm_function_in_section_p (decl
, current_function_section ()))
7380 if (lookup_attribute ("long_call", attrs
))
7383 return TARGET_LONG_CALLS
;
7386 /* Return nonzero if it is ok to make a tail-call to DECL. */
7388 arm_function_ok_for_sibcall (tree decl
, tree exp
)
7390 unsigned long func_type
;
7392 if (cfun
->machine
->sibcall_blocked
)
7397 /* In FDPIC, never tailcall something for which we have no decl:
7398 the target function could be in a different module, requiring
7399 a different FDPIC register value. */
7404 /* Never tailcall something if we are generating code for Thumb-1. */
7408 /* The PIC register is live on entry to VxWorks PLT entries, so we
7409 must make the call before restoring the PIC register. */
7410 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
7413 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7414 may be used both as target of the call and base register for restoring
7415 the VFP registers */
7416 if (TARGET_APCS_FRAME
&& TARGET_ARM
7417 && TARGET_HARD_FLOAT
7418 && decl
&& arm_is_long_call_p (decl
))
7421 /* If we are interworking and the function is not declared static
7422 then we can't tail-call it unless we know that it exists in this
7423 compilation unit (since it might be a Thumb routine). */
7424 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
7425 && !TREE_ASM_WRITTEN (decl
))
7428 func_type
= arm_current_func_type ();
7429 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7430 if (IS_INTERRUPT (func_type
))
7433 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7434 generated for entry functions themselves. */
7435 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7438 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7439 this would complicate matters for later code generation. */
7440 if (TREE_CODE (exp
) == CALL_EXPR
)
7442 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7443 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype
)))
7447 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
7449 /* Check that the return value locations are the same. For
7450 example that we aren't returning a value from the sibling in
7451 a VFP register but then need to transfer it to a core
7454 tree decl_or_type
= decl
;
7456 /* If it is an indirect function pointer, get the function type. */
7458 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7460 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
7461 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
7463 if (!rtx_equal_p (a
, b
))
7467 /* Never tailcall if function may be called with a misaligned SP. */
7468 if (IS_STACKALIGN (func_type
))
7471 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7472 references should become a NOP. Don't convert such calls into
7474 if (TARGET_AAPCS_BASED
7475 && arm_abi
== ARM_ABI_AAPCS
7477 && DECL_WEAK (decl
))
7480 /* We cannot do a tailcall for an indirect call by descriptor if all the
7481 argument registers are used because the only register left to load the
7482 address is IP and it will already contain the static chain. */
7483 if (!decl
&& CALL_EXPR_BY_DESCRIPTOR (exp
) && !flag_trampolines
)
7485 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7486 CUMULATIVE_ARGS cum
;
7487 cumulative_args_t cum_v
;
7489 arm_init_cumulative_args (&cum
, fntype
, NULL_RTX
, NULL_TREE
);
7490 cum_v
= pack_cumulative_args (&cum
);
7492 for (tree t
= TYPE_ARG_TYPES (fntype
); t
; t
= TREE_CHAIN (t
))
7494 tree type
= TREE_VALUE (t
);
7495 if (!VOID_TYPE_P (type
))
7497 function_arg_info
arg (type
, /*named=*/true);
7498 arm_function_arg_advance (cum_v
, arg
);
7502 function_arg_info
arg (integer_type_node
, /*named=*/true);
7503 if (!arm_function_arg (cum_v
, arg
))
7507 /* Everything else is ok. */
7512 /* Addressing mode support functions. */
7514 /* Return nonzero if X is a legitimate immediate operand when compiling
7515 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7517 legitimate_pic_operand_p (rtx x
)
7519 if (GET_CODE (x
) == SYMBOL_REF
7520 || (GET_CODE (x
) == CONST
7521 && GET_CODE (XEXP (x
, 0)) == PLUS
7522 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
7528 /* Record that the current function needs a PIC register. If PIC_REG is null,
7529 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
7530 both case cfun->machine->pic_reg is initialized if we have not already done
7531 so. COMPUTE_NOW decide whether and where to set the PIC register. If true,
7532 PIC register is reloaded in the current position of the instruction stream
7533 irregardless of whether it was loaded before. Otherwise, it is only loaded
7534 if not already done so (crtl->uses_pic_offset_table is null). Note that
7535 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7536 is only supported iff COMPUTE_NOW is false. */
7539 require_pic_register (rtx pic_reg
, bool compute_now
)
7541 gcc_assert (compute_now
== (pic_reg
!= NULL_RTX
));
7543 /* A lot of the logic here is made obscure by the fact that this
7544 routine gets called as part of the rtx cost estimation process.
7545 We don't want those calls to affect any assumptions about the real
7546 function; and further, we can't call entry_of_function() until we
7547 start the real expansion process. */
7548 if (!crtl
->uses_pic_offset_table
|| compute_now
)
7550 gcc_assert (can_create_pseudo_p ()
7551 || (pic_reg
!= NULL_RTX
7553 && GET_MODE (pic_reg
) == Pmode
));
7554 if (arm_pic_register
!= INVALID_REGNUM
7556 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
7558 if (!cfun
->machine
->pic_reg
)
7559 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
7561 /* Play games to avoid marking the function as needing pic
7562 if we are being called as part of the cost-estimation
7564 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7565 crtl
->uses_pic_offset_table
= 1;
7569 rtx_insn
*seq
, *insn
;
7571 if (pic_reg
== NULL_RTX
)
7572 pic_reg
= gen_reg_rtx (Pmode
);
7573 if (!cfun
->machine
->pic_reg
)
7574 cfun
->machine
->pic_reg
= pic_reg
;
7576 /* Play games to avoid marking the function as needing pic
7577 if we are being called as part of the cost-estimation
7579 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7581 crtl
->uses_pic_offset_table
= 1;
7584 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
7585 && arm_pic_register
> LAST_LO_REGNUM
7587 emit_move_insn (cfun
->machine
->pic_reg
,
7588 gen_rtx_REG (Pmode
, arm_pic_register
));
7590 arm_load_pic_register (0UL, pic_reg
);
7595 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
7597 INSN_LOCATION (insn
) = prologue_location
;
7599 /* We can be called during expansion of PHI nodes, where
7600 we can't yet emit instructions directly in the final
7601 insn stream. Queue the insns on the entry edge, they will
7602 be committed after everything else is expanded. */
7603 if (currently_expanding_to_rtl
)
7604 insert_insn_on_edge (seq
,
7606 (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
7614 /* Generate insns to calculate the address of ORIG in pic mode. */
7616 calculate_pic_address_constant (rtx reg
, rtx pic_reg
, rtx orig
)
7621 pat
= gen_calculate_pic_address (reg
, pic_reg
, orig
);
7623 /* Make the MEM as close to a constant as possible. */
7624 mem
= SET_SRC (pat
);
7625 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
7626 MEM_READONLY_P (mem
) = 1;
7627 MEM_NOTRAP_P (mem
) = 1;
7629 return emit_insn (pat
);
7632 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
7633 created to hold the result of the load. If not NULL, PIC_REG indicates
7634 which register to use as PIC register, otherwise it is decided by register
7635 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
7636 location in the instruction stream, irregardless of whether it was loaded
7637 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
7638 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
7640 Returns the register REG into which the PIC load is performed. */
7643 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
, rtx pic_reg
,
7646 gcc_assert (compute_now
== (pic_reg
!= NULL_RTX
));
7648 if (GET_CODE (orig
) == SYMBOL_REF
7649 || GET_CODE (orig
) == LABEL_REF
)
7653 gcc_assert (can_create_pseudo_p ());
7654 reg
= gen_reg_rtx (Pmode
);
7657 /* VxWorks does not impose a fixed gap between segments; the run-time
7658 gap can be different from the object-file gap. We therefore can't
7659 use GOTOFF unless we are absolutely sure that the symbol is in the
7660 same segment as the GOT. Unfortunately, the flexibility of linker
7661 scripts means that we can't be sure of that in general, so assume
7662 that GOTOFF is never valid on VxWorks. */
7663 /* References to weak symbols cannot be resolved locally: they
7664 may be overridden by a non-weak definition at link time. */
7666 if ((GET_CODE (orig
) == LABEL_REF
7667 || (GET_CODE (orig
) == SYMBOL_REF
7668 && SYMBOL_REF_LOCAL_P (orig
)
7669 && (SYMBOL_REF_DECL (orig
)
7670 ? !DECL_WEAK (SYMBOL_REF_DECL (orig
)) : 1)
7671 && (!SYMBOL_REF_FUNCTION_P (orig
)
7672 || arm_fdpic_local_funcdesc_p (orig
))))
7674 && arm_pic_data_is_text_relative
)
7675 insn
= arm_pic_static_addr (orig
, reg
);
7678 /* If this function doesn't have a pic register, create one now. */
7679 require_pic_register (pic_reg
, compute_now
);
7681 if (pic_reg
== NULL_RTX
)
7682 pic_reg
= cfun
->machine
->pic_reg
;
7684 insn
= calculate_pic_address_constant (reg
, pic_reg
, orig
);
7687 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7689 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
7693 else if (GET_CODE (orig
) == CONST
)
7697 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7698 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
7701 /* Handle the case where we have: const (UNSPEC_TLS). */
7702 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
7703 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
7706 /* Handle the case where we have:
7707 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7709 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7710 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
7711 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
7713 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
7719 gcc_assert (can_create_pseudo_p ());
7720 reg
= gen_reg_rtx (Pmode
);
7723 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
7725 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
,
7726 pic_reg
, compute_now
);
7727 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
7728 base
== reg
? 0 : reg
, pic_reg
,
7731 if (CONST_INT_P (offset
))
7733 /* The base register doesn't really matter, we only want to
7734 test the index for the appropriate mode. */
7735 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
7737 gcc_assert (can_create_pseudo_p ());
7738 offset
= force_reg (Pmode
, offset
);
7741 if (CONST_INT_P (offset
))
7742 return plus_constant (Pmode
, base
, INTVAL (offset
));
7745 if (GET_MODE_SIZE (mode
) > 4
7746 && (GET_MODE_CLASS (mode
) == MODE_INT
7747 || TARGET_SOFT_FLOAT
))
7749 emit_insn (gen_addsi3 (reg
, base
, offset
));
7753 return gen_rtx_PLUS (Pmode
, base
, offset
);
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_or_fixed_reg_p (reg) \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
7768 /* Return a mask for the call-clobbered low registers that are unused
7769 at the end of the prologue. */
7770 static unsigned long
7771 thumb1_prologue_unused_call_clobbered_lo_regs (void)
7773 unsigned long mask
= 0;
7774 bitmap prologue_live_out
= df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
7776 for (int reg
= FIRST_LO_REGNUM
; reg
<= LAST_LO_REGNUM
; reg
++)
7777 if (!callee_saved_reg_p (reg
) && !REGNO_REG_SET_P (prologue_live_out
, reg
))
7778 mask
|= 1 << (reg
- FIRST_LO_REGNUM
);
7782 /* Similarly for the start of the epilogue. */
7783 static unsigned long
7784 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
7786 unsigned long mask
= 0;
7787 bitmap epilogue_live_in
= df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun
));
7789 for (int reg
= FIRST_LO_REGNUM
; reg
<= LAST_LO_REGNUM
; reg
++)
7790 if (!callee_saved_reg_p (reg
) && !REGNO_REG_SET_P (epilogue_live_in
, reg
))
7791 mask
|= 1 << (reg
- FIRST_LO_REGNUM
);
7795 /* Find a spare register to use during the prolog of a function. */
7798 thumb_find_work_register (unsigned long pushed_regs_mask
)
7802 unsigned long unused_regs
7803 = thumb1_prologue_unused_call_clobbered_lo_regs ();
7805 /* Check the argument registers first as these are call-used. The
7806 register allocation order means that sometimes r3 might be used
7807 but earlier argument registers might not, so check them all. */
7808 for (reg
= LAST_LO_REGNUM
; reg
>= FIRST_LO_REGNUM
; reg
--)
7809 if (unused_regs
& (1 << (reg
- FIRST_LO_REGNUM
)))
7812 /* Otherwise look for a call-saved register that is going to be pushed. */
7813 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
7814 if (pushed_regs_mask
& (1 << reg
))
7819 /* Thumb-2 can use high regs. */
7820 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
7821 if (pushed_regs_mask
& (1 << reg
))
7824 /* Something went wrong - thumb_compute_save_reg_mask()
7825 should have arranged for a suitable register to be pushed. */
7829 static GTY(()) int pic_labelno
;
7831 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7835 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
, rtx pic_reg
)
7837 rtx l1
, labelno
, pic_tmp
, pic_rtx
;
7839 if (crtl
->uses_pic_offset_table
== 0
7840 || TARGET_SINGLE_PIC_BASE
7844 gcc_assert (flag_pic
);
7846 if (pic_reg
== NULL_RTX
)
7847 pic_reg
= cfun
->machine
->pic_reg
;
7848 if (TARGET_VXWORKS_RTP
)
7850 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
7851 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7852 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
7854 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
7856 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
7857 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
7861 /* We use an UNSPEC rather than a LABEL_REF because this label
7862 never appears in the code stream. */
7864 labelno
= GEN_INT (pic_labelno
++);
7865 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7866 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7868 /* On the ARM the PC register contains 'dot + 8' at the time of the
7869 addition, on the Thumb it is 'dot + 4'. */
7870 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7871 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
7873 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7877 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7879 else /* TARGET_THUMB1 */
7881 if (arm_pic_register
!= INVALID_REGNUM
7882 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
7884 /* We will have pushed the pic register, so we should always be
7885 able to find a work register. */
7886 pic_tmp
= gen_rtx_REG (SImode
,
7887 thumb_find_work_register (saved_regs
));
7888 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
7889 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
7890 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
7892 else if (arm_pic_register
!= INVALID_REGNUM
7893 && arm_pic_register
> LAST_LO_REGNUM
7894 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
7896 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7897 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
7898 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
7901 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7905 /* Need to emit this whether or not we obey regdecls,
7906 since setjmp/longjmp can cause life info to screw up. */
7910 /* Try to determine whether an object, referenced via ORIG, will be
7911 placed in the text or data segment. This is used in FDPIC mode, to
7912 decide which relocations to use when accessing ORIG. *IS_READONLY
7913 is set to true if ORIG is a read-only location, false otherwise.
7914 Return true if we could determine the location of ORIG, false
7915 otherwise. *IS_READONLY is valid only when we return true. */
7917 arm_is_segment_info_known (rtx orig
, bool *is_readonly
)
7919 *is_readonly
= false;
7921 if (GET_CODE (orig
) == LABEL_REF
)
7923 *is_readonly
= true;
7927 if (SYMBOL_REF_P (orig
))
7929 if (CONSTANT_POOL_ADDRESS_P (orig
))
7931 *is_readonly
= true;
7934 if (SYMBOL_REF_LOCAL_P (orig
)
7935 && !SYMBOL_REF_EXTERNAL_P (orig
)
7936 && SYMBOL_REF_DECL (orig
)
7937 && (!DECL_P (SYMBOL_REF_DECL (orig
))
7938 || !DECL_COMMON (SYMBOL_REF_DECL (orig
))))
7940 tree decl
= SYMBOL_REF_DECL (orig
);
7941 tree init
= (TREE_CODE (decl
) == VAR_DECL
)
7942 ? DECL_INITIAL (decl
) : (TREE_CODE (decl
) == CONSTRUCTOR
)
7945 bool named_section
, readonly
;
7947 if (init
&& init
!= error_mark_node
)
7948 reloc
= compute_reloc_for_constant (init
);
7950 named_section
= TREE_CODE (decl
) == VAR_DECL
7951 && lookup_attribute ("section", DECL_ATTRIBUTES (decl
));
7952 readonly
= decl_readonly_section (decl
, reloc
);
7954 /* We don't know where the link script will put a named
7955 section, so return false in such a case. */
7959 *is_readonly
= readonly
;
7963 /* We don't know. */
7970 /* Generate code to load the address of a static var when flag_pic is set. */
7972 arm_pic_static_addr (rtx orig
, rtx reg
)
7974 rtx l1
, labelno
, offset_rtx
;
7977 gcc_assert (flag_pic
);
7979 bool is_readonly
= false;
7980 bool info_known
= false;
7983 && SYMBOL_REF_P (orig
)
7984 && !SYMBOL_REF_FUNCTION_P (orig
))
7985 info_known
= arm_is_segment_info_known (orig
, &is_readonly
);
7988 && SYMBOL_REF_P (orig
)
7989 && !SYMBOL_REF_FUNCTION_P (orig
)
7992 /* We don't know where orig is stored, so we have be
7993 pessimistic and use a GOT relocation. */
7994 rtx pic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
7996 insn
= calculate_pic_address_constant (reg
, pic_reg
, orig
);
7998 else if (TARGET_FDPIC
7999 && SYMBOL_REF_P (orig
)
8000 && (SYMBOL_REF_FUNCTION_P (orig
)
8003 /* We use the GOTOFF relocation. */
8004 rtx pic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
8006 rtx l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, orig
), UNSPEC_PIC_SYM
);
8007 emit_insn (gen_movsi (reg
, l1
));
8008 insn
= emit_insn (gen_addsi3 (reg
, reg
, pic_reg
));
8012 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8013 PC-relative access. */
8014 /* We use an UNSPEC rather than a LABEL_REF because this label
8015 never appears in the code stream. */
8016 labelno
= GEN_INT (pic_labelno
++);
8017 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8018 l1
= gen_rtx_CONST (VOIDmode
, l1
);
8020 /* On the ARM the PC register contains 'dot + 8' at the time of the
8021 addition, on the Thumb it is 'dot + 4'. */
8022 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
8023 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
8024 UNSPEC_SYMBOL_OFFSET
);
8025 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
8027 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
,
8034 /* Return nonzero if X is valid as an ARM state addressing register. */
8036 arm_address_register_rtx_p (rtx x
, int strict_p
)
8046 return ARM_REGNO_OK_FOR_BASE_P (regno
);
8048 return (regno
<= LAST_ARM_REGNUM
8049 || regno
>= FIRST_PSEUDO_REGISTER
8050 || regno
== FRAME_POINTER_REGNUM
8051 || regno
== ARG_POINTER_REGNUM
);
8054 /* Return TRUE if this rtx is the difference of a symbol and a label,
8055 and will reduce to a PC-relative relocation in the object file.
8056 Expressions like this can be left alone when generating PIC, rather
8057 than forced through the GOT. */
8059 pcrel_constant_p (rtx x
)
8061 if (GET_CODE (x
) == MINUS
)
8062 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
8067 /* Return true if X will surely end up in an index register after next
8070 will_be_in_index_register (const_rtx x
)
8072 /* arm.md: calculate_pic_address will split this into a register. */
8073 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
8076 /* Return nonzero if X is a valid ARM state address operand. */
8078 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
8082 enum rtx_code code
= GET_CODE (x
);
8084 if (arm_address_register_rtx_p (x
, strict_p
))
8087 use_ldrd
= (TARGET_LDRD
8088 && (mode
== DImode
|| mode
== DFmode
));
8090 if (code
== POST_INC
|| code
== PRE_DEC
8091 || ((code
== PRE_INC
|| code
== POST_DEC
)
8092 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
8093 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
8095 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
8096 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
8097 && GET_CODE (XEXP (x
, 1)) == PLUS
8098 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
8100 rtx addend
= XEXP (XEXP (x
, 1), 1);
8102 /* Don't allow ldrd post increment by register because it's hard
8103 to fixup invalid register choices. */
8105 && GET_CODE (x
) == POST_MODIFY
8109 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
8110 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
8113 /* After reload constants split into minipools will have addresses
8114 from a LABEL_REF. */
8115 else if (reload_completed
8116 && (code
== LABEL_REF
8118 && GET_CODE (XEXP (x
, 0)) == PLUS
8119 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8120 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8123 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
8126 else if (code
== PLUS
)
8128 rtx xop0
= XEXP (x
, 0);
8129 rtx xop1
= XEXP (x
, 1);
8131 return ((arm_address_register_rtx_p (xop0
, strict_p
)
8132 && ((CONST_INT_P (xop1
)
8133 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
8134 || (!strict_p
&& will_be_in_index_register (xop1
))))
8135 || (arm_address_register_rtx_p (xop1
, strict_p
)
8136 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
8140 /* Reload currently can't handle MINUS, so disable this for now */
8141 else if (GET_CODE (x
) == MINUS
)
8143 rtx xop0
= XEXP (x
, 0);
8144 rtx xop1
= XEXP (x
, 1);
8146 return (arm_address_register_rtx_p (xop0
, strict_p
)
8147 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
8151 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8152 && code
== SYMBOL_REF
8153 && CONSTANT_POOL_ADDRESS_P (x
)
8155 && symbol_mentioned_p (get_pool_constant (x
))
8156 && ! pcrel_constant_p (get_pool_constant (x
))))
8162 /* Return true if we can avoid creating a constant pool entry for x. */
8164 can_avoid_literal_pool_for_label_p (rtx x
)
8166 /* Normally we can assign constant values to target registers without
8167 the help of constant pool. But there are cases we have to use constant
8169 1) assign a label to register.
8170 2) sign-extend a 8bit value to 32bit and then assign to register.
8172 Constant pool access in format:
8173 (set (reg r0) (mem (symbol_ref (".LC0"))))
8174 will cause the use of literal pool (later in function arm_reorg).
8175 So here we mark such format as an invalid format, then the compiler
8176 will adjust it into:
8177 (set (reg r0) (symbol_ref (".LC0")))
8178 (set (reg r0) (mem (reg r0))).
8179 No extra register is required, and (mem (reg r0)) won't cause the use
8180 of literal pools. */
8181 if (arm_disable_literal_pool
&& GET_CODE (x
) == SYMBOL_REF
8182 && CONSTANT_POOL_ADDRESS_P (x
))
8188 /* Return nonzero if X is a valid Thumb-2 address operand. */
8190 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8193 enum rtx_code code
= GET_CODE (x
);
8195 if (arm_address_register_rtx_p (x
, strict_p
))
8198 use_ldrd
= (TARGET_LDRD
8199 && (mode
== DImode
|| mode
== DFmode
));
8201 if (code
== POST_INC
|| code
== PRE_DEC
8202 || ((code
== PRE_INC
|| code
== POST_DEC
)
8203 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
8204 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
8206 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
8207 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
8208 && GET_CODE (XEXP (x
, 1)) == PLUS
8209 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
8211 /* Thumb-2 only has autoincrement by constant. */
8212 rtx addend
= XEXP (XEXP (x
, 1), 1);
8213 HOST_WIDE_INT offset
;
8215 if (!CONST_INT_P (addend
))
8218 offset
= INTVAL(addend
);
8219 if (GET_MODE_SIZE (mode
) <= 4)
8220 return (offset
> -256 && offset
< 256);
8222 return (use_ldrd
&& offset
> -1024 && offset
< 1024
8223 && (offset
& 3) == 0);
8226 /* After reload constants split into minipools will have addresses
8227 from a LABEL_REF. */
8228 else if (reload_completed
8229 && (code
== LABEL_REF
8231 && GET_CODE (XEXP (x
, 0)) == PLUS
8232 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8233 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8236 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
8239 else if (code
== PLUS
)
8241 rtx xop0
= XEXP (x
, 0);
8242 rtx xop1
= XEXP (x
, 1);
8244 return ((arm_address_register_rtx_p (xop0
, strict_p
)
8245 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
8246 || (!strict_p
&& will_be_in_index_register (xop1
))))
8247 || (arm_address_register_rtx_p (xop1
, strict_p
)
8248 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
8251 else if (can_avoid_literal_pool_for_label_p (x
))
8254 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8255 && code
== SYMBOL_REF
8256 && CONSTANT_POOL_ADDRESS_P (x
)
8258 && symbol_mentioned_p (get_pool_constant (x
))
8259 && ! pcrel_constant_p (get_pool_constant (x
))))
8265 /* Return nonzero if INDEX is valid for an address index operand in
8268 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
8271 HOST_WIDE_INT range
;
8272 enum rtx_code code
= GET_CODE (index
);
8274 /* Standard coprocessor addressing modes. */
8275 if (TARGET_HARD_FLOAT
8276 && (mode
== SFmode
|| mode
== DFmode
))
8277 return (code
== CONST_INT
&& INTVAL (index
) < 1024
8278 && INTVAL (index
) > -1024
8279 && (INTVAL (index
) & 3) == 0);
8281 /* For quad modes, we restrict the constant offset to be slightly less
8282 than what the instruction format permits. We do this because for
8283 quad mode moves, we will actually decompose them into two separate
8284 double-mode reads or writes. INDEX must therefore be a valid
8285 (double-mode) offset and so should INDEX+8. */
8286 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
8287 return (code
== CONST_INT
8288 && INTVAL (index
) < 1016
8289 && INTVAL (index
) > -1024
8290 && (INTVAL (index
) & 3) == 0);
8292 /* We have no such constraint on double mode offsets, so we permit the
8293 full range of the instruction format. */
8294 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8295 return (code
== CONST_INT
8296 && INTVAL (index
) < 1024
8297 && INTVAL (index
) > -1024
8298 && (INTVAL (index
) & 3) == 0);
8300 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8301 return (code
== CONST_INT
8302 && INTVAL (index
) < 1024
8303 && INTVAL (index
) > -1024
8304 && (INTVAL (index
) & 3) == 0);
8306 if (arm_address_register_rtx_p (index
, strict_p
)
8307 && (GET_MODE_SIZE (mode
) <= 4))
8310 if (mode
== DImode
|| mode
== DFmode
)
8312 if (code
== CONST_INT
)
8314 HOST_WIDE_INT val
= INTVAL (index
);
8316 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8317 If vldr is selected it uses arm_coproc_mem_operand. */
8319 return val
> -256 && val
< 256;
8321 return val
> -4096 && val
< 4092;
8324 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
8327 if (GET_MODE_SIZE (mode
) <= 4
8331 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
8335 rtx xiop0
= XEXP (index
, 0);
8336 rtx xiop1
= XEXP (index
, 1);
8338 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8339 && power_of_two_operand (xiop1
, SImode
))
8340 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8341 && power_of_two_operand (xiop0
, SImode
)));
8343 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
8344 || code
== ASHIFT
|| code
== ROTATERT
)
8346 rtx op
= XEXP (index
, 1);
8348 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8351 && INTVAL (op
) <= 31);
8355 /* For ARM v4 we may be doing a sign-extend operation during the
8361 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
8367 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
8369 return (code
== CONST_INT
8370 && INTVAL (index
) < range
8371 && INTVAL (index
) > -range
);
8374 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8375 index operand. i.e. 1, 2, 4 or 8. */
8377 thumb2_index_mul_operand (rtx op
)
8381 if (!CONST_INT_P (op
))
8385 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
8388 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8390 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
8392 enum rtx_code code
= GET_CODE (index
);
8394 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8395 /* Standard coprocessor addressing modes. */
8396 if (TARGET_HARD_FLOAT
8397 && (mode
== SFmode
|| mode
== DFmode
))
8398 return (code
== CONST_INT
&& INTVAL (index
) < 1024
8399 /* Thumb-2 allows only > -256 index range for it's core register
8400 load/stores. Since we allow SF/DF in core registers, we have
8401 to use the intersection between -256~4096 (core) and -1024~1024
8403 && INTVAL (index
) > -256
8404 && (INTVAL (index
) & 3) == 0);
8406 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8408 /* For DImode assume values will usually live in core regs
8409 and only allow LDRD addressing modes. */
8410 if (!TARGET_LDRD
|| mode
!= DImode
)
8411 return (code
== CONST_INT
8412 && INTVAL (index
) < 1024
8413 && INTVAL (index
) > -1024
8414 && (INTVAL (index
) & 3) == 0);
8417 /* For quad modes, we restrict the constant offset to be slightly less
8418 than what the instruction format permits. We do this because for
8419 quad mode moves, we will actually decompose them into two separate
8420 double-mode reads or writes. INDEX must therefore be a valid
8421 (double-mode) offset and so should INDEX+8. */
8422 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
8423 return (code
== CONST_INT
8424 && INTVAL (index
) < 1016
8425 && INTVAL (index
) > -1024
8426 && (INTVAL (index
) & 3) == 0);
8428 /* We have no such constraint on double mode offsets, so we permit the
8429 full range of the instruction format. */
8430 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8431 return (code
== CONST_INT
8432 && INTVAL (index
) < 1024
8433 && INTVAL (index
) > -1024
8434 && (INTVAL (index
) & 3) == 0);
8436 if (arm_address_register_rtx_p (index
, strict_p
)
8437 && (GET_MODE_SIZE (mode
) <= 4))
8440 if (mode
== DImode
|| mode
== DFmode
)
8442 if (code
== CONST_INT
)
8444 HOST_WIDE_INT val
= INTVAL (index
);
8445 /* Thumb-2 ldrd only has reg+const addressing modes.
8446 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8447 If vldr is selected it uses arm_coproc_mem_operand. */
8449 return IN_RANGE (val
, -1020, 1020) && (val
& 3) == 0;
8451 return IN_RANGE (val
, -255, 4095 - 4);
8459 rtx xiop0
= XEXP (index
, 0);
8460 rtx xiop1
= XEXP (index
, 1);
8462 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8463 && thumb2_index_mul_operand (xiop1
))
8464 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8465 && thumb2_index_mul_operand (xiop0
)));
8467 else if (code
== ASHIFT
)
8469 rtx op
= XEXP (index
, 1);
8471 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8474 && INTVAL (op
) <= 3);
8477 return (code
== CONST_INT
8478 && INTVAL (index
) < 4096
8479 && INTVAL (index
) > -256);
8482 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8484 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
8494 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
8496 return (regno
<= LAST_LO_REGNUM
8497 || regno
> LAST_VIRTUAL_REGISTER
8498 || regno
== FRAME_POINTER_REGNUM
8499 || (GET_MODE_SIZE (mode
) >= 4
8500 && (regno
== STACK_POINTER_REGNUM
8501 || regno
>= FIRST_PSEUDO_REGISTER
8502 || x
== hard_frame_pointer_rtx
8503 || x
== arg_pointer_rtx
)));
8506 /* Return nonzero if x is a legitimate index register. This is the case
8507 for any base register that can access a QImode object. */
8509 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
8511 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
8514 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8516 The AP may be eliminated to either the SP or the FP, so we use the
8517 least common denominator, e.g. SImode, and offsets from 0 to 64.
8519 ??? Verify whether the above is the right approach.
8521 ??? Also, the FP may be eliminated to the SP, so perhaps that
8522 needs special handling also.
8524 ??? Look at how the mips16 port solves this problem. It probably uses
8525 better ways to solve some of these problems.
8527 Although it is not incorrect, we don't accept QImode and HImode
8528 addresses based on the frame pointer or arg pointer until the
8529 reload pass starts. This is so that eliminating such addresses
8530 into stack based ones won't produce impossible code. */
8532 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8534 if (TARGET_HAVE_MOVT
&& can_avoid_literal_pool_for_label_p (x
))
8537 /* ??? Not clear if this is right. Experiment. */
8538 if (GET_MODE_SIZE (mode
) < 4
8539 && !(reload_in_progress
|| reload_completed
)
8540 && (reg_mentioned_p (frame_pointer_rtx
, x
)
8541 || reg_mentioned_p (arg_pointer_rtx
, x
)
8542 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
8543 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
8544 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
8545 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
8548 /* Accept any base register. SP only in SImode or larger. */
8549 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
8552 /* This is PC relative data before arm_reorg runs. */
8553 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
8554 && GET_CODE (x
) == SYMBOL_REF
8555 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
8558 /* This is PC relative data after arm_reorg runs. */
8559 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
8561 && (GET_CODE (x
) == LABEL_REF
8562 || (GET_CODE (x
) == CONST
8563 && GET_CODE (XEXP (x
, 0)) == PLUS
8564 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8565 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8568 /* Post-inc indexing only supported for SImode and larger. */
8569 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
8570 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
8573 else if (GET_CODE (x
) == PLUS
)
8575 /* REG+REG address can be any two index registers. */
8576 /* We disallow FRAME+REG addressing since we know that FRAME
8577 will be replaced with STACK, and SP relative addressing only
8578 permits SP+OFFSET. */
8579 if (GET_MODE_SIZE (mode
) <= 4
8580 && XEXP (x
, 0) != frame_pointer_rtx
8581 && XEXP (x
, 1) != frame_pointer_rtx
8582 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8583 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
8584 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
8587 /* REG+const has 5-7 bit offset for non-SP registers. */
8588 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8589 || XEXP (x
, 0) == arg_pointer_rtx
)
8590 && CONST_INT_P (XEXP (x
, 1))
8591 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
8594 /* REG+const has 10-bit offset for SP, but only SImode and
8595 larger is supported. */
8596 /* ??? Should probably check for DI/DFmode overflow here
8597 just like GO_IF_LEGITIMATE_OFFSET does. */
8598 else if (REG_P (XEXP (x
, 0))
8599 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
8600 && GET_MODE_SIZE (mode
) >= 4
8601 && CONST_INT_P (XEXP (x
, 1))
8602 && INTVAL (XEXP (x
, 1)) >= 0
8603 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
8604 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8607 else if (REG_P (XEXP (x
, 0))
8608 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
8609 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
8610 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
8611 && REGNO (XEXP (x
, 0))
8612 <= LAST_VIRTUAL_POINTER_REGISTER
))
8613 && GET_MODE_SIZE (mode
) >= 4
8614 && CONST_INT_P (XEXP (x
, 1))
8615 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8619 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8620 && GET_MODE_SIZE (mode
) == 4
8621 && GET_CODE (x
) == SYMBOL_REF
8622 && CONSTANT_POOL_ADDRESS_P (x
)
8624 && symbol_mentioned_p (get_pool_constant (x
))
8625 && ! pcrel_constant_p (get_pool_constant (x
))))
8631 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8632 instruction of mode MODE. */
8634 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
8636 switch (GET_MODE_SIZE (mode
))
8639 return val
>= 0 && val
< 32;
8642 return val
>= 0 && val
< 64 && (val
& 1) == 0;
8646 && (val
+ GET_MODE_SIZE (mode
)) <= 128
8652 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
8655 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
8656 else if (TARGET_THUMB2
)
8657 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
8658 else /* if (TARGET_THUMB1) */
8659 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
8662 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8664 Given an rtx X being reloaded into a reg required to be
8665 in class CLASS, return the class of reg to actually use.
8666 In general this is just CLASS, but for the Thumb core registers and
8667 immediate constants we prefer a LO_REGS class or a subset. */
8670 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
8676 if (rclass
== GENERAL_REGS
)
8683 /* Build the SYMBOL_REF for __tls_get_addr. */
8685 static GTY(()) rtx tls_get_addr_libfunc
;
8688 get_tls_get_addr (void)
8690 if (!tls_get_addr_libfunc
)
8691 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
8692 return tls_get_addr_libfunc
;
8696 arm_load_tp (rtx target
)
8699 target
= gen_reg_rtx (SImode
);
8703 /* Can return in any reg. */
8704 emit_insn (gen_load_tp_hard (target
));
8708 /* Always returned in r0. Immediately copy the result into a pseudo,
8709 otherwise other uses of r0 (e.g. setting up function arguments) may
8710 clobber the value. */
8716 rtx fdpic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
8717 rtx initial_fdpic_reg
= get_hard_reg_initial_val (Pmode
, FDPIC_REGNUM
);
8719 emit_insn (gen_load_tp_soft_fdpic ());
8722 emit_insn (gen_restore_pic_register_after_call(fdpic_reg
, initial_fdpic_reg
));
8725 emit_insn (gen_load_tp_soft ());
8727 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
8728 emit_move_insn (target
, tmp
);
8734 load_tls_operand (rtx x
, rtx reg
)
8738 if (reg
== NULL_RTX
)
8739 reg
= gen_reg_rtx (SImode
);
8741 tmp
= gen_rtx_CONST (SImode
, x
);
8743 emit_move_insn (reg
, tmp
);
8749 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
8751 rtx label
, labelno
= NULL_RTX
, sum
;
8753 gcc_assert (reloc
!= TLS_DESCSEQ
);
8758 sum
= gen_rtx_UNSPEC (Pmode
,
8759 gen_rtvec (2, x
, GEN_INT (reloc
)),
8764 labelno
= GEN_INT (pic_labelno
++);
8765 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8766 label
= gen_rtx_CONST (VOIDmode
, label
);
8768 sum
= gen_rtx_UNSPEC (Pmode
,
8769 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
8770 GEN_INT (TARGET_ARM
? 8 : 4)),
8773 reg
= load_tls_operand (sum
, reg
);
8776 emit_insn (gen_addsi3 (reg
, reg
, gen_rtx_REG (Pmode
, FDPIC_REGNUM
)));
8777 else if (TARGET_ARM
)
8778 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
8780 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8782 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
8783 LCT_PURE
, /* LCT_CONST? */
8786 rtx_insn
*insns
= get_insns ();
8793 arm_tls_descseq_addr (rtx x
, rtx reg
)
8795 rtx labelno
= GEN_INT (pic_labelno
++);
8796 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8797 rtx sum
= gen_rtx_UNSPEC (Pmode
,
8798 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
8799 gen_rtx_CONST (VOIDmode
, label
),
8800 GEN_INT (!TARGET_ARM
)),
8802 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
8804 emit_insn (gen_tlscall (x
, labelno
));
8806 reg
= gen_reg_rtx (SImode
);
8808 gcc_assert (REGNO (reg
) != R0_REGNUM
);
8810 emit_move_insn (reg
, reg0
);
8817 legitimize_tls_address (rtx x
, rtx reg
)
8819 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
8821 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
8825 case TLS_MODEL_GLOBAL_DYNAMIC
:
8826 if (TARGET_GNU2_TLS
)
8828 gcc_assert (!TARGET_FDPIC
);
8830 reg
= arm_tls_descseq_addr (x
, reg
);
8832 tp
= arm_load_tp (NULL_RTX
);
8834 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8838 /* Original scheme */
8840 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32_FDPIC
);
8842 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
8843 dest
= gen_reg_rtx (Pmode
);
8844 emit_libcall_block (insns
, dest
, ret
, x
);
8848 case TLS_MODEL_LOCAL_DYNAMIC
:
8849 if (TARGET_GNU2_TLS
)
8851 gcc_assert (!TARGET_FDPIC
);
8853 reg
= arm_tls_descseq_addr (x
, reg
);
8855 tp
= arm_load_tp (NULL_RTX
);
8857 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8862 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32_FDPIC
);
8864 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
8866 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8867 share the LDM result with other LD model accesses. */
8868 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
8870 dest
= gen_reg_rtx (Pmode
);
8871 emit_libcall_block (insns
, dest
, ret
, eqv
);
8873 /* Load the addend. */
8874 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
8875 GEN_INT (TLS_LDO32
)),
8877 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
8878 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
8882 case TLS_MODEL_INITIAL_EXEC
:
8885 sum
= gen_rtx_UNSPEC (Pmode
,
8886 gen_rtvec (2, x
, GEN_INT (TLS_IE32_FDPIC
)),
8888 reg
= load_tls_operand (sum
, reg
);
8889 emit_insn (gen_addsi3 (reg
, reg
, gen_rtx_REG (Pmode
, FDPIC_REGNUM
)));
8890 emit_move_insn (reg
, gen_rtx_MEM (Pmode
, reg
));
8894 labelno
= GEN_INT (pic_labelno
++);
8895 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8896 label
= gen_rtx_CONST (VOIDmode
, label
);
8897 sum
= gen_rtx_UNSPEC (Pmode
,
8898 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
8899 GEN_INT (TARGET_ARM
? 8 : 4)),
8901 reg
= load_tls_operand (sum
, reg
);
8904 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
8905 else if (TARGET_THUMB2
)
8906 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
8909 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8910 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
8914 tp
= arm_load_tp (NULL_RTX
);
8916 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8918 case TLS_MODEL_LOCAL_EXEC
:
8919 tp
= arm_load_tp (NULL_RTX
);
8921 reg
= gen_rtx_UNSPEC (Pmode
,
8922 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
8924 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
8926 return gen_rtx_PLUS (Pmode
, tp
, reg
);
/* NOTE(review): this chunk is a corrupted extraction of gcc/config/arm/arm.c.
   Original file line numbers are fused into the text, statements are split
   across lines, and some lines (return type, braces, `else` arms) were
   dropped.  Code is left byte-identical; do not edit without recovering the
   pristine source.  */
8933 /* Try machine-dependent ways of modifying an illegitimate address
8934 to be legitimate. If we find one, return the new, valid address. */
/* Visible structure: handle TLS references first (strip any CONST/PLUS
   addend, legitimize the SYMBOL_REF, re-add the addend), then delegate to
   thumb_legitimize_address, then rewrite PLUS/MINUS/CONST_INT address forms
   so they match ARM addressing modes.  */
8936 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8938 if (arm_tls_referenced_p (x
))
8942 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
8944 addend
= XEXP (XEXP (x
, 0), 1);
8945 x
= XEXP (XEXP (x
, 0), 0);
8948 if (GET_CODE (x
) != SYMBOL_REF
)
8951 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
8953 x
= legitimize_tls_address (x
, NULL_RTX
);
8957 x
= gen_rtx_PLUS (SImode
, x
, addend
);
8965 return thumb_legitimize_address (x
, orig_x
, mode
);
8967 if (GET_CODE (x
) == PLUS
)
8969 rtx xop0
= XEXP (x
, 0);
8970 rtx xop1
= XEXP (x
, 1);
8972 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
8973 xop0
= force_reg (SImode
, xop0
);
8975 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
8976 && !symbol_mentioned_p (xop1
))
8977 xop1
= force_reg (SImode
, xop1
);
8979 if (ARM_BASE_REGISTER_RTX_P (xop0
)
8980 && CONST_INT_P (xop1
))
8982 HOST_WIDE_INT n
, low_n
;
8986 /* VFP addressing modes actually allow greater offsets, but for
8987 now we just stick with the lowest common denominator. */
8988 if (mode
== DImode
|| mode
== DFmode
)
9000 low_n
= ((mode
) == TImode
? 0
9001 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
9005 base_reg
= gen_reg_rtx (SImode
);
9006 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
9007 emit_move_insn (base_reg
, val
);
9008 x
= plus_constant (Pmode
, base_reg
, low_n
);
9010 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
9011 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
9014 /* XXX We don't allow MINUS any more -- see comment in
9015 arm_legitimate_address_outer_p (). */
9016 else if (GET_CODE (x
) == MINUS
)
9018 rtx xop0
= XEXP (x
, 0);
9019 rtx xop1
= XEXP (x
, 1);
9021 if (CONSTANT_P (xop0
))
9022 xop0
= force_reg (SImode
, xop0
);
9024 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
9025 xop1
= force_reg (SImode
, xop1
);
9027 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
9028 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
9031 /* Make sure to take full advantage of the pre-indexed addressing mode
9032 with absolute addresses which often allows for the base register to
9033 be factorized for multiple adjacent memory references, and it might
9034 even allows for the mini pool to be avoided entirely. */
9035 else if (CONST_INT_P (x
) && optimize
> 0)
9038 HOST_WIDE_INT mask
, base
, index
;
9041 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
9042 use a 8-bit index. So let's use a 12-bit index for SImode only and
9043 hope that arm_gen_constant will enable ldrb to use more bits. */
9044 bits
= (mode
== SImode
) ? 12 : 8;
9045 mask
= (1 << bits
) - 1;
9046 base
= INTVAL (x
) & ~mask
;
9047 index
= INTVAL (x
) & mask
;
9048 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
9050 /* It'll most probably be more efficient to generate the base
9051 with more bits set and use a negative index instead. */
9055 base_reg
= force_reg (SImode
, GEN_INT (base
));
9056 x
= plus_constant (Pmode
, base_reg
, index
);
9061 /* We need to find and carefully transform any SYMBOL and LABEL
9062 references; so go back to the original address expression. */
9063 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
, NULL_RTX
,
9064 false /*compute_now*/);
9066 if (new_x
!= orig_x
)
/* NOTE(review): corrupted extraction -- original line numbers fused into the
   text, statements split across lines, some lines dropped.  Code left
   byte-identical.  */
9074 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9075 to be legitimate. If we find one, return the new, valid address. */
/* Visible structure: for PLUS with an out-of-range CONST_INT offset, either
   bias the base register and use a small residual offset (not when
   optimizing for size), force a small negative offset via a subtract, or
   force the constant into a register; finally fall back to
   legitimize_pic_address on the original expression.  */
9077 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
9079 if (GET_CODE (x
) == PLUS
9080 && CONST_INT_P (XEXP (x
, 1))
9081 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
9082 || INTVAL (XEXP (x
, 1)) < 0))
9084 rtx xop0
= XEXP (x
, 0);
9085 rtx xop1
= XEXP (x
, 1);
9086 HOST_WIDE_INT offset
= INTVAL (xop1
);
9088 /* Try and fold the offset into a biasing of the base register and
9089 then offsetting that. Don't do this when optimizing for space
9090 since it can cause too many CSEs. */
9091 if (optimize_size
&& offset
>= 0
9092 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
9094 HOST_WIDE_INT delta
;
9097 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
9098 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
9099 delta
= 31 * GET_MODE_SIZE (mode
);
9101 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
9103 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
9105 x
= plus_constant (Pmode
, xop0
, delta
);
9107 else if (offset
< 0 && offset
> -256)
9108 /* Small negative offsets are best done with a subtract before the
9109 dereference, forcing these into a register normally takes two
9111 x
= force_operand (x
, NULL_RTX
);
9114 /* For the remaining cases, force the constant into a register. */
9115 xop1
= force_reg (SImode
, xop1
);
9116 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
9119 else if (GET_CODE (x
) == PLUS
9120 && s_register_operand (XEXP (x
, 1), SImode
)
9121 && !s_register_operand (XEXP (x
, 0), SImode
))
9123 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
9125 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
9130 /* We need to find and carefully transform any SYMBOL and LABEL
9131 references; so go back to the original address expression. */
9132 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
, NULL_RTX
,
9133 false /*compute_now*/);
9135 if (new_x
!= orig_x
)
/* NOTE(review): corrupted extraction -- line numbers fused into text, lines
   split/dropped.  Code left byte-identical.  */
9142 /* Return TRUE if X contains any TLS symbol references. */
/* Walks every sub-rtx of X looking for SYMBOL_REFs with a nonzero TLS
   model; issues a `sorry' diagnostic when literal pools are disabled, and
   skips UNSPEC_TLS sub-expressions (they hold TLS offsets, not symbols).  */
9145 arm_tls_referenced_p (rtx x
)
9147 if (! TARGET_HAVE_TLS
)
9150 subrtx_iterator::array_type array
;
9151 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
9153 const_rtx x
= *iter
;
9154 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
9156 /* ARM currently does not provide relocations to encode TLS variables
9157 into AArch32 instructions, only data, so there is no way to
9158 currently implement these if a literal pool is disabled. */
9159 if (arm_disable_literal_pool
)
9160 sorry ("accessing thread-local storage is not currently supported "
9161 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9166 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9167 TLS offsets, not real symbol references. */
9168 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
9169 iter
.skip_subrtxes ();
/* NOTE(review): corrupted extraction -- line numbers fused into text, lines
   split/dropped.  Code left byte-identical.  */
9174 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9176 On the ARM, allow any integer (invalid ones are removed later by insn
9177 patterns), nice doubles and symbol_refs which refer to the function's
9180 When generating pic allow anything. */
/* Visible body: constant is legitimate when compiling PIC, or when it
   mentions no label.  */
9183 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
9185 return flag_pic
|| !label_mentioned_p (x
);
/* NOTE(review): corrupted extraction -- line numbers fused into text, lines
   split/dropped (the final disjunct/closing of the return is missing).
   Code left byte-identical.  */
/* Thumb variant of TARGET_LEGITIMATE_CONSTANT_P: accepts HIGH rtx when MOVT
   is available, plus integer constants, doubles, constant addresses and
   (with MOVT) symbol refs.  */
9189 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9191 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9192 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
9193 for ARMv8-M Baseline or later the result is valid. */
9194 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
9197 return (CONST_INT_P (x
)
9198 || CONST_DOUBLE_P (x
)
9199 || CONSTANT_ADDRESS_P (x
)
9200 || (TARGET_HAVE_MOVT
&& GET_CODE (x
) == SYMBOL_REF
)
/* NOTE(review): corrupted extraction -- line numbers fused into text; the
   selector expression between the `&&`-style guard and the `?:' (visible
   in upstream as a TARGET_32BIT test) was dropped.  Code left
   byte-identical.  */
/* Dispatches TARGET_LEGITIMATE_CONSTANT_P: the constant must not be one we
   cannot force to memory, then the ARM or Thumb predicate decides.  */
9205 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
9207 return (!arm_cannot_force_const_mem (mode
, x
)
9209 ? arm_legitimate_constant_p_1 (mode
, x
)
9210 : thumb_legitimate_constant_p (mode
, x
)));
/* NOTE(review): corrupted extraction -- line numbers fused into text, lines
   split/dropped (declarations of `base'/`offset' and the early returns are
   missing).  Code left byte-identical.  */
9213 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
/* Visible checks: function symbols with a nonzero offset, offsets escaping
   their section when ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P, and any TLS
   reference.  */
9216 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9219 split_const (x
, &base
, &offset
);
9221 if (SYMBOL_REF_P (base
))
9223 /* Function symbols cannot have an offset due to the Thumb bit. */
9224 if ((SYMBOL_REF_FLAGS (base
) & SYMBOL_FLAG_FUNCTION
)
9225 && INTVAL (offset
) != 0)
9228 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9229 && !offset_within_block_p (base
, INTVAL (offset
)))
9232 return arm_tls_referenced_p (x
);
/* NOTE(review): corrupted extraction -- the first continuation line of
   REG_OR_SUBREG_REG (the REG_P (X) test) was dropped.  Left byte-identical.
   REG_OR_SUBREG_REG: true for a REG or a SUBREG of a REG.
   REG_OR_SUBREG_RTX: the REG itself, or the inner reg of a SUBREG.  */
9235 #define REG_OR_SUBREG_REG(X) \
9237 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
9239 #define REG_OR_SUBREG_RTX(X) \
9240 (REG_P (X) ? (X) : SUBREG_REG (X))
/* NOTE(review): corrupted extraction -- the enclosing `switch (code)' and
   its `case' labels were dropped, so the returns below cannot be matched to
   rtx codes from this text alone.  Code left byte-identical.  */
/* Speed-cost estimate (in COSTS_N_INSNS units) for a Thumb-1 rtx X with
   code CODE appearing under OUTER.  */
9243 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
9245 machine_mode mode
= GET_MODE (x
);
9254 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9261 return COSTS_N_INSNS (1);
9264 if (arm_arch6m
&& arm_m_profile_small_mul
)
9265 return COSTS_N_INSNS (32);
9267 if (CONST_INT_P (XEXP (x
, 1)))
9270 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
9277 return COSTS_N_INSNS (2) + cycles
;
9279 return COSTS_N_INSNS (1) + 16;
9282 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9284 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9285 return (COSTS_N_INSNS (words
)
9286 + 4 * ((MEM_P (SET_SRC (x
)))
9287 + MEM_P (SET_DEST (x
))));
9292 if (UINTVAL (x
) < 256
9293 /* 16-bit constant. */
9294 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
9296 if (thumb_shiftable_const (INTVAL (x
)))
9297 return COSTS_N_INSNS (2);
9298 return COSTS_N_INSNS (3);
9300 else if ((outer
== PLUS
|| outer
== COMPARE
)
9301 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9303 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9304 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9305 return COSTS_N_INSNS (1);
9306 else if (outer
== AND
)
9309 /* This duplicates the tests in the andsi3 expander. */
9310 for (i
= 9; i
<= 31; i
++)
9311 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
9312 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
9313 return COSTS_N_INSNS (2);
9315 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9316 || outer
== LSHIFTRT
)
9318 return COSTS_N_INSNS (2);
9324 return COSTS_N_INSNS (3);
9342 /* XXX another guess. */
9343 /* Memory costs quite a lot for the first word, but subsequent words
9344 load at the equivalent of a single insn each. */
9345 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9346 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9351 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9357 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
9358 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
9364 return total
+ COSTS_N_INSNS (1);
9366 /* Assume a two-shift sequence. Increase the cost slightly so
9367 we prefer actual shifts over an extend operation. */
9368 return total
+ 1 + COSTS_N_INSNS (2);
/* NOTE(review): corrupted extraction -- the enclosing `switch (code)' and
   its `case' labels were dropped.  Code left byte-identical.  */
9375 /* Estimates the size cost of thumb1 instructions.
9376 For now most of the code is copied from thumb1_rtx_costs. We need more
9377 fine grain tuning when we have more related test cases. */
9379 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
9381 machine_mode mode
= GET_MODE (x
);
9390 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9394 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9395 defined by RTL expansion, especially for the expansion of
9397 if ((GET_CODE (XEXP (x
, 0)) == MULT
9398 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
9399 || (GET_CODE (XEXP (x
, 1)) == MULT
9400 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
9401 return COSTS_N_INSNS (2);
9406 return COSTS_N_INSNS (1);
9409 if (CONST_INT_P (XEXP (x
, 1)))
9411 /* Thumb1 mul instruction can't operate on const. We must Load it
9412 into a register first. */
9413 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
9414 /* For the targets which have a very small and high-latency multiply
9415 unit, we prefer to synthesize the mult with up to 5 instructions,
9416 giving a good balance between size and performance. */
9417 if (arm_arch6m
&& arm_m_profile_small_mul
)
9418 return COSTS_N_INSNS (5);
9420 return COSTS_N_INSNS (1) + const_size
;
9422 return COSTS_N_INSNS (1);
9425 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9427 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9428 cost
= COSTS_N_INSNS (words
);
9429 if (satisfies_constraint_J (SET_SRC (x
))
9430 || satisfies_constraint_K (SET_SRC (x
))
9431 /* Too big an immediate for a 2-byte mov, using MOVT. */
9432 || (CONST_INT_P (SET_SRC (x
))
9433 && UINTVAL (SET_SRC (x
)) >= 256
9435 && satisfies_constraint_j (SET_SRC (x
)))
9436 /* thumb1_movdi_insn. */
9437 || ((words
> 1) && MEM_P (SET_SRC (x
))))
9438 cost
+= COSTS_N_INSNS (1);
9444 if (UINTVAL (x
) < 256)
9445 return COSTS_N_INSNS (1);
9446 /* movw is 4byte long. */
9447 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
9448 return COSTS_N_INSNS (2);
9449 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9450 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
9451 return COSTS_N_INSNS (2);
9452 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9453 if (thumb_shiftable_const (INTVAL (x
)))
9454 return COSTS_N_INSNS (2);
9455 return COSTS_N_INSNS (3);
9457 else if ((outer
== PLUS
|| outer
== COMPARE
)
9458 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9460 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9461 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9462 return COSTS_N_INSNS (1);
9463 else if (outer
== AND
)
9466 /* This duplicates the tests in the andsi3 expander. */
9467 for (i
= 9; i
<= 31; i
++)
9468 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
9469 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
9470 return COSTS_N_INSNS (2);
9472 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9473 || outer
== LSHIFTRT
)
9475 return COSTS_N_INSNS (2);
9481 return COSTS_N_INSNS (3);
9495 return COSTS_N_INSNS (1);
9498 return (COSTS_N_INSNS (1)
9500 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9501 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9502 ? COSTS_N_INSNS (1) : 0));
9506 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9511 /* XXX still guessing. */
9512 switch (GET_MODE (XEXP (x
, 0)))
9515 return (1 + (mode
== DImode
? 4 : 0)
9516 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9519 return (4 + (mode
== DImode
? 4 : 0)
9520 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9523 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
/* NOTE(review): corrupted extraction -- the return type, braces, and the
   final `return op;' fall-through were dropped.  Code left byte-identical.  */
9534 /* Helper function for arm_rtx_costs. If one operand of the OP, a
9535 PLUS, adds the carry flag, then return the other operand. If
9536 neither is a carry, return OP unchanged. */
9538 strip_carry_operation (rtx op
)
9540 gcc_assert (GET_CODE (op
) == PLUS
);
9541 if (arm_carry_operation (XEXP (op
, 0), GET_MODE (op
)))
9542 return XEXP (op
, 1);
9543 else if (arm_carry_operation (XEXP (op
, 1), GET_MODE (op
)))
9544 return XEXP (op
, 0);
/* NOTE(review): corrupted extraction -- the return type, braces, and the
   trailing `return NULL;' were dropped.  Code left byte-identical.  */
9548 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9549 operand, then return the operand that is being shifted. If the shift
9550 is not by a constant, then set SHIFT_REG to point to the operand.
9551 Return NULL if OP is not a shifter operand. */
/* MULT by an exact power of two counts as a left shift.  */
9553 shifter_op_p (rtx op
, rtx
*shift_reg
)
9555 enum rtx_code code
= GET_CODE (op
);
9557 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9558 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9559 return XEXP (op
, 0);
9560 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9561 return XEXP (op
, 0);
9562 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9563 || code
== ASHIFTRT
)
9565 if (!CONST_INT_P (XEXP (op
, 1)))
9566 *shift_reg
= XEXP (op
, 1);
9567 return XEXP (op
, 0);
/* NOTE(review): corrupted extraction -- the return type, braces, some case
   labels (e.g. whichever case uses fp[].roundint) and `break'/`return'
   statements were dropped.  Code left byte-identical.  */
/* Writes into *COST the cost of an UNSPEC/UNSPEC_VOLATILE rtx X, using the
   current tuning's extra-cost tables when SPEED_P.  */
9574 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9576 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9577 rtx_code code
= GET_CODE (x
);
9578 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9580 switch (XINT (x
, 1))
9582 case UNSPEC_UNALIGNED_LOAD
:
9583 /* We can only do unaligned loads into the integer unit, and we can't
9585 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9587 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9588 + extra_cost
->ldst
.load_unaligned
);
9591 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9592 ADDR_SPACE_GENERIC
, speed_p
);
9596 case UNSPEC_UNALIGNED_STORE
:
9597 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9599 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9600 + extra_cost
->ldst
.store_unaligned
);
9602 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
9604 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9605 ADDR_SPACE_GENERIC
, speed_p
);
9616 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9620 *cost
= COSTS_N_INSNS (2);
/* NOTE(review): corrupted extraction -- several continuation lines of
   HANDLE_NARROW_SHIFT_ARITH (its do/while wrapper, braces, and tail) were
   dropped, so the macro as shown is incomplete.  Left byte-identical.  */
9626 /* Cost of a libcall. We assume one insn per argument, an amount for the
9627 call (one insn for -Os) and then one for processing the result. */
9628 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
/* Accumulates into *cost the cost of a narrow-mode OP whose operand IDX is
   a (left-)shifter operand; used by the PLUS/MINUS cases below.  */
9630 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9633 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9634 if (shift_op != NULL \
9635 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9640 *cost += extra_cost->alu.arith_shift_reg; \
9641 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9642 ASHIFT, 1, speed_p); \
9645 *cost += extra_cost->alu.arith_shift; \
9647 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9648 ASHIFT, 0, speed_p) \
9649 + rtx_cost (XEXP (x, 1 - IDX), \
9650 GET_MODE (shift_op), \
/* NOTE(review): corrupted extraction -- the return type, braces, several
   case labels of the addressing-mode switch, the store-side branch, and the
   final return were dropped.  Code left byte-identical.  */
9657 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9658 considering the costs of the addressing mode and memory access
/* Visible structure: base cost of one insn, plus a penalty for addresses
   that will be split (see arm.md:calculate_pic_address), plus the tuned
   addressing-mode cost (vector/fp/integer table), plus the tuned
   load cost by mode class and size.  */
9661 arm_mem_costs (rtx x
, const struct cpu_cost_table
*extra_cost
,
9662 int *cost
, bool speed_p
)
9664 machine_mode mode
= GET_MODE (x
);
9666 *cost
= COSTS_N_INSNS (1);
9669 && GET_CODE (XEXP (x
, 0)) == PLUS
9670 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9671 /* This will be split into two instructions. Add the cost of the
9672 additional instruction here. The cost of the memory access is computed
9673 below. See arm.md:calculate_pic_address. */
9674 *cost
+= COSTS_N_INSNS (1);
9676 /* Calculate cost of the addressing mode. */
9679 arm_addr_mode_op op_type
;
9680 switch (GET_CODE (XEXP (x
, 0)))
9684 op_type
= AMO_DEFAULT
;
9687 /* MINUS does not appear in RTL, but the architecture supports it,
9688 so handle this case defensively. */
9691 op_type
= AMO_NO_WB
;
9703 if (VECTOR_MODE_P (mode
))
9704 *cost
+= current_tune
->addr_mode_costs
->vector
[op_type
];
9705 else if (FLOAT_MODE_P (mode
))
9706 *cost
+= current_tune
->addr_mode_costs
->fp
[op_type
];
9708 *cost
+= current_tune
->addr_mode_costs
->integer
[op_type
];
9711 /* Calculate cost of memory access. */
9714 if (FLOAT_MODE_P (mode
))
9716 if (GET_MODE_SIZE (mode
) == 8)
9717 *cost
+= extra_cost
->ldst
.loadd
;
9719 *cost
+= extra_cost
->ldst
.loadf
;
9721 else if (VECTOR_MODE_P (mode
))
9722 *cost
+= extra_cost
->ldst
.loadv
;
9726 if (GET_MODE_SIZE (mode
) == 8)
9727 *cost
+= extra_cost
->ldst
.ldrd
;
9729 *cost
+= extra_cost
->ldst
.load
;
9736 /* RTX costs. Make an estimate of the cost of executing the operation
9737 X, which is contained within an operation with code OUTER_CODE.
9738 SPEED_P indicates whether the cost desired is the performance cost,
9739 or the size cost. The estimate is stored in COST and the return
9740 value is TRUE if the cost calculation is final, or FALSE if the
9741 caller should recurse through the operands of X to add additional
9744 We currently make no attempt to model the size savings of Thumb-2
9745 16-bit instructions. At the normal points in compilation where
9746 this code is called we have no measure of whether the condition
9747 flags are live or not, and thus no realistic way to determine what
9748 the size will eventually be. */
9750 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9751 const struct cpu_cost_table
*extra_cost
,
9752 int *cost
, bool speed_p
)
9754 machine_mode mode
= GET_MODE (x
);
9756 *cost
= COSTS_N_INSNS (1);
9761 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9763 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9771 /* SET RTXs don't have a mode so we get it from the destination. */
9772 mode
= GET_MODE (SET_DEST (x
));
9774 if (REG_P (SET_SRC (x
))
9775 && REG_P (SET_DEST (x
)))
9777 /* Assume that most copies can be done with a single insn,
9778 unless we don't have HW FP, in which case everything
9779 larger than word mode will require two insns. */
9780 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9781 && GET_MODE_SIZE (mode
) > 4)
9784 /* Conditional register moves can be encoded
9785 in 16 bits in Thumb mode. */
9786 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9792 if (CONST_INT_P (SET_SRC (x
)))
9794 /* Handle CONST_INT here, since the value doesn't have a mode
9795 and we would otherwise be unable to work out the true cost. */
9796 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9799 /* Slightly lower the cost of setting a core reg to a constant.
9800 This helps break up chains and allows for better scheduling. */
9801 if (REG_P (SET_DEST (x
))
9802 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9805 /* Immediate moves with an immediate in the range [0, 255] can be
9806 encoded in 16 bits in Thumb mode. */
9807 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9808 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9810 goto const_int_cost
;
9816 return arm_mem_costs (x
, extra_cost
, cost
, speed_p
);
9820 /* Calculations of LDM costs are complex. We assume an initial cost
9821 (ldm_1st) which will load the number of registers mentioned in
9822 ldm_regs_per_insn_1st registers; then each additional
9823 ldm_regs_per_insn_subsequent registers cost one more insn. The
9824 formula for N regs is thus:
9826 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9827 + ldm_regs_per_insn_subsequent - 1)
9828 / ldm_regs_per_insn_subsequent).
9830 Additional costs may also be added for addressing. A similar
9831 formula is used for STM. */
9833 bool is_ldm
= load_multiple_operation (x
, SImode
);
9834 bool is_stm
= store_multiple_operation (x
, SImode
);
9836 if (is_ldm
|| is_stm
)
9840 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9841 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9842 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9843 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9844 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9845 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9846 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9848 *cost
+= regs_per_insn_1st
9849 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9850 + regs_per_insn_sub
- 1)
9851 / regs_per_insn_sub
);
9860 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9861 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9862 *cost
+= COSTS_N_INSNS (speed_p
9863 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9864 else if (mode
== SImode
&& TARGET_IDIV
)
9865 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9867 *cost
= LIBCALL_COST (2);
9869 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9870 possible udiv is prefered. */
9871 *cost
+= (code
== DIV
? COSTS_N_INSNS (1) : 0);
9872 return false; /* All arguments must be in registers. */
9875 /* MOD by a power of 2 can be expanded as:
9877 and r0, r0, #(n - 1)
9878 and r1, r1, #(n - 1)
9879 rsbpl r0, r1, #0. */
9880 if (CONST_INT_P (XEXP (x
, 1))
9881 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9884 *cost
+= COSTS_N_INSNS (3);
9887 *cost
+= 2 * extra_cost
->alu
.logical
9888 + extra_cost
->alu
.arith
;
9894 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9895 possible udiv is prefered. */
9896 *cost
= LIBCALL_COST (2) + (code
== MOD
? COSTS_N_INSNS (1) : 0);
9897 return false; /* All arguments must be in registers. */
9900 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9902 *cost
+= (COSTS_N_INSNS (1)
9903 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9905 *cost
+= extra_cost
->alu
.shift_reg
;
9913 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9915 *cost
+= (COSTS_N_INSNS (2)
9916 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9918 *cost
+= 2 * extra_cost
->alu
.shift
;
9919 /* Slightly disparage left shift by 1 at so we prefer adddi3. */
9920 if (code
== ASHIFT
&& XEXP (x
, 1) == CONST1_RTX (SImode
))
9924 else if (mode
== SImode
)
9926 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9927 /* Slightly disparage register shifts at -Os, but not by much. */
9928 if (!CONST_INT_P (XEXP (x
, 1)))
9929 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9930 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9933 else if (GET_MODE_CLASS (mode
) == MODE_INT
9934 && GET_MODE_SIZE (mode
) < 4)
9938 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9939 /* Slightly disparage register shifts at -Os, but not by
9941 if (!CONST_INT_P (XEXP (x
, 1)))
9942 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9943 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9945 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9947 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9949 /* Can use SBFX/UBFX. */
9951 *cost
+= extra_cost
->alu
.bfx
;
9952 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9956 *cost
+= COSTS_N_INSNS (1);
9957 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9960 if (CONST_INT_P (XEXP (x
, 1)))
9961 *cost
+= 2 * extra_cost
->alu
.shift
;
9963 *cost
+= (extra_cost
->alu
.shift
9964 + extra_cost
->alu
.shift_reg
);
9967 /* Slightly disparage register shifts. */
9968 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9973 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9974 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9977 if (CONST_INT_P (XEXP (x
, 1)))
9978 *cost
+= (2 * extra_cost
->alu
.shift
9979 + extra_cost
->alu
.log_shift
);
9981 *cost
+= (extra_cost
->alu
.shift
9982 + extra_cost
->alu
.shift_reg
9983 + extra_cost
->alu
.log_shift_reg
);
9989 *cost
= LIBCALL_COST (2);
9998 *cost
+= extra_cost
->alu
.rev
;
10005 /* No rev instruction available. Look at arm_legacy_rev
10006 and thumb_legacy_rev for the form of RTL used then. */
10009 *cost
+= COSTS_N_INSNS (9);
10013 *cost
+= 6 * extra_cost
->alu
.shift
;
10014 *cost
+= 3 * extra_cost
->alu
.logical
;
10019 *cost
+= COSTS_N_INSNS (4);
10023 *cost
+= 2 * extra_cost
->alu
.shift
;
10024 *cost
+= extra_cost
->alu
.arith_shift
;
10025 *cost
+= 2 * extra_cost
->alu
.logical
;
10033 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10034 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10036 if (GET_CODE (XEXP (x
, 0)) == MULT
10037 || GET_CODE (XEXP (x
, 1)) == MULT
)
10039 rtx mul_op0
, mul_op1
, sub_op
;
10042 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
10044 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10046 mul_op0
= XEXP (XEXP (x
, 0), 0);
10047 mul_op1
= XEXP (XEXP (x
, 0), 1);
10048 sub_op
= XEXP (x
, 1);
10052 mul_op0
= XEXP (XEXP (x
, 1), 0);
10053 mul_op1
= XEXP (XEXP (x
, 1), 1);
10054 sub_op
= XEXP (x
, 0);
10057 /* The first operand of the multiply may be optionally
10059 if (GET_CODE (mul_op0
) == NEG
)
10060 mul_op0
= XEXP (mul_op0
, 0);
10062 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
10063 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
10064 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
10070 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
10074 if (mode
== SImode
)
10076 rtx shift_by_reg
= NULL
;
10079 rtx op0
= XEXP (x
, 0);
10080 rtx op1
= XEXP (x
, 1);
10082 /* Factor out any borrow operation. There's more than one way
10083 of expressing this; try to recognize them all. */
10084 if (GET_CODE (op0
) == MINUS
)
10086 if (arm_borrow_operation (op1
, SImode
))
10088 op1
= XEXP (op0
, 1);
10089 op0
= XEXP (op0
, 0);
10091 else if (arm_borrow_operation (XEXP (op0
, 1), SImode
))
10092 op0
= XEXP (op0
, 0);
10094 else if (GET_CODE (op1
) == PLUS
10095 && arm_borrow_operation (XEXP (op1
, 0), SImode
))
10096 op1
= XEXP (op1
, 0);
10097 else if (GET_CODE (op0
) == NEG
10098 && arm_borrow_operation (op1
, SImode
))
10100 /* Negate with carry-in. For Thumb2 this is done with
10101 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
10102 RSC instruction that exists in Arm mode. */
10104 *cost
+= (TARGET_THUMB2
10105 ? extra_cost
->alu
.arith_shift
10106 : extra_cost
->alu
.arith
);
10107 *cost
+= rtx_cost (XEXP (op0
, 0), mode
, MINUS
, 0, speed_p
);
10111 shift_op
= shifter_op_p (op0
, &shift_by_reg
);
10112 if (shift_op
== NULL
)
10114 shift_op
= shifter_op_p (op1
, &shift_by_reg
);
10115 non_shift_op
= op0
;
10118 non_shift_op
= op1
;
10120 if (shift_op
!= NULL
)
10122 if (shift_by_reg
!= NULL
)
10125 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10126 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
10129 *cost
+= extra_cost
->alu
.arith_shift
;
10131 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
10132 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
10136 if (arm_arch_thumb2
10137 && GET_CODE (XEXP (x
, 1)) == MULT
)
10141 *cost
+= extra_cost
->mult
[0].add
;
10142 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
10143 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
10144 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
10148 if (CONST_INT_P (op0
))
10150 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
10151 INTVAL (op0
), NULL_RTX
,
10153 *cost
= COSTS_N_INSNS (insns
);
10155 *cost
+= insns
* extra_cost
->alu
.arith
;
10156 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
10160 *cost
+= extra_cost
->alu
.arith
;
10162 /* Don't recurse as we don't want to cost any borrow that
10164 *cost
+= rtx_cost (op0
, mode
, MINUS
, 0, speed_p
);
10165 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
10169 if (GET_MODE_CLASS (mode
) == MODE_INT
10170 && GET_MODE_SIZE (mode
) < 4)
10172 rtx shift_op
, shift_reg
;
10175 /* We check both sides of the MINUS for shifter operands since,
10176 unlike PLUS, it's not commutative. */
10178 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0);
10179 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1);
10181 /* Slightly disparage, as we might need to widen the result. */
10184 *cost
+= extra_cost
->alu
.arith
;
10186 if (CONST_INT_P (XEXP (x
, 0)))
10188 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
10195 if (mode
== DImode
)
10197 *cost
+= COSTS_N_INSNS (1);
10199 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
10201 rtx op1
= XEXP (x
, 1);
10204 *cost
+= 2 * extra_cost
->alu
.arith
;
10206 if (GET_CODE (op1
) == ZERO_EXTEND
)
10207 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
10210 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
10211 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10215 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10218 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
10219 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
10221 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
10224 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
10225 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
10228 *cost
+= (extra_cost
->alu
.arith
10229 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
10230 ? extra_cost
->alu
.arith
10231 : extra_cost
->alu
.arith_shift
));
10232 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
10233 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10234 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
10239 *cost
+= 2 * extra_cost
->alu
.arith
;
10245 *cost
= LIBCALL_COST (2);
10249 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10250 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10252 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10254 rtx mul_op0
, mul_op1
, add_op
;
10257 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
10259 mul_op0
= XEXP (XEXP (x
, 0), 0);
10260 mul_op1
= XEXP (XEXP (x
, 0), 1);
10261 add_op
= XEXP (x
, 1);
10263 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
10264 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
10265 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
10271 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
10274 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10276 *cost
= LIBCALL_COST (2);
10280 /* Narrow modes can be synthesized in SImode, but the range
10281 of useful sub-operations is limited. Check for shift operations
10282 on one of the operands. Only left shifts can be used in the
10284 if (GET_MODE_CLASS (mode
) == MODE_INT
10285 && GET_MODE_SIZE (mode
) < 4)
10287 rtx shift_op
, shift_reg
;
10290 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0);
10292 if (CONST_INT_P (XEXP (x
, 1)))
10294 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10295 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10297 *cost
= COSTS_N_INSNS (insns
);
10299 *cost
+= insns
* extra_cost
->alu
.arith
;
10300 /* Slightly penalize a narrow operation as the result may
10302 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
10306 /* Slightly penalize a narrow operation as the result may
10310 *cost
+= extra_cost
->alu
.arith
;
10315 if (mode
== SImode
)
10317 rtx shift_op
, shift_reg
;
10319 if (TARGET_INT_SIMD
10320 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10321 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10323 /* UXTA[BH] or SXTA[BH]. */
10325 *cost
+= extra_cost
->alu
.extend_arith
;
10326 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10328 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
10332 rtx op0
= XEXP (x
, 0);
10333 rtx op1
= XEXP (x
, 1);
10335 /* Handle a side effect of adding in the carry to an addition. */
10336 if (GET_CODE (op0
) == PLUS
10337 && arm_carry_operation (op1
, mode
))
10339 op1
= XEXP (op0
, 1);
10340 op0
= XEXP (op0
, 0);
10342 else if (GET_CODE (op1
) == PLUS
10343 && arm_carry_operation (op0
, mode
))
10345 op0
= XEXP (op1
, 0);
10346 op1
= XEXP (op1
, 1);
10348 else if (GET_CODE (op0
) == PLUS
)
10350 op0
= strip_carry_operation (op0
);
10351 if (swap_commutative_operands_p (op0
, op1
))
10352 std::swap (op0
, op1
);
10355 if (arm_carry_operation (op0
, mode
))
10357 /* Adding the carry to a register is a canonicalization of
10358 adding 0 to the register plus the carry. */
10360 *cost
+= extra_cost
->alu
.arith
;
10361 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed_p
);
10366 shift_op
= shifter_op_p (op0
, &shift_reg
);
10367 if (shift_op
!= NULL
)
10372 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10373 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10376 *cost
+= extra_cost
->alu
.arith_shift
;
10378 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10379 + rtx_cost (op1
, mode
, PLUS
, 1, speed_p
));
10383 if (GET_CODE (op0
) == MULT
)
10387 if (TARGET_DSP_MULTIPLY
10388 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
10389 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10390 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10391 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10392 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
10393 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
10394 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
10395 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
10396 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10397 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10398 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10399 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
10402 /* SMLA[BT][BT]. */
10404 *cost
+= extra_cost
->mult
[0].extend_add
;
10405 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
10406 SIGN_EXTEND
, 0, speed_p
)
10407 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
10408 SIGN_EXTEND
, 0, speed_p
)
10409 + rtx_cost (op1
, mode
, PLUS
, 1, speed_p
));
10414 *cost
+= extra_cost
->mult
[0].add
;
10415 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
10416 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
10417 + rtx_cost (op1
, mode
, PLUS
, 1, speed_p
));
10421 if (CONST_INT_P (op1
))
10423 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10424 INTVAL (op1
), NULL_RTX
,
10426 *cost
= COSTS_N_INSNS (insns
);
10428 *cost
+= insns
* extra_cost
->alu
.arith
;
10429 *cost
+= rtx_cost (op0
, mode
, PLUS
, 0, speed_p
);
10434 *cost
+= extra_cost
->alu
.arith
;
10436 /* Don't recurse here because we want to test the operands
10437 without any carry operation. */
10438 *cost
+= rtx_cost (op0
, mode
, PLUS
, 0, speed_p
);
10439 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed_p
);
10443 if (mode
== DImode
)
10445 if (GET_CODE (XEXP (x
, 0)) == MULT
10446 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
10447 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
10448 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
10449 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
10452 *cost
+= extra_cost
->mult
[1].extend_add
;
10453 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10454 ZERO_EXTEND
, 0, speed_p
)
10455 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
10456 ZERO_EXTEND
, 0, speed_p
)
10457 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10461 *cost
+= COSTS_N_INSNS (1);
10463 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10464 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10467 *cost
+= (extra_cost
->alu
.arith
10468 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10469 ? extra_cost
->alu
.arith
10470 : extra_cost
->alu
.arith_shift
));
10472 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10474 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10479 *cost
+= 2 * extra_cost
->alu
.arith
;
10484 *cost
= LIBCALL_COST (2);
10487 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
10490 *cost
+= extra_cost
->alu
.rev
;
10494 /* Fall through. */
10495 case AND
: case XOR
:
10496 if (mode
== SImode
)
10498 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
10499 rtx op0
= XEXP (x
, 0);
10500 rtx shift_op
, shift_reg
;
10504 || (code
== IOR
&& TARGET_THUMB2
)))
10505 op0
= XEXP (op0
, 0);
10508 shift_op
= shifter_op_p (op0
, &shift_reg
);
10509 if (shift_op
!= NULL
)
10514 *cost
+= extra_cost
->alu
.log_shift_reg
;
10515 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10518 *cost
+= extra_cost
->alu
.log_shift
;
10520 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10521 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10525 if (CONST_INT_P (XEXP (x
, 1)))
10527 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
10528 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10531 *cost
= COSTS_N_INSNS (insns
);
10533 *cost
+= insns
* extra_cost
->alu
.logical
;
10534 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
10539 *cost
+= extra_cost
->alu
.logical
;
10540 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
10541 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10545 if (mode
== DImode
)
10547 rtx op0
= XEXP (x
, 0);
10548 enum rtx_code subcode
= GET_CODE (op0
);
10550 *cost
+= COSTS_N_INSNS (1);
10554 || (code
== IOR
&& TARGET_THUMB2
)))
10555 op0
= XEXP (op0
, 0);
10557 if (GET_CODE (op0
) == ZERO_EXTEND
)
10560 *cost
+= 2 * extra_cost
->alu
.logical
;
10562 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
10564 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10567 else if (GET_CODE (op0
) == SIGN_EXTEND
)
10570 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
10572 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
10574 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10579 *cost
+= 2 * extra_cost
->alu
.logical
;
10585 *cost
= LIBCALL_COST (2);
10589 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10590 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10592 rtx op0
= XEXP (x
, 0);
10594 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
10595 op0
= XEXP (op0
, 0);
10598 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10600 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
10601 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
10604 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10606 *cost
= LIBCALL_COST (2);
10610 if (mode
== SImode
)
10612 if (TARGET_DSP_MULTIPLY
10613 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10614 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10615 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10616 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10617 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10618 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10619 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10620 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10621 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10622 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10623 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10624 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10627 /* SMUL[TB][TB]. */
10629 *cost
+= extra_cost
->mult
[0].extend
;
10630 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
10631 SIGN_EXTEND
, 0, speed_p
);
10632 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
10633 SIGN_EXTEND
, 1, speed_p
);
10637 *cost
+= extra_cost
->mult
[0].simple
;
10641 if (mode
== DImode
)
10643 if ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10644 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10645 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10646 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
))
10649 *cost
+= extra_cost
->mult
[1].extend
;
10650 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
10651 ZERO_EXTEND
, 0, speed_p
)
10652 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10653 ZERO_EXTEND
, 0, speed_p
));
10657 *cost
= LIBCALL_COST (2);
10662 *cost
= LIBCALL_COST (2);
10666 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10667 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10669 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10672 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
10677 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10681 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10683 *cost
= LIBCALL_COST (1);
10687 if (mode
== SImode
)
10689 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10691 *cost
+= COSTS_N_INSNS (1);
10692 /* Assume the non-flag-changing variant. */
10694 *cost
+= (extra_cost
->alu
.log_shift
10695 + extra_cost
->alu
.arith_shift
);
10696 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10700 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10701 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10703 *cost
+= COSTS_N_INSNS (1);
10704 /* No extra cost for MOV imm and MVN imm. */
10705 /* If the comparison op is using the flags, there's no further
10706 cost, otherwise we need to add the cost of the comparison. */
10707 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10708 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10709 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10711 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10712 *cost
+= (COSTS_N_INSNS (1)
10713 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10715 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10718 *cost
+= extra_cost
->alu
.arith
;
10724 *cost
+= extra_cost
->alu
.arith
;
10728 if (GET_MODE_CLASS (mode
) == MODE_INT
10729 && GET_MODE_SIZE (mode
) < 4)
10731 /* Slightly disparage, as we might need an extend operation. */
10734 *cost
+= extra_cost
->alu
.arith
;
10738 if (mode
== DImode
)
10740 *cost
+= COSTS_N_INSNS (1);
10742 *cost
+= 2 * extra_cost
->alu
.arith
;
10747 *cost
= LIBCALL_COST (1);
10751 if (mode
== SImode
)
10754 rtx shift_reg
= NULL
;
10756 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10760 if (shift_reg
!= NULL
)
10763 *cost
+= extra_cost
->alu
.log_shift_reg
;
10764 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10767 *cost
+= extra_cost
->alu
.log_shift
;
10768 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
10773 *cost
+= extra_cost
->alu
.logical
;
10776 if (mode
== DImode
)
10778 *cost
+= COSTS_N_INSNS (1);
10784 *cost
+= LIBCALL_COST (1);
10789 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10791 *cost
+= COSTS_N_INSNS (3);
10794 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
10795 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
10797 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
10798 /* Assume that if one arm of the if_then_else is a register,
10799 that it will be tied with the result and eliminate the
10800 conditional insn. */
10801 if (REG_P (XEXP (x
, 1)))
10803 else if (REG_P (XEXP (x
, 2)))
10809 if (extra_cost
->alu
.non_exec_costs_exec
)
10810 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10812 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10815 *cost
+= op1cost
+ op2cost
;
10821 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10825 machine_mode op0mode
;
10826 /* We'll mostly assume that the cost of a compare is the cost of the
10827 LHS. However, there are some notable exceptions. */
10829 /* Floating point compares are never done as side-effects. */
10830 op0mode
= GET_MODE (XEXP (x
, 0));
10831 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10832 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10835 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10837 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10839 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
10845 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10847 *cost
= LIBCALL_COST (2);
10851 /* DImode compares normally take two insns. */
10852 if (op0mode
== DImode
)
10854 *cost
+= COSTS_N_INSNS (1);
10856 *cost
+= 2 * extra_cost
->alu
.arith
;
10860 if (op0mode
== SImode
)
10865 if (XEXP (x
, 1) == const0_rtx
10866 && !(REG_P (XEXP (x
, 0))
10867 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10868 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10870 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10872 /* Multiply operations that set the flags are often
10873 significantly more expensive. */
10875 && GET_CODE (XEXP (x
, 0)) == MULT
10876 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10877 *cost
+= extra_cost
->mult
[0].flag_setting
;
10880 && GET_CODE (XEXP (x
, 0)) == PLUS
10881 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10882 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10884 *cost
+= extra_cost
->mult
[0].flag_setting
;
10889 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10890 if (shift_op
!= NULL
)
10892 if (shift_reg
!= NULL
)
10894 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
10897 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10900 *cost
+= extra_cost
->alu
.arith_shift
;
10901 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
10902 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
10907 *cost
+= extra_cost
->alu
.arith
;
10908 if (CONST_INT_P (XEXP (x
, 1))
10909 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10911 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10919 *cost
= LIBCALL_COST (2);
10942 if (outer_code
== SET
)
10944 /* Is it a store-flag operation? */
10945 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10946 && XEXP (x
, 1) == const0_rtx
)
10948 /* Thumb also needs an IT insn. */
10949 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
10952 if (XEXP (x
, 1) == const0_rtx
)
10957 /* LSR Rd, Rn, #31. */
10959 *cost
+= extra_cost
->alu
.shift
;
10969 *cost
+= COSTS_N_INSNS (1);
10973 /* RSBS T1, Rn, Rn, LSR #31
10975 *cost
+= COSTS_N_INSNS (1);
10977 *cost
+= extra_cost
->alu
.arith_shift
;
10981 /* RSB Rd, Rn, Rn, ASR #1
10982 LSR Rd, Rd, #31. */
10983 *cost
+= COSTS_N_INSNS (1);
10985 *cost
+= (extra_cost
->alu
.arith_shift
10986 + extra_cost
->alu
.shift
);
10992 *cost
+= COSTS_N_INSNS (1);
10994 *cost
+= extra_cost
->alu
.shift
;
10998 /* Remaining cases are either meaningless or would take
10999 three insns anyway. */
11000 *cost
= COSTS_N_INSNS (3);
11003 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11008 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
11009 if (CONST_INT_P (XEXP (x
, 1))
11010 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
11012 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11019 /* Not directly inside a set. If it involves the condition code
11020 register it must be the condition for a branch, cond_exec or
11021 I_T_E operation. Since the comparison is performed elsewhere
11022 this is just the control part which has no additional
11024 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
11025 && XEXP (x
, 1) == const0_rtx
)
11033 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11034 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11037 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
11041 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11043 *cost
= LIBCALL_COST (1);
11047 if (mode
== SImode
)
11050 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
11054 *cost
= LIBCALL_COST (1);
11058 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
11059 && MEM_P (XEXP (x
, 0)))
11061 if (mode
== DImode
)
11062 *cost
+= COSTS_N_INSNS (1);
11067 if (GET_MODE (XEXP (x
, 0)) == SImode
)
11068 *cost
+= extra_cost
->ldst
.load
;
11070 *cost
+= extra_cost
->ldst
.load_sign_extend
;
11072 if (mode
== DImode
)
11073 *cost
+= extra_cost
->alu
.shift
;
11078 /* Widening from less than 32-bits requires an extend operation. */
11079 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
11081 /* We have SXTB/SXTH. */
11082 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11084 *cost
+= extra_cost
->alu
.extend
;
11086 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
11088 /* Needs two shifts. */
11089 *cost
+= COSTS_N_INSNS (1);
11090 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11092 *cost
+= 2 * extra_cost
->alu
.shift
;
11095 /* Widening beyond 32-bits requires one more insn. */
11096 if (mode
== DImode
)
11098 *cost
+= COSTS_N_INSNS (1);
11100 *cost
+= extra_cost
->alu
.shift
;
11107 || GET_MODE (XEXP (x
, 0)) == SImode
11108 || GET_MODE (XEXP (x
, 0)) == QImode
)
11109 && MEM_P (XEXP (x
, 0)))
11111 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11113 if (mode
== DImode
)
11114 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
11119 /* Widening from less than 32-bits requires an extend operation. */
11120 if (GET_MODE (XEXP (x
, 0)) == QImode
)
11122 /* UXTB can be a shorter instruction in Thumb2, but it might
11123 be slower than the AND Rd, Rn, #255 alternative. When
11124 optimizing for speed it should never be slower to use
11125 AND, and we don't really model 16-bit vs 32-bit insns
11128 *cost
+= extra_cost
->alu
.logical
;
11130 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
11132 /* We have UXTB/UXTH. */
11133 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11135 *cost
+= extra_cost
->alu
.extend
;
11137 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
11139 /* Needs two shifts. It's marginally preferable to use
11140 shifts rather than two BIC instructions as the second
11141 shift may merge with a subsequent insn as a shifter
11143 *cost
= COSTS_N_INSNS (2);
11144 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11146 *cost
+= 2 * extra_cost
->alu
.shift
;
11149 /* Widening beyond 32-bits requires one more insn. */
11150 if (mode
== DImode
)
11152 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
11159 /* CONST_INT has no mode, so we cannot tell for sure how many
11160 insns are really going to be needed. The best we can do is
11161 look at the value passed. If it fits in SImode, then assume
11162 that's the mode it will be used for. Otherwise assume it
11163 will be used in DImode. */
11164 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
11169 /* Avoid blowing up in arm_gen_constant (). */
11170 if (!(outer_code
== PLUS
11171 || outer_code
== AND
11172 || outer_code
== IOR
11173 || outer_code
== XOR
11174 || outer_code
== MINUS
))
11178 if (mode
== SImode
)
11180 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
11181 INTVAL (x
), NULL
, NULL
,
11187 *cost
+= COSTS_N_INSNS (arm_gen_constant
11188 (outer_code
, SImode
, NULL
,
11189 trunc_int_for_mode (INTVAL (x
), SImode
),
11191 + arm_gen_constant (outer_code
, SImode
, NULL
,
11192 INTVAL (x
) >> 32, NULL
,
11204 if (arm_arch_thumb2
&& !flag_pic
)
11205 *cost
+= COSTS_N_INSNS (1);
11207 *cost
+= extra_cost
->ldst
.load
;
11210 *cost
+= COSTS_N_INSNS (1);
11214 *cost
+= COSTS_N_INSNS (1);
11216 *cost
+= extra_cost
->alu
.arith
;
11222 *cost
= COSTS_N_INSNS (4);
11227 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11228 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11230 if (vfp3_const_double_rtx (x
))
11233 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
11239 if (mode
== DFmode
)
11240 *cost
+= extra_cost
->ldst
.loadd
;
11242 *cost
+= extra_cost
->ldst
.loadf
;
11245 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
11249 *cost
= COSTS_N_INSNS (4);
11255 && TARGET_HARD_FLOAT
11256 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
11257 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
11258 *cost
= COSTS_N_INSNS (1);
11260 *cost
= COSTS_N_INSNS (4);
11265 /* When optimizing for size, we prefer constant pool entries to
11266 MOVW/MOVT pairs, so bump the cost of these slightly. */
11273 *cost
+= extra_cost
->alu
.clz
;
11277 if (XEXP (x
, 1) == const0_rtx
)
11280 *cost
+= extra_cost
->alu
.log_shift
;
11281 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11284 /* Fall through. */
11288 *cost
+= COSTS_N_INSNS (1);
11292 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
11293 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
11294 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
11295 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11296 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
11297 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
11298 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
11299 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
11303 *cost
+= extra_cost
->mult
[1].extend
;
11304 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
11305 ZERO_EXTEND
, 0, speed_p
)
11306 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
11307 ZERO_EXTEND
, 0, speed_p
));
11310 *cost
= LIBCALL_COST (1);
11313 case UNSPEC_VOLATILE
:
11315 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
11318 /* Reading the PC is like reading any other register. Writing it
11319 is more expensive, but we take that into account elsewhere. */
11324 /* TODO: Simple zero_extract of bottom bits using AND. */
11325 /* Fall through. */
11329 && CONST_INT_P (XEXP (x
, 1))
11330 && CONST_INT_P (XEXP (x
, 2)))
11333 *cost
+= extra_cost
->alu
.bfx
;
11334 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11337 /* Without UBFX/SBFX, need to resort to shift operations. */
11338 *cost
+= COSTS_N_INSNS (1);
11340 *cost
+= 2 * extra_cost
->alu
.shift
;
11341 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
11345 if (TARGET_HARD_FLOAT
)
11348 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
11350 && GET_MODE (XEXP (x
, 0)) == HFmode
)
11352 /* Pre v8, widening HF->DF is a two-step process, first
11353 widening to SFmode. */
11354 *cost
+= COSTS_N_INSNS (1);
11356 *cost
+= extra_cost
->fp
[0].widen
;
11358 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11362 *cost
= LIBCALL_COST (1);
11365 case FLOAT_TRUNCATE
:
11366 if (TARGET_HARD_FLOAT
)
11369 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
11370 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11372 /* Vector modes? */
11374 *cost
= LIBCALL_COST (1);
11378 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
11380 rtx op0
= XEXP (x
, 0);
11381 rtx op1
= XEXP (x
, 1);
11382 rtx op2
= XEXP (x
, 2);
11385 /* vfms or vfnma. */
11386 if (GET_CODE (op0
) == NEG
)
11387 op0
= XEXP (op0
, 0);
11389 /* vfnms or vfnma. */
11390 if (GET_CODE (op2
) == NEG
)
11391 op2
= XEXP (op2
, 0);
11393 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
11394 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
11395 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
11398 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
11403 *cost
= LIBCALL_COST (3);
11408 if (TARGET_HARD_FLOAT
)
11410 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11411 a vcvt fixed-point conversion. */
11412 if (code
== FIX
&& mode
== SImode
11413 && GET_CODE (XEXP (x
, 0)) == FIX
11414 && GET_MODE (XEXP (x
, 0)) == SFmode
11415 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11416 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
11420 *cost
+= extra_cost
->fp
[0].toint
;
11422 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
11427 if (GET_MODE_CLASS (mode
) == MODE_INT
)
11429 mode
= GET_MODE (XEXP (x
, 0));
11431 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
11432 /* Strip of the 'cost' of rounding towards zero. */
11433 if (GET_CODE (XEXP (x
, 0)) == FIX
)
11434 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
11437 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11438 /* ??? Increase the cost to deal with transferring from
11439 FP -> CORE registers? */
11442 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11446 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
11449 /* Vector costs? */
11451 *cost
= LIBCALL_COST (1);
11455 case UNSIGNED_FLOAT
:
11456 if (TARGET_HARD_FLOAT
)
11458 /* ??? Increase the cost to deal with transferring from CORE
11459 -> FP registers? */
11461 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
11464 *cost
= LIBCALL_COST (1);
11472 /* Just a guess. Guess number of instructions in the asm
11473 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11474 though (see PR60663). */
11475 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
11476 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
11478 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
11482 if (mode
!= VOIDmode
)
11483 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
11485 *cost
= COSTS_N_INSNS (4); /* Who knows? */
11490 #undef HANDLE_NARROW_SHIFT_ARITH
11492 /* RTX costs entry point. */
11495 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
11496 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
11499 int code
= GET_CODE (x
);
11500 gcc_assert (current_tune
->insn_extra_cost
);
11502 result
= arm_rtx_costs_internal (x
, (enum rtx_code
) code
,
11503 (enum rtx_code
) outer_code
,
11504 current_tune
->insn_extra_cost
,
11507 if (dump_file
&& arm_verbose_cost
)
11509 print_rtl_single (dump_file
, x
);
11510 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
11511 *total
, result
? "final" : "partial");
11517 arm_insn_cost (rtx_insn
*insn
, bool speed
)
11521 /* Don't cost a simple reg-reg move at a full insn cost: such moves
11522 will likely disappear during register allocation. */
11523 if (!reload_completed
11524 && GET_CODE (PATTERN (insn
)) == SET
11525 && REG_P (SET_DEST (PATTERN (insn
)))
11526 && REG_P (SET_SRC (PATTERN (insn
))))
11528 cost
= pattern_cost (PATTERN (insn
), speed
);
11529 /* If the cost is zero, then it's likely a complex insn. We don't want the
11530 cost of these to be less than something we know about. */
11531 return cost
? cost
: COSTS_N_INSNS (2);
11534 /* All address computations that can be done are free, but rtx cost returns
11535 the same for practically all of them. So we weight the different types
11536 of address here in the order (most pref first):
11537 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11539 arm_arm_address_cost (rtx x
)
11541 enum rtx_code c
= GET_CODE (x
);
11543 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
11545 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
11550 if (CONST_INT_P (XEXP (x
, 1)))
11553 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
11563 arm_thumb_address_cost (rtx x
)
11565 enum rtx_code c
= GET_CODE (x
);
11570 && REG_P (XEXP (x
, 0))
11571 && CONST_INT_P (XEXP (x
, 1)))
11578 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
11579 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11581 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11584 /* Adjust cost hook for XScale. */
11586 xscale_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11589 /* Some true dependencies can have a higher cost depending
11590 on precisely how certain input operands are used. */
11592 && recog_memoized (insn
) >= 0
11593 && recog_memoized (dep
) >= 0)
11595 int shift_opnum
= get_attr_shift (insn
);
11596 enum attr_type attr_type
= get_attr_type (dep
);
11598 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11599 operand for INSN. If we have a shifted input operand and the
11600 instruction we depend on is another ALU instruction, then we may
11601 have to account for an additional stall. */
11602 if (shift_opnum
!= 0
11603 && (attr_type
== TYPE_ALU_SHIFT_IMM
11604 || attr_type
== TYPE_ALUS_SHIFT_IMM
11605 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11606 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11607 || attr_type
== TYPE_ALU_SHIFT_REG
11608 || attr_type
== TYPE_ALUS_SHIFT_REG
11609 || attr_type
== TYPE_LOGIC_SHIFT_REG
11610 || attr_type
== TYPE_LOGICS_SHIFT_REG
11611 || attr_type
== TYPE_MOV_SHIFT
11612 || attr_type
== TYPE_MVN_SHIFT
11613 || attr_type
== TYPE_MOV_SHIFT_REG
11614 || attr_type
== TYPE_MVN_SHIFT_REG
))
11616 rtx shifted_operand
;
11619 /* Get the shifted operand. */
11620 extract_insn (insn
);
11621 shifted_operand
= recog_data
.operand
[shift_opnum
];
11623 /* Iterate over all the operands in DEP. If we write an operand
11624 that overlaps with SHIFTED_OPERAND, then we have increase the
11625 cost of this dependency. */
11626 extract_insn (dep
);
11627 preprocess_constraints (dep
);
11628 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11630 /* We can ignore strict inputs. */
11631 if (recog_data
.operand_type
[opno
] == OP_IN
)
11634 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11646 /* Adjust cost hook for Cortex A9. */
11648 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11658 case REG_DEP_OUTPUT
:
11659 if (recog_memoized (insn
) >= 0
11660 && recog_memoized (dep
) >= 0)
11662 if (GET_CODE (PATTERN (insn
)) == SET
)
11665 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11667 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11669 enum attr_type attr_type_insn
= get_attr_type (insn
);
11670 enum attr_type attr_type_dep
= get_attr_type (dep
);
11672 /* By default all dependencies of the form
11675 have an extra latency of 1 cycle because
11676 of the input and output dependency in this
11677 case. However this gets modeled as an true
11678 dependency and hence all these checks. */
11679 if (REG_P (SET_DEST (PATTERN (insn
)))
11680 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
11682 /* FMACS is a special case where the dependent
11683 instruction can be issued 3 cycles before
11684 the normal latency in case of an output
11686 if ((attr_type_insn
== TYPE_FMACS
11687 || attr_type_insn
== TYPE_FMACD
)
11688 && (attr_type_dep
== TYPE_FMACS
11689 || attr_type_dep
== TYPE_FMACD
))
11691 if (dep_type
== REG_DEP_OUTPUT
)
11692 *cost
= insn_default_latency (dep
) - 3;
11694 *cost
= insn_default_latency (dep
);
11699 if (dep_type
== REG_DEP_OUTPUT
)
11700 *cost
= insn_default_latency (dep
) + 1;
11702 *cost
= insn_default_latency (dep
);
11712 gcc_unreachable ();
11718 /* Adjust cost hook for FA726TE. */
11720 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11723 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11724 have penalty of 3. */
11725 if (dep_type
== REG_DEP_TRUE
11726 && recog_memoized (insn
) >= 0
11727 && recog_memoized (dep
) >= 0
11728 && get_attr_conds (dep
) == CONDS_SET
)
11730 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11731 if (get_attr_conds (insn
) == CONDS_USE
11732 && get_attr_type (insn
) != TYPE_BRANCH
)
11738 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11739 || get_attr_conds (insn
) == CONDS_USE
)
11749 /* Implement TARGET_REGISTER_MOVE_COST.
11751 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11752 it is typically more expensive than a single memory access. We set
11753 the cost to less than two memory accesses so that floating
11754 point to integer conversion does not go through memory. */
11757 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11758 reg_class_t from
, reg_class_t to
)
11762 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11763 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11765 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11766 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11768 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11775 if (from
== HI_REGS
|| to
== HI_REGS
)
11782 /* Implement TARGET_MEMORY_MOVE_COST. */
11785 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11786 bool in ATTRIBUTE_UNUSED
)
11792 if (GET_MODE_SIZE (mode
) < 4)
11795 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11799 /* Vectorizer cost model implementation. */
11801 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11803 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11805 int misalign ATTRIBUTE_UNUSED
)
11809 switch (type_of_cost
)
11812 return current_tune
->vec_costs
->scalar_stmt_cost
;
11815 return current_tune
->vec_costs
->scalar_load_cost
;
11818 return current_tune
->vec_costs
->scalar_store_cost
;
11821 return current_tune
->vec_costs
->vec_stmt_cost
;
11824 return current_tune
->vec_costs
->vec_align_load_cost
;
11827 return current_tune
->vec_costs
->vec_store_cost
;
11829 case vec_to_scalar
:
11830 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11832 case scalar_to_vec
:
11833 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11835 case unaligned_load
:
11836 case vector_gather_load
:
11837 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11839 case unaligned_store
:
11840 case vector_scatter_store
:
11841 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11843 case cond_branch_taken
:
11844 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11846 case cond_branch_not_taken
:
11847 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11850 case vec_promote_demote
:
11851 return current_tune
->vec_costs
->vec_stmt_cost
;
11853 case vec_construct
:
11854 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11855 return elements
/ 2 + 1;
11858 gcc_unreachable ();
11862 /* Implement targetm.vectorize.add_stmt_cost. */
11865 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11866 struct _stmt_vec_info
*stmt_info
, int misalign
,
11867 enum vect_cost_model_location where
)
11869 unsigned *cost
= (unsigned *) data
;
11870 unsigned retval
= 0;
11872 if (flag_vect_cost_model
)
11874 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11875 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11877 /* Statements in an inner loop relative to the loop being
11878 vectorized are weighted more heavily. The value here is
11879 arbitrary and could potentially be improved with analysis. */
11880 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11881 count
*= 50; /* FIXME. */
11883 retval
= (unsigned) (count
* stmt_cost
);
11884 cost
[where
] += retval
;
11890 /* Return true if and only if this insn can dual-issue only as older. */
11892 cortexa7_older_only (rtx_insn
*insn
)
11894 if (recog_memoized (insn
) < 0)
11897 switch (get_attr_type (insn
))
11899 case TYPE_ALU_DSP_REG
:
11900 case TYPE_ALU_SREG
:
11901 case TYPE_ALUS_SREG
:
11902 case TYPE_LOGIC_REG
:
11903 case TYPE_LOGICS_REG
:
11905 case TYPE_ADCS_REG
:
11910 case TYPE_SHIFT_IMM
:
11911 case TYPE_SHIFT_REG
:
11912 case TYPE_LOAD_BYTE
:
11915 case TYPE_FFARITHS
:
11917 case TYPE_FFARITHD
:
11935 case TYPE_F_STORES
:
11942 /* Return true if and only if this insn can dual-issue as younger. */
11944 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11946 if (recog_memoized (insn
) < 0)
11949 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11953 switch (get_attr_type (insn
))
11956 case TYPE_ALUS_IMM
:
11957 case TYPE_LOGIC_IMM
:
11958 case TYPE_LOGICS_IMM
:
11963 case TYPE_MOV_SHIFT
:
11964 case TYPE_MOV_SHIFT_REG
:
11974 /* Look for an instruction that can dual issue only as an older
11975 instruction, and move it in front of any instructions that can
11976 dual-issue as younger, while preserving the relative order of all
11977 other instructions in the ready list. This is a hueuristic to help
11978 dual-issue in later cycles, by postponing issue of more flexible
11979 instructions. This heuristic may affect dual issue opportunities
11980 in the current cycle. */
11982 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11983 int *n_readyp
, int clock
)
11986 int first_older_only
= -1, first_younger
= -1;
11990 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11994 /* Traverse the ready list from the head (the instruction to issue
11995 first), and looking for the first instruction that can issue as
11996 younger and the first instruction that can dual-issue only as
11998 for (i
= *n_readyp
- 1; i
>= 0; i
--)
12000 rtx_insn
*insn
= ready
[i
];
12001 if (cortexa7_older_only (insn
))
12003 first_older_only
= i
;
12005 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
12008 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
12012 /* Nothing to reorder because either no younger insn found or insn
12013 that can dual-issue only as older appears before any insn that
12014 can dual-issue as younger. */
12015 if (first_younger
== -1)
12018 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
12022 /* Nothing to reorder because no older-only insn in the ready list. */
12023 if (first_older_only
== -1)
12026 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
12030 /* Move first_older_only insn before first_younger. */
12032 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
12033 INSN_UID(ready
[first_older_only
]),
12034 INSN_UID(ready
[first_younger
]));
12035 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
12036 for (i
= first_older_only
; i
< first_younger
; i
++)
12038 ready
[i
] = ready
[i
+1];
12041 ready
[i
] = first_older_only_insn
;
12045 /* Implement TARGET_SCHED_REORDER. */
12047 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
12052 case TARGET_CPU_cortexa7
:
12053 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
12056 /* Do nothing for other cores. */
12060 return arm_issue_rate ();
12063 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12064 It corrects the value of COST based on the relationship between
12065 INSN and DEP through the dependence LINK. It returns the new
12066 value. There is a per-core adjust_cost hook to adjust scheduler costs
12067 and the per-core hook can choose to completely override the generic
12068 adjust_cost function. Only put bits of code into arm_adjust_cost that
12069 are common across all cores. */
12071 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
12076 /* When generating Thumb-1 code, we want to place flag-setting operations
12077 close to a conditional branch which depends on them, so that we can
12078 omit the comparison. */
12081 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
12082 && recog_memoized (dep
) >= 0
12083 && get_attr_conds (dep
) == CONDS_SET
)
12086 if (current_tune
->sched_adjust_cost
!= NULL
)
12088 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
12092 /* XXX Is this strictly true? */
12093 if (dep_type
== REG_DEP_ANTI
12094 || dep_type
== REG_DEP_OUTPUT
)
12097 /* Call insns don't incur a stall, even if they follow a load. */
12102 if ((i_pat
= single_set (insn
)) != NULL
12103 && MEM_P (SET_SRC (i_pat
))
12104 && (d_pat
= single_set (dep
)) != NULL
12105 && MEM_P (SET_DEST (d_pat
)))
12107 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
12108 /* This is a load after a store, there is no conflict if the load reads
12109 from a cached area. Assume that loads from the stack, and from the
12110 constant pool are cached, and that others will miss. This is a
12113 if ((GET_CODE (src_mem
) == SYMBOL_REF
12114 && CONSTANT_POOL_ADDRESS_P (src_mem
))
12115 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
12116 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
12117 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
12125 arm_max_conditional_execute (void)
12127 return max_insns_skipped
;
12131 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
12134 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
12136 return (optimize
> 0) ? 2 : 0;
12140 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
12142 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12145 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12146 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12147 sequences of non-executed instructions in IT blocks probably take the same
12148 amount of time as executed instructions (and the IT instruction itself takes
12149 space in icache). This function was experimentally determined to give good
12150 results on a popular embedded benchmark. */
12153 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
12155 return (TARGET_32BIT
&& speed_p
) ? 1
12156 : arm_default_branch_cost (speed_p
, predictable_p
);
12160 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
12162 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12165 static bool fp_consts_inited
= false;
12167 static REAL_VALUE_TYPE value_fp0
;
12170 init_fp_table (void)
12174 r
= REAL_VALUE_ATOF ("0", DFmode
);
12176 fp_consts_inited
= true;
12179 /* Return TRUE if rtx X is a valid immediate FP constant. */
12181 arm_const_double_rtx (rtx x
)
12183 const REAL_VALUE_TYPE
*r
;
12185 if (!fp_consts_inited
)
12188 r
= CONST_DOUBLE_REAL_VALUE (x
);
12189 if (REAL_VALUE_MINUS_ZERO (*r
))
12192 if (real_equal (r
, &value_fp0
))
12198 /* VFPv3 has a fairly wide range of representable immediates, formed from
12199 "quarter-precision" floating-point values. These can be evaluated using this
12200 formula (with ^ for exponentiation):
12204 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12205 16 <= n <= 31 and 0 <= r <= 7.
12207 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12209 - A (most-significant) is the sign bit.
12210 - BCD are the exponent (encoded as r XOR 3).
12211 - EFGH are the mantissa (encoded as n - 16).
12214 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12215 fconst[sd] instruction, or -1 if X isn't suitable. */
12217 vfp3_const_double_index (rtx x
)
12219 REAL_VALUE_TYPE r
, m
;
12220 int sign
, exponent
;
12221 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
12222 unsigned HOST_WIDE_INT mask
;
12223 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12226 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
12229 r
= *CONST_DOUBLE_REAL_VALUE (x
);
12231 /* We can't represent these things, so detect them first. */
12232 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
12235 /* Extract sign, exponent and mantissa. */
12236 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
12237 r
= real_value_abs (&r
);
12238 exponent
= REAL_EXP (&r
);
12239 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12240 highest (sign) bit, with a fixed binary point at bit point_pos.
12241 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12242 bits for the mantissa, this may fail (low bits would be lost). */
12243 real_ldexp (&m
, &r
, point_pos
- exponent
);
12244 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12245 mantissa
= w
.elt (0);
12246 mant_hi
= w
.elt (1);
12248 /* If there are bits set in the low part of the mantissa, we can't
12249 represent this value. */
12253 /* Now make it so that mantissa contains the most-significant bits, and move
12254 the point_pos to indicate that the least-significant bits have been
12256 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12257 mantissa
= mant_hi
;
12259 /* We can permit four significant bits of mantissa only, plus a high bit
12260 which is always 1. */
12261 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
12262 if ((mantissa
& mask
) != 0)
12265 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12266 mantissa
>>= point_pos
- 5;
12268 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12269 floating-point immediate zero with Neon using an integer-zero load, but
12270 that case is handled elsewhere.) */
12274 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
12276 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12277 normalized significands are in the range [1, 2). (Our mantissa is shifted
12278 left 4 places at this point relative to normalized IEEE754 values). GCC
12279 internally uses [0.5, 1) (see real.c), so the exponent returned from
12280 REAL_EXP must be altered. */
12281 exponent
= 5 - exponent
;
12283 if (exponent
< 0 || exponent
> 7)
12286 /* Sign, mantissa and exponent are now in the correct form to plug into the
12287 formula described in the comment above. */
12288 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
12291 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12293 vfp3_const_double_rtx (rtx x
)
12298 return vfp3_const_double_index (x
) != -1;
12301 /* Recognize immediates which can be used in various Neon instructions. Legal
12302 immediates are described by the following table (for VMVN variants, the
12303 bitwise inverse of the constant shown is recognized. In either case, VMOV
12304 is output and the correct instruction to use for a given constant is chosen
12305 by the assembler). The constant shown is replicated across all elements of
12306 the destination vector.
12308 insn elems variant constant (binary)
12309 ---- ----- ------- -----------------
12310 vmov i32 0 00000000 00000000 00000000 abcdefgh
12311 vmov i32 1 00000000 00000000 abcdefgh 00000000
12312 vmov i32 2 00000000 abcdefgh 00000000 00000000
12313 vmov i32 3 abcdefgh 00000000 00000000 00000000
12314 vmov i16 4 00000000 abcdefgh
12315 vmov i16 5 abcdefgh 00000000
12316 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12317 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12318 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12319 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12320 vmvn i16 10 00000000 abcdefgh
12321 vmvn i16 11 abcdefgh 00000000
12322 vmov i32 12 00000000 00000000 abcdefgh 11111111
12323 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12324 vmov i32 14 00000000 abcdefgh 11111111 11111111
12325 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12326 vmov i8 16 abcdefgh
12327 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12328 eeeeeeee ffffffff gggggggg hhhhhhhh
12329 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12330 vmov f32 19 00000000 00000000 00000000 00000000
12332 For case 18, B = !b. Representable values are exactly those accepted by
12333 vfp3_const_double_index, but are output as floating-point numbers rather
12336 For case 19, we will change it to vmov.i32 when assembling.
12338 Variants 0-5 (inclusive) may also be used as immediates for the second
12339 operand of VORR/VBIC instructions.
12341 The INVERSE argument causes the bitwise inverse of the given operand to be
12342 recognized instead (used for recognizing legal immediates for the VAND/VORN
12343 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12344 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12345 output, rather than the real insns vbic/vorr).
12347 INVERSE makes no difference to the recognition of float vectors.
12349 The return value is the variant of immediate as shown in the above table, or
12350 -1 if the given value doesn't match any of the listed patterns.
12353 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
12354 rtx
*modconst
, int *elementwidth
)
12356 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12358 for (i = 0; i < idx; i += (STRIDE)) \
12363 immtype = (CLASS); \
12364 elsize = (ELSIZE); \
12368 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
12369 unsigned int innersize
;
12370 unsigned char bytes
[16] = {};
12371 int immtype
= -1, matches
;
12372 unsigned int invmask
= inverse
? 0xff : 0;
12373 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
12376 n_elts
= CONST_VECTOR_NUNITS (op
);
12380 gcc_assert (mode
!= VOIDmode
);
12383 innersize
= GET_MODE_UNIT_SIZE (mode
);
12385 /* Vectors of float constants. */
12386 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
12388 rtx el0
= CONST_VECTOR_ELT (op
, 0);
12390 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
12393 /* FP16 vectors cannot be represented. */
12394 if (GET_MODE_INNER (mode
) == HFmode
)
12397 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12398 are distinct in this context. */
12399 if (!const_vec_duplicate_p (op
))
12403 *modconst
= CONST_VECTOR_ELT (op
, 0);
12408 if (el0
== CONST0_RTX (GET_MODE (el0
)))
12414 /* The tricks done in the code below apply for little-endian vector layout.
12415 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12416 FIXME: Implement logic for big-endian vectors. */
12417 if (BYTES_BIG_ENDIAN
&& vector
&& !const_vec_duplicate_p (op
))
12420 /* Splat vector constant out into a byte vector. */
12421 for (i
= 0; i
< n_elts
; i
++)
12423 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
12424 unsigned HOST_WIDE_INT elpart
;
12426 gcc_assert (CONST_INT_P (el
));
12427 elpart
= INTVAL (el
);
12429 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
12431 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
12432 elpart
>>= BITS_PER_UNIT
;
12436 /* Sanity check. */
12437 gcc_assert (idx
== GET_MODE_SIZE (mode
));
12441 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
12442 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12444 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12445 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12447 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12448 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12450 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12451 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
12453 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
12455 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
12457 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
12458 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12460 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12461 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12463 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12464 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12466 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12467 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
12469 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
12471 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
12473 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12474 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12476 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12477 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12479 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12480 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12482 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12483 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12485 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
12487 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
12488 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
12496 *elementwidth
= elsize
;
12500 unsigned HOST_WIDE_INT imm
= 0;
12502 /* Un-invert bytes of recognized vector, if necessary. */
12504 for (i
= 0; i
< idx
; i
++)
12505 bytes
[i
] ^= invmask
;
12509 /* FIXME: Broken on 32-bit H_W_I hosts. */
12510 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
12512 for (i
= 0; i
< 8; i
++)
12513 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
12514 << (i
* BITS_PER_UNIT
);
12516 *modconst
= GEN_INT (imm
);
12520 unsigned HOST_WIDE_INT imm
= 0;
12522 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
12523 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
12525 *modconst
= GEN_INT (imm
);
12533 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12534 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12535 float elements), and a modified constant (whatever should be output for a
12536 VMOV) in *MODCONST. */
12539 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
12540 rtx
*modconst
, int *elementwidth
)
12544 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12550 *modconst
= tmpconst
;
12553 *elementwidth
= tmpwidth
;
12558 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12559 the immediate is valid, write a constant suitable for using as an operand
12560 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12561 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12564 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
12565 rtx
*modconst
, int *elementwidth
)
12569 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12571 if (retval
< 0 || retval
> 5)
12575 *modconst
= tmpconst
;
12578 *elementwidth
= tmpwidth
;
12583 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12584 the immediate is valid, write a constant suitable for using as an operand
12585 to VSHR/VSHL to *MODCONST and the corresponding element width to
12586 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12587 because they have different limitations. */
12590 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
12591 rtx
*modconst
, int *elementwidth
,
12594 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
12595 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12596 unsigned HOST_WIDE_INT last_elt
= 0;
12597 unsigned HOST_WIDE_INT maxshift
;
12599 /* Split vector constant out into a byte vector. */
12600 for (i
= 0; i
< n_elts
; i
++)
12602 rtx el
= CONST_VECTOR_ELT (op
, i
);
12603 unsigned HOST_WIDE_INT elpart
;
12605 if (CONST_INT_P (el
))
12606 elpart
= INTVAL (el
);
12607 else if (CONST_DOUBLE_P (el
))
12610 gcc_unreachable ();
12612 if (i
!= 0 && elpart
!= last_elt
)
12618 /* Shift less than element size. */
12619 maxshift
= innersize
* 8;
12623 /* Left shift immediate value can be from 0 to <size>-1. */
12624 if (last_elt
>= maxshift
)
12629 /* Right shift immediate value can be from 1 to <size>. */
12630 if (last_elt
== 0 || last_elt
> maxshift
)
12635 *elementwidth
= innersize
* 8;
12638 *modconst
= CONST_VECTOR_ELT (op
, 0);
12643 /* Return a string suitable for output of Neon immediate logic operation
12647 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
12648 int inverse
, int quad
)
12650 int width
, is_valid
;
12651 static char templ
[40];
12653 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12655 gcc_assert (is_valid
!= 0);
12658 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12660 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12665 /* Return a string suitable for output of Neon immediate shift operation
12666 (VSHR or VSHL) MNEM. */
12669 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12670 machine_mode mode
, int quad
,
12673 int width
, is_valid
;
12674 static char templ
[40];
12676 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12677 gcc_assert (is_valid
!= 0);
12680 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12682 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12687 /* Output a sequence of pairwise operations to implement a reduction.
12688 NOTE: We do "too much work" here, because pairwise operations work on two
12689 registers-worth of operands in one go. Unfortunately we can't exploit those
12690 extra calculations to do the full operation in fewer steps, I don't think.
12691 Although all vector elements of the result but the first are ignored, we
12692 actually calculate the same result in each of the elements. An alternative
12693 such as initially loading a vector with zero to use as each of the second
12694 operands would use up an additional register and take an extra instruction,
12695 for no particular gain. */
12698 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
12699 rtx (*reduc
) (rtx
, rtx
, rtx
))
12701 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
12704 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12706 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12707 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12712 /* If VALS is a vector constant that can be loaded into a register
12713 using VDUP, generate instructions to do so and return an RTX to
12714 assign to the register. Otherwise return NULL_RTX. */
12717 neon_vdup_constant (rtx vals
)
12719 machine_mode mode
= GET_MODE (vals
);
12720 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12723 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12726 if (!const_vec_duplicate_p (vals
, &x
))
12727 /* The elements are not all the same. We could handle repeating
12728 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12729 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12733 /* We can load this constant by using VDUP and a constant in a
12734 single ARM register. This will be cheaper than a vector
12737 x
= copy_to_mode_reg (inner_mode
, x
);
12738 return gen_vec_duplicate (mode
, x
);
12741 /* Generate code to load VALS, which is a PARALLEL containing only
12742 constants (for vec_init) or CONST_VECTOR, efficiently into a
12743 register. Returns an RTX to copy into the register, or NULL_RTX
12744 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12747 neon_make_constant (rtx vals
)
12749 machine_mode mode
= GET_MODE (vals
);
12751 rtx const_vec
= NULL_RTX
;
12752 int n_elts
= GET_MODE_NUNITS (mode
);
12756 if (GET_CODE (vals
) == CONST_VECTOR
)
12758 else if (GET_CODE (vals
) == PARALLEL
)
12760 /* A CONST_VECTOR must contain only CONST_INTs and
12761 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12762 Only store valid constants in a CONST_VECTOR. */
12763 for (i
= 0; i
< n_elts
; ++i
)
12765 rtx x
= XVECEXP (vals
, 0, i
);
12766 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12769 if (n_const
== n_elts
)
12770 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12773 gcc_unreachable ();
12775 if (const_vec
!= NULL
12776 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12777 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12779 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12780 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12781 pipeline cycle; creating the constant takes one or two ARM
12782 pipeline cycles. */
12784 else if (const_vec
!= NULL_RTX
)
12785 /* Load from constant pool. On Cortex-A8 this takes two cycles
12786 (for either double or quad vectors). We cannot take advantage
12787 of single-cycle VLD1 because we need a PC-relative addressing
12791 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12792 We cannot construct an initializer. */
12796 /* Initialize vector TARGET to VALS. */
12799 neon_expand_vector_init (rtx target
, rtx vals
)
12801 machine_mode mode
= GET_MODE (target
);
12802 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12803 int n_elts
= GET_MODE_NUNITS (mode
);
12804 int n_var
= 0, one_var
= -1;
12805 bool all_same
= true;
12809 for (i
= 0; i
< n_elts
; ++i
)
12811 x
= XVECEXP (vals
, 0, i
);
12812 if (!CONSTANT_P (x
))
12813 ++n_var
, one_var
= i
;
12815 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12821 rtx constant
= neon_make_constant (vals
);
12822 if (constant
!= NULL_RTX
)
12824 emit_move_insn (target
, constant
);
12829 /* Splat a single non-constant element if we can. */
12830 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12832 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12833 emit_insn (gen_rtx_SET (target
, gen_vec_duplicate (mode
, x
)));
12837 /* One field is non-constant. Load constant then overwrite varying
12838 field. This is more efficient than using the stack. */
12841 rtx copy
= copy_rtx (vals
);
12842 rtx merge_mask
= GEN_INT (1 << one_var
);
12844 /* Load constant part of vector, substitute neighboring value for
12845 varying element. */
12846 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12847 neon_expand_vector_init (target
, copy
);
12849 /* Insert variable. */
12850 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12851 emit_insn (gen_vec_set_internal (mode
, target
, x
, merge_mask
, target
));
12855 /* Construct the vector in memory one field at a time
12856 and load the whole vector. */
12857 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12858 for (i
= 0; i
< n_elts
; i
++)
12859 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12860 i
* GET_MODE_SIZE (inner_mode
)),
12861 XVECEXP (vals
, 0, i
));
12862 emit_move_insn (target
, mem
);
12865 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12866 ERR if it doesn't. EXP indicates the source location, which includes the
12867 inlining history for intrinsics. */
12870 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12871 const_tree exp
, const char *desc
)
12873 HOST_WIDE_INT lane
;
12875 gcc_assert (CONST_INT_P (operand
));
12877 lane
= INTVAL (operand
);
12879 if (lane
< low
|| lane
>= high
)
12882 error ("%K%s %wd out of range %wd - %wd",
12883 exp
, desc
, lane
, low
, high
- 1);
12885 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
12889 /* Bounds-check lanes. */
12892 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12895 bounds_check (operand
, low
, high
, exp
, "lane");
12898 /* Bounds-check constants. */
12901 arm_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12903 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
12907 neon_element_bits (machine_mode mode
)
12909 return GET_MODE_UNIT_BITSIZE (mode
);
12913 /* Predicates for `match_operand' and `match_operator'. */
12915 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12916 WB is true if full writeback address modes are allowed and is false
12917 if limited writeback address modes (POST_INC and PRE_DEC) are
12921 arm_coproc_mem_operand (rtx op
, bool wb
)
12925 /* Reject eliminable registers. */
12926 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12927 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12928 || reg_mentioned_p (arg_pointer_rtx
, op
)
12929 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12930 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12931 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12932 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12935 /* Constants are converted into offsets from labels. */
12939 ind
= XEXP (op
, 0);
12941 if (reload_completed
12942 && (GET_CODE (ind
) == LABEL_REF
12943 || (GET_CODE (ind
) == CONST
12944 && GET_CODE (XEXP (ind
, 0)) == PLUS
12945 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12946 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12949 /* Match: (mem (reg)). */
12951 return arm_address_register_rtx_p (ind
, 0);
12953 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12954 acceptable in any case (subject to verification by
12955 arm_address_register_rtx_p). We need WB to be true to accept
12956 PRE_INC and POST_DEC. */
12957 if (GET_CODE (ind
) == POST_INC
12958 || GET_CODE (ind
) == PRE_DEC
12960 && (GET_CODE (ind
) == PRE_INC
12961 || GET_CODE (ind
) == POST_DEC
)))
12962 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12965 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12966 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12967 && GET_CODE (XEXP (ind
, 1)) == PLUS
12968 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12969 ind
= XEXP (ind
, 1);
12974 if (GET_CODE (ind
) == PLUS
12975 && REG_P (XEXP (ind
, 0))
12976 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12977 && CONST_INT_P (XEXP (ind
, 1))
12978 && INTVAL (XEXP (ind
, 1)) > -1024
12979 && INTVAL (XEXP (ind
, 1)) < 1024
12980 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12986 /* Return TRUE if OP is a memory operand which we can load or store a vector
12987 to/from. TYPE is one of the following values:
12988 0 - Vector load/stor (vldr)
12989 1 - Core registers (ldm)
12990 2 - Element/structure loads (vld1)
12993 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12997 /* Reject eliminable registers. */
12998 if (strict
&& ! (reload_in_progress
|| reload_completed
)
12999 && (reg_mentioned_p (frame_pointer_rtx
, op
)
13000 || reg_mentioned_p (arg_pointer_rtx
, op
)
13001 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13002 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13003 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13004 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13007 /* Constants are converted into offsets from labels. */
13011 ind
= XEXP (op
, 0);
13013 if (reload_completed
13014 && (GET_CODE (ind
) == LABEL_REF
13015 || (GET_CODE (ind
) == CONST
13016 && GET_CODE (XEXP (ind
, 0)) == PLUS
13017 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13018 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13021 /* Match: (mem (reg)). */
13023 return arm_address_register_rtx_p (ind
, 0);
13025 /* Allow post-increment with Neon registers. */
13026 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
13027 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
13028 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13030 /* Allow post-increment by register for VLDn */
13031 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
13032 && GET_CODE (XEXP (ind
, 1)) == PLUS
13033 && REG_P (XEXP (XEXP (ind
, 1), 1)))
13040 && GET_CODE (ind
) == PLUS
13041 && REG_P (XEXP (ind
, 0))
13042 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
13043 && CONST_INT_P (XEXP (ind
, 1))
13044 && INTVAL (XEXP (ind
, 1)) > -1024
13045 /* For quad modes, we restrict the constant offset to be slightly less
13046 than what the instruction format permits. We have no such constraint
13047 on double mode offsets. (This must match arm_legitimate_index_p.) */
13048 && (INTVAL (XEXP (ind
, 1))
13049 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
13050 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
13056 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13059 neon_struct_mem_operand (rtx op
)
13063 /* Reject eliminable registers. */
13064 if (! (reload_in_progress
|| reload_completed
)
13065 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
13066 || reg_mentioned_p (arg_pointer_rtx
, op
)
13067 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13068 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13069 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13070 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13073 /* Constants are converted into offsets from labels. */
13077 ind
= XEXP (op
, 0);
13079 if (reload_completed
13080 && (GET_CODE (ind
) == LABEL_REF
13081 || (GET_CODE (ind
) == CONST
13082 && GET_CODE (XEXP (ind
, 0)) == PLUS
13083 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13084 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13087 /* Match: (mem (reg)). */
13089 return arm_address_register_rtx_p (ind
, 0);
13091 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13092 if (GET_CODE (ind
) == POST_INC
13093 || GET_CODE (ind
) == PRE_DEC
)
13094 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13099 /* Prepares the operands for the VCMLA by lane instruction such that the right
13100 register number is selected. This instruction is special in that it always
13101 requires a D register, however there is a choice to be made between Dn[0],
13102 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13104 The VCMLA by lane function always selects two values. For instance given D0
13105 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13106 used by the instruction. However given V4SF then index 0 and 1 are valid as
13107 D0[0] or D1[0] are both valid.
13109 This function centralizes that information based on OPERANDS, OPERANDS[3]
13110 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13111 updated to contain the right index. */
13114 neon_vcmla_lane_prepare_operands (rtx
*operands
)
13116 int lane
= INTVAL (operands
[4]);
13117 machine_mode constmode
= SImode
;
13118 machine_mode mode
= GET_MODE (operands
[3]);
13119 int regno
= REGNO (operands
[3]);
13120 regno
= ((regno
- FIRST_VFP_REGNUM
) >> 1);
13121 if (lane
> 0 && lane
>= GET_MODE_NUNITS (mode
) / 4)
13123 operands
[3] = gen_int_mode (regno
+ 1, constmode
);
13125 = gen_int_mode (lane
- GET_MODE_NUNITS (mode
) / 4, constmode
);
13129 operands
[3] = gen_int_mode (regno
, constmode
);
13130 operands
[4] = gen_int_mode (lane
, constmode
);
13136 /* Return true if X is a register that will be eliminated later on. */
13138 arm_eliminable_register (rtx x
)
13140 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
13141 || REGNO (x
) == ARG_POINTER_REGNUM
13142 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
13143 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
13146 /* Return GENERAL_REGS if a scratch register required to reload x to/from
13147 coprocessor registers. Otherwise return NO_REGS. */
13150 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
13152 if (mode
== HFmode
)
13154 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
13155 return GENERAL_REGS
;
13156 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
13158 return GENERAL_REGS
;
13161 /* The neon move patterns handle all legitimate vector and struct
13164 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
13165 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
13166 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
13167 || VALID_NEON_STRUCT_MODE (mode
)))
13170 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
13173 return GENERAL_REGS
;
13176 /* Values which must be returned in the most-significant end of the return
13180 arm_return_in_msb (const_tree valtype
)
13182 return (TARGET_AAPCS_BASED
13183 && BYTES_BIG_ENDIAN
13184 && (AGGREGATE_TYPE_P (valtype
)
13185 || TREE_CODE (valtype
) == COMPLEX_TYPE
13186 || FIXED_POINT_TYPE_P (valtype
)));
13189 /* Return TRUE if X references a SYMBOL_REF. */
13191 symbol_mentioned_p (rtx x
)
13196 if (GET_CODE (x
) == SYMBOL_REF
)
13199 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13200 are constant offsets, not symbols. */
13201 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13204 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13206 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13212 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13213 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
13216 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
13223 /* Return TRUE if X references a LABEL_REF. */
13225 label_mentioned_p (rtx x
)
13230 if (GET_CODE (x
) == LABEL_REF
)
13233 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13234 instruction, but they are constant offsets, not symbols. */
13235 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13238 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13239 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13245 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13246 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
13249 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
13257 tls_mentioned_p (rtx x
)
13259 switch (GET_CODE (x
))
13262 return tls_mentioned_p (XEXP (x
, 0));
13265 if (XINT (x
, 1) == UNSPEC_TLS
)
13268 /* Fall through. */
13274 /* Must not copy any rtx that uses a pc-relative address.
13275 Also, disallow copying of load-exclusive instructions that
13276 may appear after splitting of compare-and-swap-style operations
13277 so as to prevent those loops from being transformed away from their
13278 canonical forms (see PR 69904). */
13281 arm_cannot_copy_insn_p (rtx_insn
*insn
)
13283 /* The tls call insn cannot be copied, as it is paired with a data
13285 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
13288 subrtx_iterator::array_type array
;
13289 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
13291 const_rtx x
= *iter
;
13292 if (GET_CODE (x
) == UNSPEC
13293 && (XINT (x
, 1) == UNSPEC_PIC_BASE
13294 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
13298 rtx set
= single_set (insn
);
13301 rtx src
= SET_SRC (set
);
13302 if (GET_CODE (src
) == ZERO_EXTEND
)
13303 src
= XEXP (src
, 0);
13305 /* Catch the load-exclusive and load-acquire operations. */
13306 if (GET_CODE (src
) == UNSPEC_VOLATILE
13307 && (XINT (src
, 1) == VUNSPEC_LL
13308 || XINT (src
, 1) == VUNSPEC_LAX
))
13315 minmax_code (rtx x
)
13317 enum rtx_code code
= GET_CODE (x
);
13330 gcc_unreachable ();
13334 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13337 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
13338 int *mask
, bool *signed_sat
)
13340 /* The high bound must be a power of two minus one. */
13341 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
13345 /* The low bound is either zero (for usat) or one less than the
13346 negation of the high bound (for ssat). */
13347 if (INTVAL (lo_bound
) == 0)
13352 *signed_sat
= false;
13357 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
13362 *signed_sat
= true;
13370 /* Return 1 if memory locations are adjacent. */
13372 adjacent_mem_locations (rtx a
, rtx b
)
13374 /* We don't guarantee to preserve the order of these memory refs. */
13375 if (volatile_refs_p (a
) || volatile_refs_p (b
))
13378 if ((REG_P (XEXP (a
, 0))
13379 || (GET_CODE (XEXP (a
, 0)) == PLUS
13380 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
13381 && (REG_P (XEXP (b
, 0))
13382 || (GET_CODE (XEXP (b
, 0)) == PLUS
13383 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
13385 HOST_WIDE_INT val0
= 0, val1
= 0;
13389 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
13391 reg0
= XEXP (XEXP (a
, 0), 0);
13392 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
13395 reg0
= XEXP (a
, 0);
13397 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
13399 reg1
= XEXP (XEXP (b
, 0), 0);
13400 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
13403 reg1
= XEXP (b
, 0);
13405 /* Don't accept any offset that will require multiple
13406 instructions to handle, since this would cause the
13407 arith_adjacentmem pattern to output an overlong sequence. */
13408 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
13411 /* Don't allow an eliminable register: register elimination can make
13412 the offset too large. */
13413 if (arm_eliminable_register (reg0
))
13416 val_diff
= val1
- val0
;
13420 /* If the target has load delay slots, then there's no benefit
13421 to using an ldm instruction unless the offset is zero and
13422 we are optimizing for size. */
13423 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
13424 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
13425 && (val_diff
== 4 || val_diff
== -4));
13428 return ((REGNO (reg0
) == REGNO (reg1
))
13429 && (val_diff
== 4 || val_diff
== -4));
13435 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13436 for load operations, false for store operations. CONSECUTIVE is true
13437 if the register numbers in the operation must be consecutive in the register
13438 bank. RETURN_PC is true if value is to be loaded in PC.
13439 The pattern we are trying to match for load is:
13440 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13441 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13444 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13447 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13448 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13449 3. If consecutive is TRUE, then for kth register being loaded,
13450 REGNO (R_dk) = REGNO (R_d0) + k.
13451 The pattern for store is similar. */
13453 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
13454 bool consecutive
, bool return_pc
)
13456 HOST_WIDE_INT count
= XVECLEN (op
, 0);
13457 rtx reg
, mem
, addr
;
13459 unsigned first_regno
;
13460 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
13462 bool addr_reg_in_reglist
= false;
13463 bool update
= false;
13468 /* If not in SImode, then registers must be consecutive
13469 (e.g., VLDM instructions for DFmode). */
13470 gcc_assert ((mode
== SImode
) || consecutive
);
13471 /* Setting return_pc for stores is illegal. */
13472 gcc_assert (!return_pc
|| load
);
13474 /* Set up the increments and the regs per val based on the mode. */
13475 reg_increment
= GET_MODE_SIZE (mode
);
13476 regs_per_val
= reg_increment
/ 4;
13477 offset_adj
= return_pc
? 1 : 0;
13480 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
13481 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
13484 /* Check if this is a write-back. */
13485 elt
= XVECEXP (op
, 0, offset_adj
);
13486 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
13492 /* The offset adjustment must be the number of registers being
13493 popped times the size of a single register. */
13494 if (!REG_P (SET_DEST (elt
))
13495 || !REG_P (XEXP (SET_SRC (elt
), 0))
13496 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
13497 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
13498 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
13499 ((count
- 1 - offset_adj
) * reg_increment
))
13503 i
= i
+ offset_adj
;
13504 base
= base
+ offset_adj
;
13505 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13506 success depends on the type: VLDM can do just one reg,
13507 LDM must do at least two. */
13508 if ((count
<= i
) && (mode
== SImode
))
13511 elt
= XVECEXP (op
, 0, i
- 1);
13512 if (GET_CODE (elt
) != SET
)
13517 reg
= SET_DEST (elt
);
13518 mem
= SET_SRC (elt
);
13522 reg
= SET_SRC (elt
);
13523 mem
= SET_DEST (elt
);
13526 if (!REG_P (reg
) || !MEM_P (mem
))
13529 regno
= REGNO (reg
);
13530 first_regno
= regno
;
13531 addr
= XEXP (mem
, 0);
13532 if (GET_CODE (addr
) == PLUS
)
13534 if (!CONST_INT_P (XEXP (addr
, 1)))
13537 offset
= INTVAL (XEXP (addr
, 1));
13538 addr
= XEXP (addr
, 0);
13544 /* Don't allow SP to be loaded unless it is also the base register. It
13545 guarantees that SP is reset correctly when an LDM instruction
13546 is interrupted. Otherwise, we might end up with a corrupt stack. */
13547 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13550 if (regno
== REGNO (addr
))
13551 addr_reg_in_reglist
= true;
13553 for (; i
< count
; i
++)
13555 elt
= XVECEXP (op
, 0, i
);
13556 if (GET_CODE (elt
) != SET
)
13561 reg
= SET_DEST (elt
);
13562 mem
= SET_SRC (elt
);
13566 reg
= SET_SRC (elt
);
13567 mem
= SET_DEST (elt
);
13571 || GET_MODE (reg
) != mode
13572 || REGNO (reg
) <= regno
13575 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
13576 /* Don't allow SP to be loaded unless it is also the base register. It
13577 guarantees that SP is reset correctly when an LDM instruction
13578 is interrupted. Otherwise, we might end up with a corrupt stack. */
13579 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13581 || GET_MODE (mem
) != mode
13582 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
13583 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
13584 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13585 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13586 offset
+ (i
- base
) * reg_increment
))
13587 && (!REG_P (XEXP (mem
, 0))
13588 || offset
+ (i
- base
) * reg_increment
!= 0)))
13591 regno
= REGNO (reg
);
13592 if (regno
== REGNO (addr
))
13593 addr_reg_in_reglist
= true;
13598 if (update
&& addr_reg_in_reglist
)
13601 /* For Thumb-1, address register is always modified - either by write-back
13602 or by explicit load. If the pattern does not describe an update,
13603 then the address register must be in the list of loaded registers. */
13605 return update
|| addr_reg_in_reglist
;
13611 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13612 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13613 instruction. ADD_OFFSET is nonzero if the base address register needs
13614 to be modified with an add instruction before we can use it. */
13617 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13618 int nops
, HOST_WIDE_INT add_offset
)
13620 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13621 if the offset isn't small enough. The reason 2 ldrs are faster
13622 is because these ARMs are able to do more than one cache access
13623 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13624 whilst the ARM8 has a double bandwidth cache. This means that
13625 these cores can do both an instruction fetch and a data fetch in
13626 a single cycle, so the trick of calculating the address into a
13627 scratch register (one of the result regs) and then doing a load
13628 multiple actually becomes slower (and no smaller in code size).
13629 That is the transformation
13631 ldr rd1, [rbase + offset]
13632 ldr rd2, [rbase + offset + 4]
13636 add rd1, rbase, offset
13637 ldmia rd1, {rd1, rd2}
13639 produces worse code -- '3 cycles + any stalls on rd2' instead of
13640 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13641 access per cycle, the first sequence could never complete in less
13642 than 6 cycles, whereas the ldm sequence would only take 5 and
13643 would make better use of sequential accesses if not hitting the
13646 We cheat here and test 'arm_ld_sched' which we currently know to
13647 only be true for the ARM8, ARM9 and StrongARM. If this ever
13648 changes, then the test below needs to be reworked. */
13649 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13652 /* XScale has load-store double instructions, but they have stricter
13653 alignment requirements than load-store multiple, so we cannot
13656 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13657 the pipeline until completion.
13665 An ldr instruction takes 1-3 cycles, but does not block the
13674 Best case ldr will always win. However, the more ldr instructions
13675 we issue, the less likely we are to be able to schedule them well.
13676 Using ldr instructions also increases code size.
13678 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13679 for counts of 3 or 4 regs. */
13680 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13685 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13686 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13687 an array ORDER which describes the sequence to use when accessing the
13688 offsets that produces an ascending order. In this sequence, each
13689 offset must be larger by exactly 4 than the previous one. ORDER[0]
13690 must have been filled in with the lowest offset by the caller.
13691 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13692 we use to verify that ORDER produces an ascending order of registers.
13693 Return true if it was possible to construct such an order, false if
13697 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13698 int *unsorted_regs
)
13701 for (i
= 1; i
< nops
; i
++)
13705 order
[i
] = order
[i
- 1];
13706 for (j
= 0; j
< nops
; j
++)
13707 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13709 /* We must find exactly one offset that is higher than the
13710 previous one by 4. */
13711 if (order
[i
] != order
[i
- 1])
13715 if (order
[i
] == order
[i
- 1])
13717 /* The register numbers must be ascending. */
13718 if (unsorted_regs
!= NULL
13719 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13725 /* Used to determine in a peephole whether a sequence of load
13726 instructions can be changed into a load-multiple instruction.
13727 NOPS is the number of separate load instructions we are examining. The
13728 first NOPS entries in OPERANDS are the destination registers, the
13729 next NOPS entries are memory operands. If this function is
13730 successful, *BASE is set to the common base register of the memory
13731 accesses; *LOAD_OFFSET is set to the first memory location's offset
13732 from that base register.
13733 REGS is an array filled in with the destination register numbers.
13734 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13735 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13736 the sequence of registers in REGS matches the loads from ascending memory
13737 locations, and the function verifies that the register numbers are
13738 themselves ascending. If CHECK_REGS is false, the register numbers
13739 are stored in the order they are found in the operands. */
13741 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13742 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13744 int unsorted_regs
[MAX_LDM_STM_OPS
];
13745 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13746 int order
[MAX_LDM_STM_OPS
];
13750 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13751 easily extended if required. */
13752 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13754 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13756 /* Loop over the operands and check that the memory references are
13757 suitable (i.e. immediate offsets from the same base register). At
13758 the same time, extract the target register, and the memory
13760 for (i
= 0; i
< nops
; i
++)
13765 /* Convert a subreg of a mem into the mem itself. */
13766 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13767 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13769 gcc_assert (MEM_P (operands
[nops
+ i
]));
13771 /* Don't reorder volatile memory references; it doesn't seem worth
13772 looking for the case where the order is ok anyway. */
13773 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13776 offset
= const0_rtx
;
13778 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13779 || (GET_CODE (reg
) == SUBREG
13780 && REG_P (reg
= SUBREG_REG (reg
))))
13781 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13782 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13783 || (GET_CODE (reg
) == SUBREG
13784 && REG_P (reg
= SUBREG_REG (reg
))))
13785 && (CONST_INT_P (offset
13786 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13790 base_reg
= REGNO (reg
);
13791 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13794 else if (base_reg
!= (int) REGNO (reg
))
13795 /* Not addressed from the same base register. */
13798 unsorted_regs
[i
] = (REG_P (operands
[i
])
13799 ? REGNO (operands
[i
])
13800 : REGNO (SUBREG_REG (operands
[i
])));
13802 /* If it isn't an integer register, or if it overwrites the
13803 base register but isn't the last insn in the list, then
13804 we can't do this. */
13805 if (unsorted_regs
[i
] < 0
13806 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13807 || unsorted_regs
[i
] > 14
13808 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13811 /* Don't allow SP to be loaded unless it is also the base
13812 register. It guarantees that SP is reset correctly when
13813 an LDM instruction is interrupted. Otherwise, we might
13814 end up with a corrupt stack. */
13815 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13818 unsorted_offsets
[i
] = INTVAL (offset
);
13819 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13823 /* Not a suitable memory address. */
13827 /* All the useful information has now been extracted from the
13828 operands into unsorted_regs and unsorted_offsets; additionally,
13829 order[0] has been set to the lowest offset in the list. Sort
13830 the offsets into order, verifying that they are adjacent, and
13831 check that the register numbers are ascending. */
13832 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13833 check_regs
? unsorted_regs
: NULL
))
13837 memcpy (saved_order
, order
, sizeof order
);
13843 for (i
= 0; i
< nops
; i
++)
13844 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13846 *load_offset
= unsorted_offsets
[order
[0]];
13849 if (unsorted_offsets
[order
[0]] == 0)
13850 ldm_case
= 1; /* ldmia */
13851 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13852 ldm_case
= 2; /* ldmib */
13853 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13854 ldm_case
= 3; /* ldmda */
13855 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13856 ldm_case
= 4; /* ldmdb */
13857 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13858 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13863 if (!multiple_operation_profitable_p (false, nops
,
13865 ? unsorted_offsets
[order
[0]] : 0))
13871 /* Used to determine in a peephole whether a sequence of store instructions can
13872 be changed into a store-multiple instruction.
13873 NOPS is the number of separate store instructions we are examining.
13874 NOPS_TOTAL is the total number of instructions recognized by the peephole
13876 The first NOPS entries in OPERANDS are the source registers, the next
13877 NOPS entries are memory operands. If this function is successful, *BASE is
13878 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13879 to the first memory location's offset from that base register. REGS is an
13880 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13881 likewise filled with the corresponding rtx's.
13882 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13883 numbers to an ascending order of stores.
13884 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13885 from ascending memory locations, and the function verifies that the register
13886 numbers are themselves ascending. If CHECK_REGS is false, the register
13887 numbers are stored in the order they are found in the operands. */
13889 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13890 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13891 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13893 int unsorted_regs
[MAX_LDM_STM_OPS
];
13894 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13895 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13896 int order
[MAX_LDM_STM_OPS
];
13898 rtx base_reg_rtx
= NULL
;
13901 /* Write back of base register is currently only supported for Thumb 1. */
13902 int base_writeback
= TARGET_THUMB1
;
13904 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13905 easily extended if required. */
13906 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13908 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13910 /* Loop over the operands and check that the memory references are
13911 suitable (i.e. immediate offsets from the same base register). At
13912 the same time, extract the target register, and the memory
13914 for (i
= 0; i
< nops
; i
++)
13919 /* Convert a subreg of a mem into the mem itself. */
13920 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13921 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13923 gcc_assert (MEM_P (operands
[nops
+ i
]));
13925 /* Don't reorder volatile memory references; it doesn't seem worth
13926 looking for the case where the order is ok anyway. */
13927 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13930 offset
= const0_rtx
;
13932 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13933 || (GET_CODE (reg
) == SUBREG
13934 && REG_P (reg
= SUBREG_REG (reg
))))
13935 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13936 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13937 || (GET_CODE (reg
) == SUBREG
13938 && REG_P (reg
= SUBREG_REG (reg
))))
13939 && (CONST_INT_P (offset
13940 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13942 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13943 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13944 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13948 base_reg
= REGNO (reg
);
13949 base_reg_rtx
= reg
;
13950 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13953 else if (base_reg
!= (int) REGNO (reg
))
13954 /* Not addressed from the same base register. */
13957 /* If it isn't an integer register, then we can't do this. */
13958 if (unsorted_regs
[i
] < 0
13959 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13960 /* The effects are unpredictable if the base register is
13961 both updated and stored. */
13962 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13963 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13964 || unsorted_regs
[i
] > 14)
13967 unsorted_offsets
[i
] = INTVAL (offset
);
13968 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13972 /* Not a suitable memory address. */
13976 /* All the useful information has now been extracted from the
13977 operands into unsorted_regs and unsorted_offsets; additionally,
13978 order[0] has been set to the lowest offset in the list. Sort
13979 the offsets into order, verifying that they are adjacent, and
13980 check that the register numbers are ascending. */
13981 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13982 check_regs
? unsorted_regs
: NULL
))
13986 memcpy (saved_order
, order
, sizeof order
);
13992 for (i
= 0; i
< nops
; i
++)
13994 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13996 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13999 *load_offset
= unsorted_offsets
[order
[0]];
14003 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
14006 if (unsorted_offsets
[order
[0]] == 0)
14007 stm_case
= 1; /* stmia */
14008 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
14009 stm_case
= 2; /* stmib */
14010 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
14011 stm_case
= 3; /* stmda */
14012 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
14013 stm_case
= 4; /* stmdb */
14017 if (!multiple_operation_profitable_p (false, nops
, 0))
14023 /* Routines for use in generating RTL. */
14025 /* Generate a load-multiple instruction. COUNT is the number of loads in
14026 the instruction; REGS and MEMS are arrays containing the operands.
14027 BASEREG is the base register to be used in addressing the memory operands.
14028 WBACK_OFFSET is nonzero if the instruction should update the base
14032 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
14033 HOST_WIDE_INT wback_offset
)
14038 if (!multiple_operation_profitable_p (false, count
, 0))
14044 for (i
= 0; i
< count
; i
++)
14045 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
14047 if (wback_offset
!= 0)
14048 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14050 seq
= get_insns ();
14056 result
= gen_rtx_PARALLEL (VOIDmode
,
14057 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
14058 if (wback_offset
!= 0)
14060 XVECEXP (result
, 0, 0)
14061 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14066 for (j
= 0; i
< count
; i
++, j
++)
14067 XVECEXP (result
, 0, i
)
14068 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
14073 /* Generate a store-multiple instruction. COUNT is the number of stores in
14074 the instruction; REGS and MEMS are arrays containing the operands.
14075 BASEREG is the base register to be used in addressing the memory operands.
14076 WBACK_OFFSET is nonzero if the instruction should update the base
14080 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
14081 HOST_WIDE_INT wback_offset
)
14086 if (GET_CODE (basereg
) == PLUS
)
14087 basereg
= XEXP (basereg
, 0);
14089 if (!multiple_operation_profitable_p (false, count
, 0))
14095 for (i
= 0; i
< count
; i
++)
14096 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
14098 if (wback_offset
!= 0)
14099 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14101 seq
= get_insns ();
14107 result
= gen_rtx_PARALLEL (VOIDmode
,
14108 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
14109 if (wback_offset
!= 0)
14111 XVECEXP (result
, 0, 0)
14112 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14117 for (j
= 0; i
< count
; i
++, j
++)
14118 XVECEXP (result
, 0, i
)
14119 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
14124 /* Generate either a load-multiple or a store-multiple instruction. This
14125 function can be used in situations where we can start with a single MEM
14126 rtx and adjust its address upwards.
14127 COUNT is the number of operations in the instruction, not counting a
14128 possible update of the base register. REGS is an array containing the
14130 BASEREG is the base register to be used in addressing the memory operands,
14131 which are constructed from BASEMEM.
14132 WRITE_BACK specifies whether the generated instruction should include an
14133 update of the base register.
14134 OFFSETP is used to pass an offset to and from this function; this offset
14135 is not used when constructing the address (instead BASEMEM should have an
14136 appropriate offset in its address), it is used only for setting
14137 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
14140 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
14141 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
14143 rtx mems
[MAX_LDM_STM_OPS
];
14144 HOST_WIDE_INT offset
= *offsetp
;
14147 gcc_assert (count
<= MAX_LDM_STM_OPS
);
14149 if (GET_CODE (basereg
) == PLUS
)
14150 basereg
= XEXP (basereg
, 0);
14152 for (i
= 0; i
< count
; i
++)
14154 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
14155 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
14163 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
14164 write_back
? 4 * count
: 0);
14166 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
14167 write_back
? 4 * count
: 0);
14171 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14172 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14174 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
14179 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14180 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14182 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
14186 /* Called from a peephole2 expander to turn a sequence of loads into an
14187 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14188 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14189 is true if we can reorder the registers because they are used commutatively
14191 Returns true iff we could generate a new instruction. */
14194 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
14196 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14197 rtx mems
[MAX_LDM_STM_OPS
];
14198 int i
, j
, base_reg
;
14200 HOST_WIDE_INT offset
;
14201 int write_back
= FALSE
;
14205 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
14206 &base_reg
, &offset
, !sort_regs
);
14212 for (i
= 0; i
< nops
- 1; i
++)
14213 for (j
= i
+ 1; j
< nops
; j
++)
14214 if (regs
[i
] > regs
[j
])
14220 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14224 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
14226 /* Thumb-1 ldm uses writeback except if the base is loaded. */
14228 for (i
= 0; i
< nops
; i
++)
14229 if (base_reg
== regs
[i
])
14230 write_back
= false;
14232 /* Ensure the base is dead if it is updated. */
14233 if (write_back
&& !peep2_reg_dead_p (nops
, base_reg_rtx
))
14239 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
14240 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
14242 base_reg_rtx
= newbase
;
14245 for (i
= 0; i
< nops
; i
++)
14247 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14248 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14251 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14252 write_back
? offset
+ i
* 4 : 0));
14256 /* Called from a peephole2 expander to turn a sequence of stores into an
14257 STM instruction. OPERANDS are the operands found by the peephole matcher;
14258 NOPS indicates how many separate stores we are trying to combine.
14259 Returns true iff we could generate a new instruction. */
14262 gen_stm_seq (rtx
*operands
, int nops
)
14265 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14266 rtx mems
[MAX_LDM_STM_OPS
];
14269 HOST_WIDE_INT offset
;
14270 int write_back
= FALSE
;
14273 bool base_reg_dies
;
14275 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
14276 mem_order
, &base_reg
, &offset
, true);
14281 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14283 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
14286 gcc_assert (base_reg_dies
);
14292 gcc_assert (base_reg_dies
);
14293 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14297 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14299 for (i
= 0; i
< nops
; i
++)
14301 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14302 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14305 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14306 write_back
? offset
+ i
* 4 : 0));
14310 /* Called from a peephole2 expander to turn a sequence of stores that are
14311 preceded by constant loads into an STM instruction. OPERANDS are the
14312 operands found by the peephole matcher; NOPS indicates how many
14313 separate stores we are trying to combine; there are 2 * NOPS
14314 instructions in the peephole.
14315 Returns true iff we could generate a new instruction. */
14318 gen_const_stm_seq (rtx
*operands
, int nops
)
14320 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
14321 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14322 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
14323 rtx mems
[MAX_LDM_STM_OPS
];
14326 HOST_WIDE_INT offset
;
14327 int write_back
= FALSE
;
14330 bool base_reg_dies
;
14332 HARD_REG_SET allocated
;
14334 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
14335 mem_order
, &base_reg
, &offset
, false);
14340 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
14342 /* If the same register is used more than once, try to find a free
14344 CLEAR_HARD_REG_SET (allocated
);
14345 for (i
= 0; i
< nops
; i
++)
14347 for (j
= i
+ 1; j
< nops
; j
++)
14348 if (regs
[i
] == regs
[j
])
14350 rtx t
= peep2_find_free_register (0, nops
* 2,
14351 TARGET_THUMB1
? "l" : "r",
14352 SImode
, &allocated
);
14356 regs
[i
] = REGNO (t
);
14360 /* Compute an ordering that maps the register numbers to an ascending
14363 for (i
= 0; i
< nops
; i
++)
14364 if (regs
[i
] < regs
[reg_order
[0]])
14367 for (i
= 1; i
< nops
; i
++)
14369 int this_order
= reg_order
[i
- 1];
14370 for (j
= 0; j
< nops
; j
++)
14371 if (regs
[j
] > regs
[reg_order
[i
- 1]]
14372 && (this_order
== reg_order
[i
- 1]
14373 || regs
[j
] < regs
[this_order
]))
14375 reg_order
[i
] = this_order
;
14378 /* Ensure that registers that must be live after the instruction end
14379 up with the correct value. */
14380 for (i
= 0; i
< nops
; i
++)
14382 int this_order
= reg_order
[i
];
14383 if ((this_order
!= mem_order
[i
]
14384 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
14385 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
14389 /* Load the constants. */
14390 for (i
= 0; i
< nops
; i
++)
14392 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
14393 sorted_regs
[i
] = regs
[reg_order
[i
]];
14394 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
14397 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14399 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
14402 gcc_assert (base_reg_dies
);
14408 gcc_assert (base_reg_dies
);
14409 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14413 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14415 for (i
= 0; i
< nops
; i
++)
14417 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14418 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14421 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
14422 write_back
? offset
+ i
* 4 : 0));
14426 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14427 unaligned copies on processors which support unaligned semantics for those
14428 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14429 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14430 An interleave factor of 1 (the minimum) will perform no interleaving.
14431 Load/store multiple are used for aligned addresses where possible. */
14434 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
14435 HOST_WIDE_INT length
,
14436 unsigned int interleave_factor
)
14438 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
14439 int *regnos
= XALLOCAVEC (int, interleave_factor
);
14440 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
14441 HOST_WIDE_INT i
, j
;
14442 HOST_WIDE_INT remaining
= length
, words
;
14443 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
14445 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
14446 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
14447 HOST_WIDE_INT srcoffset
, dstoffset
;
14448 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
14451 gcc_assert (interleave_factor
>= 1 && interleave_factor
<= 4);
14453 /* Use hard registers if we have aligned source or destination so we can use
14454 load/store multiple with contiguous registers. */
14455 if (dst_aligned
|| src_aligned
)
14456 for (i
= 0; i
< interleave_factor
; i
++)
14457 regs
[i
] = gen_rtx_REG (SImode
, i
);
14459 for (i
= 0; i
< interleave_factor
; i
++)
14460 regs
[i
] = gen_reg_rtx (SImode
);
14462 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
14463 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
14465 srcoffset
= dstoffset
= 0;
14467 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14468 For copying the last bytes we want to subtract this offset again. */
14469 src_autoinc
= dst_autoinc
= 0;
14471 for (i
= 0; i
< interleave_factor
; i
++)
14474 /* Copy BLOCK_SIZE_BYTES chunks. */
14476 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
14479 if (src_aligned
&& interleave_factor
> 1)
14481 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
14482 TRUE
, srcbase
, &srcoffset
));
14483 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14487 for (j
= 0; j
< interleave_factor
; j
++)
14489 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
14491 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14492 srcoffset
+ j
* UNITS_PER_WORD
);
14493 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14495 srcoffset
+= block_size_bytes
;
14499 if (dst_aligned
&& interleave_factor
> 1)
14501 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
14502 TRUE
, dstbase
, &dstoffset
));
14503 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14507 for (j
= 0; j
< interleave_factor
; j
++)
14509 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
14511 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14512 dstoffset
+ j
* UNITS_PER_WORD
);
14513 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14515 dstoffset
+= block_size_bytes
;
14518 remaining
-= block_size_bytes
;
14521 /* Copy any whole words left (note these aren't interleaved with any
14522 subsequent halfword/byte load/stores in the interests of simplicity). */
14524 words
= remaining
/ UNITS_PER_WORD
;
14526 gcc_assert (words
< interleave_factor
);
14528 if (src_aligned
&& words
> 1)
14530 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
14532 src_autoinc
+= UNITS_PER_WORD
* words
;
14536 for (j
= 0; j
< words
; j
++)
14538 addr
= plus_constant (Pmode
, src
,
14539 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
14540 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14541 srcoffset
+ j
* UNITS_PER_WORD
);
14543 emit_move_insn (regs
[j
], mem
);
14545 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14547 srcoffset
+= words
* UNITS_PER_WORD
;
14550 if (dst_aligned
&& words
> 1)
14552 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14554 dst_autoinc
+= words
* UNITS_PER_WORD
;
14558 for (j
= 0; j
< words
; j
++)
14560 addr
= plus_constant (Pmode
, dst
,
14561 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14562 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14563 dstoffset
+ j
* UNITS_PER_WORD
);
14565 emit_move_insn (mem
, regs
[j
]);
14567 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14569 dstoffset
+= words
* UNITS_PER_WORD
;
14572 remaining
-= words
* UNITS_PER_WORD
;
14574 gcc_assert (remaining
< 4);
14576 /* Copy a halfword if necessary. */
14578 if (remaining
>= 2)
14580 halfword_tmp
= gen_reg_rtx (SImode
);
14582 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14583 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14584 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14586 /* Either write out immediately, or delay until we've loaded the last
14587 byte, depending on interleave factor. */
14588 if (interleave_factor
== 1)
14590 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14591 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14592 emit_insn (gen_unaligned_storehi (mem
,
14593 gen_lowpart (HImode
, halfword_tmp
)));
14594 halfword_tmp
= NULL
;
14602 gcc_assert (remaining
< 2);
14604 /* Copy last byte. */
14606 if ((remaining
& 1) != 0)
14608 byte_tmp
= gen_reg_rtx (SImode
);
14610 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14611 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14612 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14614 if (interleave_factor
== 1)
14616 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14617 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14618 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14627 /* Store last halfword if we haven't done so already. */
14631 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14632 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14633 emit_insn (gen_unaligned_storehi (mem
,
14634 gen_lowpart (HImode
, halfword_tmp
)));
14638 /* Likewise for last byte. */
14642 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14643 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14644 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14648 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14651 /* From mips_adjust_block_mem:
14653 Helper function for doing a loop-based block operation on memory
14654 reference MEM. Each iteration of the loop will operate on LENGTH
14657 Create a new base register for use within the loop and point it to
14658 the start of MEM. Create a new memory reference that uses this
14659 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14662 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14665 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14667 /* Although the new mem does not refer to a known location,
14668 it does keep up to LENGTH bytes of alignment. */
14669 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14670 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14673 /* From mips_block_move_loop:
14675 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14676 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14677 the memory regions do not overlap. */
14680 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14681 unsigned int interleave_factor
,
14682 HOST_WIDE_INT bytes_per_iter
)
14684 rtx src_reg
, dest_reg
, final_src
, test
;
14685 HOST_WIDE_INT leftover
;
14687 leftover
= length
% bytes_per_iter
;
14688 length
-= leftover
;
14690 /* Create registers and memory references for use within the loop. */
14691 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14692 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14694 /* Calculate the value that SRC_REG should have after the last iteration of
14696 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14697 0, 0, OPTAB_WIDEN
);
14699 /* Emit the start of the loop. */
14700 rtx_code_label
*label
= gen_label_rtx ();
14701 emit_label (label
);
14703 /* Emit the loop body. */
14704 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14705 interleave_factor
);
14707 /* Move on to the next block. */
14708 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14709 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14711 /* Emit the loop condition. */
14712 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14713 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14715 /* Mop up any left-over bytes. */
14717 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14720 /* Emit a block move when either the source or destination is unaligned (not
14721 aligned to a four-byte boundary). This may need further tuning depending on
14722 core type, optimize_size setting, etc. */
14725 arm_cpymemqi_unaligned (rtx
*operands
)
14727 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14731 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14732 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14733 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14734 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14735 or dst_aligned though: allow more interleaving in those cases since the
14736 resulting code can be smaller. */
14737 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14738 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14741 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14742 interleave_factor
, bytes_per_iter
);
14744 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14745 interleave_factor
);
14749 /* Note that the loop created by arm_block_move_unaligned_loop may be
14750 subject to loop unrolling, which makes tuning this condition a little
14753 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14755 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14762 arm_gen_cpymemqi (rtx
*operands
)
14764 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14765 HOST_WIDE_INT srcoffset
, dstoffset
;
14766 rtx src
, dst
, srcbase
, dstbase
;
14767 rtx part_bytes_reg
= NULL
;
14770 if (!CONST_INT_P (operands
[2])
14771 || !CONST_INT_P (operands
[3])
14772 || INTVAL (operands
[2]) > 64)
14775 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14776 return arm_cpymemqi_unaligned (operands
);
14778 if (INTVAL (operands
[3]) & 3)
14781 dstbase
= operands
[0];
14782 srcbase
= operands
[1];
14784 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14785 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14787 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14788 out_words_to_go
= INTVAL (operands
[2]) / 4;
14789 last_bytes
= INTVAL (operands
[2]) & 3;
14790 dstoffset
= srcoffset
= 0;
14792 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14793 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14795 while (in_words_to_go
>= 2)
14797 if (in_words_to_go
> 4)
14798 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14799 TRUE
, srcbase
, &srcoffset
));
14801 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14802 src
, FALSE
, srcbase
,
14805 if (out_words_to_go
)
14807 if (out_words_to_go
> 4)
14808 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14809 TRUE
, dstbase
, &dstoffset
));
14810 else if (out_words_to_go
!= 1)
14811 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14812 out_words_to_go
, dst
,
14815 dstbase
, &dstoffset
));
14818 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14819 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
14820 if (last_bytes
!= 0)
14822 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14828 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14829 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14832 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14833 if (out_words_to_go
)
14837 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14838 sreg
= copy_to_reg (mem
);
14840 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14841 emit_move_insn (mem
, sreg
);
14844 gcc_assert (!in_words_to_go
); /* Sanity check */
14847 if (in_words_to_go
)
14849 gcc_assert (in_words_to_go
> 0);
14851 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14852 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14855 gcc_assert (!last_bytes
|| part_bytes_reg
);
14857 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14859 rtx tmp
= gen_reg_rtx (SImode
);
14861 /* The bytes we want are in the top end of the word. */
14862 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14863 GEN_INT (8 * (4 - last_bytes
))));
14864 part_bytes_reg
= tmp
;
14868 mem
= adjust_automodify_address (dstbase
, QImode
,
14869 plus_constant (Pmode
, dst
,
14871 dstoffset
+ last_bytes
- 1);
14872 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14876 tmp
= gen_reg_rtx (SImode
);
14877 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14878 part_bytes_reg
= tmp
;
14885 if (last_bytes
> 1)
14887 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14888 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14892 rtx tmp
= gen_reg_rtx (SImode
);
14893 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14894 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14895 part_bytes_reg
= tmp
;
14902 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14903 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14910 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
14913 next_consecutive_mem (rtx mem
)
14915 machine_mode mode
= GET_MODE (mem
);
14916 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14917 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14919 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14922 /* Copy using LDRD/STRD instructions whenever possible.
14923 Returns true upon success. */
14925 gen_cpymem_ldrd_strd (rtx
*operands
)
14927 unsigned HOST_WIDE_INT len
;
14928 HOST_WIDE_INT align
;
14929 rtx src
, dst
, base
;
14931 bool src_aligned
, dst_aligned
;
14932 bool src_volatile
, dst_volatile
;
14934 gcc_assert (CONST_INT_P (operands
[2]));
14935 gcc_assert (CONST_INT_P (operands
[3]));
14937 len
= UINTVAL (operands
[2]);
14941 /* Maximum alignment we can assume for both src and dst buffers. */
14942 align
= INTVAL (operands
[3]);
14944 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14947 /* Place src and dst addresses in registers
14948 and update the corresponding mem rtx. */
14950 dst_volatile
= MEM_VOLATILE_P (dst
);
14951 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14952 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14953 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14956 src_volatile
= MEM_VOLATILE_P (src
);
14957 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14958 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14959 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14961 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14964 if (src_volatile
|| dst_volatile
)
14967 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14968 if (!(dst_aligned
|| src_aligned
))
14969 return arm_gen_cpymemqi (operands
);
14971 /* If the either src or dst is unaligned we'll be accessing it as pairs
14972 of unaligned SImode accesses. Otherwise we can generate DImode
14973 ldrd/strd instructions. */
14974 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
14975 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
14980 reg0
= gen_reg_rtx (DImode
);
14981 rtx low_reg
= NULL_RTX
;
14982 rtx hi_reg
= NULL_RTX
;
14984 if (!src_aligned
|| !dst_aligned
)
14986 low_reg
= gen_lowpart (SImode
, reg0
);
14987 hi_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
14989 if (MEM_ALIGN (src
) >= 2 * BITS_PER_WORD
)
14990 emit_move_insn (reg0
, src
);
14991 else if (src_aligned
)
14992 emit_insn (gen_unaligned_loaddi (reg0
, src
));
14995 emit_insn (gen_unaligned_loadsi (low_reg
, src
));
14996 src
= next_consecutive_mem (src
);
14997 emit_insn (gen_unaligned_loadsi (hi_reg
, src
));
15000 if (MEM_ALIGN (dst
) >= 2 * BITS_PER_WORD
)
15001 emit_move_insn (dst
, reg0
);
15002 else if (dst_aligned
)
15003 emit_insn (gen_unaligned_storedi (dst
, reg0
));
15006 emit_insn (gen_unaligned_storesi (dst
, low_reg
));
15007 dst
= next_consecutive_mem (dst
);
15008 emit_insn (gen_unaligned_storesi (dst
, hi_reg
));
15011 src
= next_consecutive_mem (src
);
15012 dst
= next_consecutive_mem (dst
);
15015 gcc_assert (len
< 8);
15018 /* More than a word but less than a double-word to copy. Copy a word. */
15019 reg0
= gen_reg_rtx (SImode
);
15020 src
= adjust_address (src
, SImode
, 0);
15021 dst
= adjust_address (dst
, SImode
, 0);
15023 emit_move_insn (reg0
, src
);
15025 emit_insn (gen_unaligned_loadsi (reg0
, src
));
15028 emit_move_insn (dst
, reg0
);
15030 emit_insn (gen_unaligned_storesi (dst
, reg0
));
15032 src
= next_consecutive_mem (src
);
15033 dst
= next_consecutive_mem (dst
);
15040 /* Copy the remaining bytes. */
15043 dst
= adjust_address (dst
, HImode
, 0);
15044 src
= adjust_address (src
, HImode
, 0);
15045 reg0
= gen_reg_rtx (SImode
);
15047 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
15049 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
15052 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
15054 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
15056 src
= next_consecutive_mem (src
);
15057 dst
= next_consecutive_mem (dst
);
15062 dst
= adjust_address (dst
, QImode
, 0);
15063 src
= adjust_address (src
, QImode
, 0);
15064 reg0
= gen_reg_rtx (QImode
);
15065 emit_move_insn (reg0
, src
);
15066 emit_move_insn (dst
, reg0
);
15070 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15071 into its component 32-bit subregs. OP2 may be an immediate
15072 constant and we want to simplify it in that case. */
15074 arm_decompose_di_binop (rtx op1
, rtx op2
, rtx
*lo_op1
, rtx
*hi_op1
,
15075 rtx
*lo_op2
, rtx
*hi_op2
)
15077 *lo_op1
= gen_lowpart (SImode
, op1
);
15078 *hi_op1
= gen_highpart (SImode
, op1
);
15079 *lo_op2
= simplify_gen_subreg (SImode
, op2
, DImode
,
15080 subreg_lowpart_offset (SImode
, DImode
));
15081 *hi_op2
= simplify_gen_subreg (SImode
, op2
, DImode
,
15082 subreg_highpart_offset (SImode
, DImode
));
15085 /* Select a dominance comparison mode if possible for a test of the general
15086 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15087 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15088 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15089 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15090 In all cases OP will be either EQ or NE, but we don't need to know which
15091 here. If we are unable to support a dominance comparison we return
15092 CC mode. This will then fail to match for the RTL expressions that
15093 generate this call. */
15095 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
15097 enum rtx_code cond1
, cond2
;
15100 /* Currently we will probably get the wrong result if the individual
15101 comparisons are not simple. This also ensures that it is safe to
15102 reverse a comparison if necessary. */
15103 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
15105 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
15109 /* The if_then_else variant of this tests the second condition if the
15110 first passes, but is true if the first fails. Reverse the first
15111 condition to get a true "inclusive-or" expression. */
15112 if (cond_or
== DOM_CC_NX_OR_Y
)
15113 cond1
= reverse_condition (cond1
);
15115 /* If the comparisons are not equal, and one doesn't dominate the other,
15116 then we can't do this. */
15118 && !comparison_dominates_p (cond1
, cond2
)
15119 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
15123 std::swap (cond1
, cond2
);
15128 if (cond_or
== DOM_CC_X_AND_Y
)
15133 case EQ
: return CC_DEQmode
;
15134 case LE
: return CC_DLEmode
;
15135 case LEU
: return CC_DLEUmode
;
15136 case GE
: return CC_DGEmode
;
15137 case GEU
: return CC_DGEUmode
;
15138 default: gcc_unreachable ();
15142 if (cond_or
== DOM_CC_X_AND_Y
)
15154 gcc_unreachable ();
15158 if (cond_or
== DOM_CC_X_AND_Y
)
15170 gcc_unreachable ();
15174 if (cond_or
== DOM_CC_X_AND_Y
)
15175 return CC_DLTUmode
;
15180 return CC_DLTUmode
;
15182 return CC_DLEUmode
;
15186 gcc_unreachable ();
15190 if (cond_or
== DOM_CC_X_AND_Y
)
15191 return CC_DGTUmode
;
15196 return CC_DGTUmode
;
15198 return CC_DGEUmode
;
15202 gcc_unreachable ();
15205 /* The remaining cases only occur when both comparisons are the
15208 gcc_assert (cond1
== cond2
);
15212 gcc_assert (cond1
== cond2
);
15216 gcc_assert (cond1
== cond2
);
15220 gcc_assert (cond1
== cond2
);
15221 return CC_DLEUmode
;
15224 gcc_assert (cond1
== cond2
);
15225 return CC_DGEUmode
;
15228 gcc_unreachable ();
15233 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
15235 /* All floating point compares return CCFP if it is an equality
15236 comparison, and CCFPE otherwise. */
15237 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
15260 gcc_unreachable ();
15264 /* A compare with a shifted operand. Because of canonicalization, the
15265 comparison will have to be swapped when we emit the assembler. */
15266 if (GET_MODE (y
) == SImode
15267 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15268 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15269 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
15270 || GET_CODE (x
) == ROTATERT
))
15273 /* A widened compare of the sum of a value plus a carry against a
15274 constant. This is a representation of RSC. We want to swap the
15275 result of the comparison at output. Not valid if the Z bit is
15277 if (GET_MODE (x
) == DImode
15278 && GET_CODE (x
) == PLUS
15279 && arm_borrow_operation (XEXP (x
, 1), DImode
)
15281 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
15282 && (op
== LE
|| op
== GT
))
15283 || (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
15284 && (op
== LEU
|| op
== GTU
))))
15287 /* If X is a constant we want to use CC_RSBmode. This is
15288 non-canonical, but arm_gen_compare_reg uses this to generate the
15289 correct canonical form. */
15290 if (GET_MODE (y
) == SImode
15291 && (REG_P (y
) || GET_CODE (y
) == SUBREG
)
15292 && CONST_INT_P (x
))
15295 /* This operation is performed swapped, but since we only rely on the Z
15296 flag we don't need an additional mode. */
15297 if (GET_MODE (y
) == SImode
15298 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15299 && GET_CODE (x
) == NEG
15300 && (op
== EQ
|| op
== NE
))
15303 /* This is a special case that is used by combine to allow a
15304 comparison of a shifted byte load to be split into a zero-extend
15305 followed by a comparison of the shifted integer (only valid for
15306 equalities and unsigned inequalities). */
15307 if (GET_MODE (x
) == SImode
15308 && GET_CODE (x
) == ASHIFT
15309 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
15310 && GET_CODE (XEXP (x
, 0)) == SUBREG
15311 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
15312 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
15313 && (op
== EQ
|| op
== NE
15314 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
15315 && CONST_INT_P (y
))
15318 /* A construct for a conditional compare, if the false arm contains
15319 0, then both conditions must be true, otherwise either condition
15320 must be true. Not all conditions are possible, so CCmode is
15321 returned if it can't be done. */
15322 if (GET_CODE (x
) == IF_THEN_ELSE
15323 && (XEXP (x
, 2) == const0_rtx
15324 || XEXP (x
, 2) == const1_rtx
)
15325 && COMPARISON_P (XEXP (x
, 0))
15326 && COMPARISON_P (XEXP (x
, 1)))
15327 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15328 INTVAL (XEXP (x
, 2)));
15330 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15331 if (GET_CODE (x
) == AND
15332 && (op
== EQ
|| op
== NE
)
15333 && COMPARISON_P (XEXP (x
, 0))
15334 && COMPARISON_P (XEXP (x
, 1)))
15335 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15338 if (GET_CODE (x
) == IOR
15339 && (op
== EQ
|| op
== NE
)
15340 && COMPARISON_P (XEXP (x
, 0))
15341 && COMPARISON_P (XEXP (x
, 1)))
15342 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15345 /* An operation (on Thumb) where we want to test for a single bit.
15346 This is done by shifting that bit up into the top bit of a
15347 scratch register; we can then branch on the sign bit. */
15349 && GET_MODE (x
) == SImode
15350 && (op
== EQ
|| op
== NE
)
15351 && GET_CODE (x
) == ZERO_EXTRACT
15352 && XEXP (x
, 1) == const1_rtx
)
15355 /* An operation that sets the condition codes as a side-effect, the
15356 V flag is not set correctly, so we can only use comparisons where
15357 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15359 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15360 if (GET_MODE (x
) == SImode
15362 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
15363 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
15364 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
15365 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
15366 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
15367 || GET_CODE (x
) == LSHIFTRT
15368 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15369 || GET_CODE (x
) == ROTATERT
15370 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
15371 return CC_NOOVmode
;
15373 /* A comparison of ~reg with a const is really a special
15374 canoncialization of compare (~const, reg), which is a reverse
15375 subtract operation. We may not get here if CONST is 0, but that
15376 doesn't matter because ~0 isn't a valid immediate for RSB. */
15377 if (GET_MODE (x
) == SImode
15378 && GET_CODE (x
) == NOT
15379 && CONST_INT_P (y
))
15382 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
15385 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
15386 && GET_CODE (x
) == PLUS
15387 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
15390 if (GET_MODE (x
) == DImode
15391 && GET_CODE (x
) == PLUS
15392 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
15394 && UINTVAL (y
) == 0x800000000
15395 && (op
== GEU
|| op
== LTU
))
15398 if (GET_MODE (x
) == DImode
15399 && (op
== GE
|| op
== LT
)
15400 && GET_CODE (x
) == SIGN_EXTEND
15401 && ((GET_CODE (y
) == PLUS
15402 && arm_borrow_operation (XEXP (y
, 0), DImode
))
15403 || arm_borrow_operation (y
, DImode
)))
15406 if (GET_MODE (x
) == DImode
15407 && (op
== GEU
|| op
== LTU
)
15408 && GET_CODE (x
) == ZERO_EXTEND
15409 && ((GET_CODE (y
) == PLUS
15410 && arm_borrow_operation (XEXP (y
, 0), DImode
))
15411 || arm_borrow_operation (y
, DImode
)))
15414 if (GET_MODE (x
) == DImode
15415 && (op
== EQ
|| op
== NE
)
15416 && GET_CODE (x
) == PLUS
15417 && GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
15418 && GET_CODE (y
) == SIGN_EXTEND
15419 && GET_CODE (XEXP (y
, 0)) == PLUS
)
15422 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
15423 return GET_MODE (x
);
15428 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
15429 the sequence of instructions needed to generate a suitable condition
15430 code register. Return the CC register result. */
15432 arm_gen_dicompare_reg (rtx_code code
, rtx x
, rtx y
, rtx scratch
)
15437 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
15438 gcc_assert (TARGET_32BIT
);
15439 gcc_assert (!CONST_INT_P (x
));
15441 rtx x_lo
= simplify_gen_subreg (SImode
, x
, DImode
,
15442 subreg_lowpart_offset (SImode
, DImode
));
15443 rtx x_hi
= simplify_gen_subreg (SImode
, x
, DImode
,
15444 subreg_highpart_offset (SImode
, DImode
));
15445 rtx y_lo
= simplify_gen_subreg (SImode
, y
, DImode
,
15446 subreg_lowpart_offset (SImode
, DImode
));
15447 rtx y_hi
= simplify_gen_subreg (SImode
, y
, DImode
,
15448 subreg_highpart_offset (SImode
, DImode
));
15454 if (y_lo
== const0_rtx
|| y_hi
== const0_rtx
)
15456 if (y_lo
!= const0_rtx
)
15458 rtx scratch2
= scratch
? scratch
: gen_reg_rtx (SImode
);
15460 gcc_assert (y_hi
== const0_rtx
);
15461 y_lo
= gen_int_mode (-INTVAL (y_lo
), SImode
);
15462 if (!arm_add_operand (y_lo
, SImode
))
15463 y_lo
= force_reg (SImode
, y_lo
);
15464 emit_insn (gen_addsi3 (scratch2
, x_lo
, y_lo
));
15467 else if (y_hi
!= const0_rtx
)
15469 rtx scratch2
= scratch
? scratch
: gen_reg_rtx (SImode
);
15471 y_hi
= gen_int_mode (-INTVAL (y_hi
), SImode
);
15472 if (!arm_add_operand (y_hi
, SImode
))
15473 y_hi
= force_reg (SImode
, y_hi
);
15474 emit_insn (gen_addsi3 (scratch2
, x_hi
, y_hi
));
15480 gcc_assert (!reload_completed
);
15481 scratch
= gen_rtx_SCRATCH (SImode
);
15484 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
15485 cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
15488 = gen_rtx_SET (cc_reg
,
15489 gen_rtx_COMPARE (CC_NOOVmode
,
15490 gen_rtx_IOR (SImode
, x_lo
, x_hi
),
15492 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
,
15497 if (!arm_add_operand (y_lo
, SImode
))
15498 y_lo
= force_reg (SImode
, y_lo
);
15500 if (!arm_add_operand (y_hi
, SImode
))
15501 y_hi
= force_reg (SImode
, y_hi
);
15503 rtx cmp1
= gen_rtx_NE (SImode
, x_lo
, y_lo
);
15504 rtx cmp2
= gen_rtx_NE (SImode
, x_hi
, y_hi
);
15505 rtx conjunction
= gen_rtx_IOR (SImode
, cmp1
, cmp2
);
15506 mode
= SELECT_CC_MODE (code
, conjunction
, const0_rtx
);
15507 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
15509 emit_insn (gen_rtx_SET (cc_reg
,
15510 gen_rtx_COMPARE (VOIDmode
, conjunction
,
15518 if (y_lo
== const0_rtx
)
15520 /* If the low word of y is 0, then this is simply a normal
15521 compare of the upper words. */
15522 if (!arm_add_operand (y_hi
, SImode
))
15523 y_hi
= force_reg (SImode
, y_hi
);
15525 return arm_gen_compare_reg (code
, x_hi
, y_hi
, NULL_RTX
);
15528 if (!arm_add_operand (y_lo
, SImode
))
15529 y_lo
= force_reg (SImode
, y_lo
);
15532 = gen_rtx_LTU (DImode
,
15533 arm_gen_compare_reg (LTU
, x_lo
, y_lo
, NULL_RTX
),
15537 scratch
= gen_rtx_SCRATCH (SImode
);
15539 if (!arm_not_operand (y_hi
, SImode
))
15540 y_hi
= force_reg (SImode
, y_hi
);
15543 if (y_hi
== const0_rtx
)
15544 insn
= emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch
, x_hi
,
15546 else if (CONST_INT_P (y_hi
))
15547 insn
= emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch
, x_hi
,
15550 insn
= emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch
, x_hi
, y_hi
,
15552 return SET_DEST (single_set (insn
));
15558 /* During expansion, we only expect to get here if y is a
15559 constant that we want to handle, otherwise we should have
15560 swapped the operands already. */
15561 gcc_assert (arm_const_double_prefer_rsbs_rsc (y
));
15563 if (!const_ok_for_arm (INTVAL (y_lo
)))
15564 y_lo
= force_reg (SImode
, y_lo
);
15566 /* Perform a reverse subtract and compare. */
15568 = gen_rtx_LTU (DImode
,
15569 arm_gen_compare_reg (LTU
, y_lo
, x_lo
, scratch
),
15571 rtx_insn
*insn
= emit_insn (gen_rscsi3_CC_NVout_scratch (scratch
, y_hi
,
15573 return SET_DEST (single_set (insn
));
15579 if (y_lo
== const0_rtx
)
15581 /* If the low word of y is 0, then this is simply a normal
15582 compare of the upper words. */
15583 if (!arm_add_operand (y_hi
, SImode
))
15584 y_hi
= force_reg (SImode
, y_hi
);
15586 return arm_gen_compare_reg (code
, x_hi
, y_hi
, NULL_RTX
);
15589 if (!arm_add_operand (y_lo
, SImode
))
15590 y_lo
= force_reg (SImode
, y_lo
);
15593 = gen_rtx_LTU (DImode
,
15594 arm_gen_compare_reg (LTU
, x_lo
, y_lo
, NULL_RTX
),
15598 scratch
= gen_rtx_SCRATCH (SImode
);
15599 if (!arm_not_operand (y_hi
, SImode
))
15600 y_hi
= force_reg (SImode
, y_hi
);
15603 if (y_hi
== const0_rtx
)
15604 insn
= emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch
, x_hi
,
15606 else if (CONST_INT_P (y_hi
))
15608 /* Constant is viewed as unsigned when zero-extended. */
15609 y_hi
= GEN_INT (UINTVAL (y_hi
) & 0xffffffffULL
);
15610 insn
= emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch
, x_hi
,
15614 insn
= emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch
, x_hi
, y_hi
,
15616 return SET_DEST (single_set (insn
));
15622 /* During expansion, we only expect to get here if y is a
15623 constant that we want to handle, otherwise we should have
15624 swapped the operands already. */
15625 gcc_assert (arm_const_double_prefer_rsbs_rsc (y
));
15627 if (!const_ok_for_arm (INTVAL (y_lo
)))
15628 y_lo
= force_reg (SImode
, y_lo
);
15630 /* Perform a reverse subtract and compare. */
15632 = gen_rtx_LTU (DImode
,
15633 arm_gen_compare_reg (LTU
, y_lo
, x_lo
, scratch
),
15635 y_hi
= GEN_INT (0xffffffff & UINTVAL (y_hi
));
15636 rtx_insn
*insn
= emit_insn (gen_rscsi3_CC_Bout_scratch (scratch
, y_hi
,
15638 return SET_DEST (single_set (insn
));
15642 gcc_unreachable ();
15646 /* X and Y are two things to compare using CODE. Emit the compare insn and
15647 return the rtx for register 0 in the proper mode. */
15649 arm_gen_compare_reg (rtx_code code
, rtx x
, rtx y
, rtx scratch
)
15651 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
15652 return arm_gen_dicompare_reg (code
, x
, y
, scratch
);
15654 machine_mode mode
= SELECT_CC_MODE (code
, x
, y
);
15655 rtx cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
15656 if (mode
== CC_RSBmode
)
15659 scratch
= gen_rtx_SCRATCH (SImode
);
15660 emit_insn (gen_rsb_imm_compare_scratch (scratch
,
15661 GEN_INT (~UINTVAL (x
)), y
));
15664 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15669 /* Generate a sequence of insns that will generate the correct return
15670 address mask depending on the physical architecture that the program
15673 arm_gen_return_addr_mask (void)
15675 rtx reg
= gen_reg_rtx (Pmode
);
15677 emit_insn (gen_return_addr_mask (reg
));
15682 arm_reload_in_hi (rtx
*operands
)
15684 rtx ref
= operands
[1];
15686 HOST_WIDE_INT offset
= 0;
15688 if (GET_CODE (ref
) == SUBREG
)
15690 offset
= SUBREG_BYTE (ref
);
15691 ref
= SUBREG_REG (ref
);
15696 /* We have a pseudo which has been spilt onto the stack; there
15697 are two cases here: the first where there is a simple
15698 stack-slot replacement and a second where the stack-slot is
15699 out of range, or is used as a subreg. */
15700 if (reg_equiv_mem (REGNO (ref
)))
15702 ref
= reg_equiv_mem (REGNO (ref
));
15703 base
= find_replacement (&XEXP (ref
, 0));
15706 /* The slot is out of range, or was dressed up in a SUBREG. */
15707 base
= reg_equiv_address (REGNO (ref
));
15709 /* PR 62554: If there is no equivalent memory location then just move
15710 the value as an SImode register move. This happens when the target
15711 architecture variant does not have an HImode register move. */
15714 gcc_assert (REG_P (operands
[0]));
15715 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15716 gen_rtx_SUBREG (SImode
, ref
, 0)));
15721 base
= find_replacement (&XEXP (ref
, 0));
15723 /* Handle the case where the address is too complex to be offset by 1. */
15724 if (GET_CODE (base
) == MINUS
15725 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15727 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15729 emit_set_insn (base_plus
, base
);
15732 else if (GET_CODE (base
) == PLUS
)
15734 /* The addend must be CONST_INT, or we would have dealt with it above. */
15735 HOST_WIDE_INT hi
, lo
;
15737 offset
+= INTVAL (XEXP (base
, 1));
15738 base
= XEXP (base
, 0);
15740 /* Rework the address into a legal sequence of insns. */
15741 /* Valid range for lo is -4095 -> 4095 */
15744 : -((-offset
) & 0xfff));
15746 /* Corner case, if lo is the max offset then we would be out of range
15747 once we have added the additional 1 below, so bump the msb into the
15748 pre-loading insn(s). */
15752 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15753 ^ (HOST_WIDE_INT
) 0x80000000)
15754 - (HOST_WIDE_INT
) 0x80000000);
15756 gcc_assert (hi
+ lo
== offset
);
15760 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15762 /* Get the base address; addsi3 knows how to handle constants
15763 that require more than one insn. */
15764 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15770 /* Operands[2] may overlap operands[0] (though it won't overlap
15771 operands[1]), that's why we asked for a DImode reg -- so we can
15772 use the bit that does not overlap. */
15773 if (REGNO (operands
[2]) == REGNO (operands
[0]))
15774 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15776 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15778 emit_insn (gen_zero_extendqisi2 (scratch
,
15779 gen_rtx_MEM (QImode
,
15780 plus_constant (Pmode
, base
,
15782 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15783 gen_rtx_MEM (QImode
,
15784 plus_constant (Pmode
, base
,
15786 if (!BYTES_BIG_ENDIAN
)
15787 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15788 gen_rtx_IOR (SImode
,
15791 gen_rtx_SUBREG (SImode
, operands
[0], 0),
15795 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15796 gen_rtx_IOR (SImode
,
15797 gen_rtx_ASHIFT (SImode
, scratch
,
15799 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
15802 /* Handle storing a half-word to memory during reload by synthesizing as two
15803 byte stores. Take care not to clobber the input values until after we
15804 have moved them somewhere safe. This code assumes that if the DImode
15805 scratch in operands[2] overlaps either the input value or output address
15806 in some way, then that value must die in this insn (we absolutely need
15807 two scratch registers for some corner cases). */
15809 arm_reload_out_hi (rtx
*operands
)
15811 rtx ref
= operands
[0];
15812 rtx outval
= operands
[1];
15814 HOST_WIDE_INT offset
= 0;
15816 if (GET_CODE (ref
) == SUBREG
)
15818 offset
= SUBREG_BYTE (ref
);
15819 ref
= SUBREG_REG (ref
);
15824 /* We have a pseudo which has been spilt onto the stack; there
15825 are two cases here: the first where there is a simple
15826 stack-slot replacement and a second where the stack-slot is
15827 out of range, or is used as a subreg. */
15828 if (reg_equiv_mem (REGNO (ref
)))
15830 ref
= reg_equiv_mem (REGNO (ref
));
15831 base
= find_replacement (&XEXP (ref
, 0));
15834 /* The slot is out of range, or was dressed up in a SUBREG. */
15835 base
= reg_equiv_address (REGNO (ref
));
15837 /* PR 62254: If there is no equivalent memory location then just move
15838 the value as an SImode register move. This happens when the target
15839 architecture variant does not have an HImode register move. */
15842 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
15844 if (REG_P (outval
))
15846 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15847 gen_rtx_SUBREG (SImode
, outval
, 0)));
15849 else /* SUBREG_P (outval) */
15851 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
15852 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15853 SUBREG_REG (outval
)));
15855 /* FIXME: Handle other cases ? */
15856 gcc_unreachable ();
15862 base
= find_replacement (&XEXP (ref
, 0));
15864 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15866 /* Handle the case where the address is too complex to be offset by 1. */
15867 if (GET_CODE (base
) == MINUS
15868 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15870 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15872 /* Be careful not to destroy OUTVAL. */
15873 if (reg_overlap_mentioned_p (base_plus
, outval
))
15875 /* Updating base_plus might destroy outval, see if we can
15876 swap the scratch and base_plus. */
15877 if (!reg_overlap_mentioned_p (scratch
, outval
))
15878 std::swap (scratch
, base_plus
);
15881 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15883 /* Be conservative and copy OUTVAL into the scratch now,
15884 this should only be necessary if outval is a subreg
15885 of something larger than a word. */
15886 /* XXX Might this clobber base? I can't see how it can,
15887 since scratch is known to overlap with OUTVAL, and
15888 must be wider than a word. */
15889 emit_insn (gen_movhi (scratch_hi
, outval
));
15890 outval
= scratch_hi
;
15894 emit_set_insn (base_plus
, base
);
15897 else if (GET_CODE (base
) == PLUS
)
15899 /* The addend must be CONST_INT, or we would have dealt with it above. */
15900 HOST_WIDE_INT hi
, lo
;
15902 offset
+= INTVAL (XEXP (base
, 1));
15903 base
= XEXP (base
, 0);
15905 /* Rework the address into a legal sequence of insns. */
15906 /* Valid range for lo is -4095 -> 4095 */
15909 : -((-offset
) & 0xfff));
15911 /* Corner case, if lo is the max offset then we would be out of range
15912 once we have added the additional 1 below, so bump the msb into the
15913 pre-loading insn(s). */
15917 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15918 ^ (HOST_WIDE_INT
) 0x80000000)
15919 - (HOST_WIDE_INT
) 0x80000000);
15921 gcc_assert (hi
+ lo
== offset
);
15925 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15927 /* Be careful not to destroy OUTVAL. */
15928 if (reg_overlap_mentioned_p (base_plus
, outval
))
15930 /* Updating base_plus might destroy outval, see if we
15931 can swap the scratch and base_plus. */
15932 if (!reg_overlap_mentioned_p (scratch
, outval
))
15933 std::swap (scratch
, base_plus
);
15936 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15938 /* Be conservative and copy outval into scratch now,
15939 this should only be necessary if outval is a
15940 subreg of something larger than a word. */
15941 /* XXX Might this clobber base? I can't see how it
15942 can, since scratch is known to overlap with
15944 emit_insn (gen_movhi (scratch_hi
, outval
));
15945 outval
= scratch_hi
;
15949 /* Get the base address; addsi3 knows how to handle constants
15950 that require more than one insn. */
15951 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15957 if (BYTES_BIG_ENDIAN
)
15959 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15960 plus_constant (Pmode
, base
,
15962 gen_lowpart (QImode
, outval
)));
15963 emit_insn (gen_lshrsi3 (scratch
,
15964 gen_rtx_SUBREG (SImode
, outval
, 0),
15966 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15968 gen_lowpart (QImode
, scratch
)));
15972 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15974 gen_lowpart (QImode
, outval
)));
15975 emit_insn (gen_lshrsi3 (scratch
,
15976 gen_rtx_SUBREG (SImode
, outval
, 0),
15978 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15979 plus_constant (Pmode
, base
,
15981 gen_lowpart (QImode
, scratch
)));
15985 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15986 (padded to the size of a word) should be passed in a register. */
15989 arm_must_pass_in_stack (const function_arg_info
&arg
)
15991 if (TARGET_AAPCS_BASED
)
15992 return must_pass_in_stack_var_size (arg
);
15994 return must_pass_in_stack_var_size_or_pad (arg
);
15998 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15999 byte of a stack argument has useful data. For legacy APCS ABIs we use
16000 the default. For AAPCS based ABIs small aggregate types are placed
16001 in the lowest memory address. */
16003 static pad_direction
16004 arm_function_arg_padding (machine_mode mode
, const_tree type
)
16006 if (!TARGET_AAPCS_BASED
)
16007 return default_function_arg_padding (mode
, type
);
16009 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
16010 return PAD_DOWNWARD
;
16016 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16017 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16018 register has useful data, and return the opposite if the most
16019 significant byte does. */
16022 arm_pad_reg_upward (machine_mode mode
,
16023 tree type
, int first ATTRIBUTE_UNUSED
)
16025 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
16027 /* For AAPCS, small aggregates, small fixed-point types,
16028 and small complex types are always padded upwards. */
16031 if ((AGGREGATE_TYPE_P (type
)
16032 || TREE_CODE (type
) == COMPLEX_TYPE
16033 || FIXED_POINT_TYPE_P (type
))
16034 && int_size_in_bytes (type
) <= 4)
16039 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
16040 && GET_MODE_SIZE (mode
) <= 4)
16045 /* Otherwise, use default padding. */
16046 return !BYTES_BIG_ENDIAN
;
16049 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16050 assuming that the address in the base register is word aligned. */
16052 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
16054 HOST_WIDE_INT max_offset
;
16056 /* Offset must be a multiple of 4 in Thumb mode. */
16057 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
16062 else if (TARGET_ARM
)
16067 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
16070 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16071 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16072 Assumes that the address in the base register RN is word aligned. Pattern
16073 guarantees that both memory accesses use the same base register,
16074 the offsets are constants within the range, and the gap between the offsets is 4.
16075 If preload complete then check that registers are legal. WBACK indicates whether
16076 address is updated. LOAD indicates whether memory access is load or store. */
16078 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
16079 bool wback
, bool load
)
16081 unsigned int t
, t2
, n
;
16083 if (!reload_completed
)
16086 if (!offset_ok_for_ldrd_strd (offset
))
16093 if ((TARGET_THUMB2
)
16094 && ((wback
&& (n
== t
|| n
== t2
))
16095 || (t
== SP_REGNUM
)
16096 || (t
== PC_REGNUM
)
16097 || (t2
== SP_REGNUM
)
16098 || (t2
== PC_REGNUM
)
16099 || (!load
&& (n
== PC_REGNUM
))
16100 || (load
&& (t
== t2
))
16101 /* Triggers Cortex-M3 LDRD errata. */
16102 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
16106 && ((wback
&& (n
== t
|| n
== t2
))
16107 || (t2
== PC_REGNUM
)
16108 || (t
% 2 != 0) /* First destination register is not even. */
16110 /* PC can be used as base register (for offset addressing only),
16111 but it is depricated. */
16112 || (n
== PC_REGNUM
)))
16118 /* Return true if a 64-bit access with alignment ALIGN and with a
16119 constant offset OFFSET from the base pointer is permitted on this
16122 align_ok_ldrd_strd (HOST_WIDE_INT align
, HOST_WIDE_INT offset
)
16124 return (unaligned_access
16125 ? (align
>= BITS_PER_WORD
&& (offset
& 3) == 0)
16126 : (align
>= 2 * BITS_PER_WORD
&& (offset
& 7) == 0));
16129 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16130 operand MEM's address contains an immediate offset from the base
16131 register and has no side effects, in which case it sets BASE,
16132 OFFSET and ALIGN accordingly. */
16134 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
, HOST_WIDE_INT
*align
)
16138 gcc_assert (base
!= NULL
&& offset
!= NULL
);
16140 /* TODO: Handle more general memory operand patterns, such as
16141 PRE_DEC and PRE_INC. */
16143 if (side_effects_p (mem
))
16146 /* Can't deal with subregs. */
16147 if (GET_CODE (mem
) == SUBREG
)
16150 gcc_assert (MEM_P (mem
));
16152 *offset
= const0_rtx
;
16153 *align
= MEM_ALIGN (mem
);
16155 addr
= XEXP (mem
, 0);
16157 /* If addr isn't valid for DImode, then we can't handle it. */
16158 if (!arm_legitimate_address_p (DImode
, addr
,
16159 reload_in_progress
|| reload_completed
))
16167 else if (GET_CODE (addr
) == PLUS
)
16169 *base
= XEXP (addr
, 0);
16170 *offset
= XEXP (addr
, 1);
16171 return (REG_P (*base
) && CONST_INT_P (*offset
));
16177 /* Called from a peephole2 to replace two word-size accesses with a
16178 single LDRD/STRD instruction. Returns true iff we can generate a
16179 new instruction sequence. That is, both accesses use the same base
16180 register and the gap between constant offsets is 4. This function
16181 may reorder its operands to match ldrd/strd RTL templates.
16182 OPERANDS are the operands found by the peephole matcher;
16183 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16184 corresponding memory operands. LOAD indicaates whether the access
16185 is load or store. CONST_STORE indicates a store of constant
16186 integer values held in OPERANDS[4,5] and assumes that the pattern
16187 is of length 4 insn, for the purpose of checking dead registers.
16188 COMMUTE indicates that register operands may be reordered. */
16190 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
16191 bool const_store
, bool commute
)
16194 HOST_WIDE_INT offsets
[2], offset
, align
[2];
16195 rtx base
= NULL_RTX
;
16196 rtx cur_base
, cur_offset
, tmp
;
16198 HARD_REG_SET regset
;
16200 gcc_assert (!const_store
|| !load
);
16201 /* Check that the memory references are immediate offsets from the
16202 same base register. Extract the base register, the destination
16203 registers, and the corresponding memory offsets. */
16204 for (i
= 0; i
< nops
; i
++)
16206 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
,
16212 else if (REGNO (base
) != REGNO (cur_base
))
16215 offsets
[i
] = INTVAL (cur_offset
);
16216 if (GET_CODE (operands
[i
]) == SUBREG
)
16218 tmp
= SUBREG_REG (operands
[i
]);
16219 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
16224 /* Make sure there is no dependency between the individual loads. */
16225 if (load
&& REGNO (operands
[0]) == REGNO (base
))
16226 return false; /* RAW */
16228 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
16229 return false; /* WAW */
16231 /* If the same input register is used in both stores
16232 when storing different constants, try to find a free register.
16233 For example, the code
16238 can be transformed into
16242 in Thumb mode assuming that r1 is free.
16243 For ARM mode do the same but only if the starting register
16244 can be made to be even. */
16246 && REGNO (operands
[0]) == REGNO (operands
[1])
16247 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
16251 CLEAR_HARD_REG_SET (regset
);
16252 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
16253 if (tmp
== NULL_RTX
)
16256 /* Use the new register in the first load to ensure that
16257 if the original input register is not dead after peephole,
16258 then it will have the correct constant value. */
16261 else if (TARGET_ARM
)
16263 int regno
= REGNO (operands
[0]);
16264 if (!peep2_reg_dead_p (4, operands
[0]))
16266 /* When the input register is even and is not dead after the
16267 pattern, it has to hold the second constant but we cannot
16268 form a legal STRD in ARM mode with this register as the second
16270 if (regno
% 2 == 0)
16273 /* Is regno-1 free? */
16274 SET_HARD_REG_SET (regset
);
16275 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
16276 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
16277 if (tmp
== NULL_RTX
)
16284 /* Find a DImode register. */
16285 CLEAR_HARD_REG_SET (regset
);
16286 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
16287 if (tmp
!= NULL_RTX
)
16289 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
16290 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
16294 /* Can we use the input register to form a DI register? */
16295 SET_HARD_REG_SET (regset
);
16296 CLEAR_HARD_REG_BIT(regset
,
16297 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
16298 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
16299 if (tmp
== NULL_RTX
)
16301 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
16305 gcc_assert (operands
[0] != NULL_RTX
);
16306 gcc_assert (operands
[1] != NULL_RTX
);
16307 gcc_assert (REGNO (operands
[0]) % 2 == 0);
16308 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
16312 /* Make sure the instructions are ordered with lower memory access first. */
16313 if (offsets
[0] > offsets
[1])
16315 gap
= offsets
[0] - offsets
[1];
16316 offset
= offsets
[1];
16318 /* Swap the instructions such that lower memory is accessed first. */
16319 std::swap (operands
[0], operands
[1]);
16320 std::swap (operands
[2], operands
[3]);
16321 std::swap (align
[0], align
[1]);
16323 std::swap (operands
[4], operands
[5]);
16327 gap
= offsets
[1] - offsets
[0];
16328 offset
= offsets
[0];
16331 /* Make sure accesses are to consecutive memory locations. */
16332 if (gap
!= GET_MODE_SIZE (SImode
))
16335 if (!align_ok_ldrd_strd (align
[0], offset
))
16338 /* Make sure we generate legal instructions. */
16339 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
16343 /* In Thumb state, where registers are almost unconstrained, there
16344 is little hope to fix it. */
16348 if (load
&& commute
)
16350 /* Try reordering registers. */
16351 std::swap (operands
[0], operands
[1]);
16352 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
16359 /* If input registers are dead after this pattern, they can be
16360 reordered or replaced by other registers that are free in the
16361 current pattern. */
16362 if (!peep2_reg_dead_p (4, operands
[0])
16363 || !peep2_reg_dead_p (4, operands
[1]))
16366 /* Try to reorder the input registers. */
16367 /* For example, the code
16372 can be transformed into
16377 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
16380 std::swap (operands
[0], operands
[1]);
16384 /* Try to find a free DI register. */
16385 CLEAR_HARD_REG_SET (regset
);
16386 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
16387 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
16390 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
16391 if (tmp
== NULL_RTX
)
16394 /* DREG must be an even-numbered register in DImode.
16395 Split it into SI registers. */
16396 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
16397 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
16398 gcc_assert (operands
[0] != NULL_RTX
);
16399 gcc_assert (operands
[1] != NULL_RTX
);
16400 gcc_assert (REGNO (operands
[0]) % 2 == 0);
16401 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
16403 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
16413 /* Return true if parallel execution of the two word-size accesses provided
16414 could be satisfied with a single LDRD/STRD instruction. Two word-size
16415 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
16416 register operands and OPERANDS[2,3] are the corresponding memory operands.
16419 valid_operands_ldrd_strd (rtx
*operands
, bool load
)
16422 HOST_WIDE_INT offsets
[2], offset
, align
[2];
16423 rtx base
= NULL_RTX
;
16424 rtx cur_base
, cur_offset
;
16427 /* Check that the memory references are immediate offsets from the
16428 same base register. Extract the base register, the destination
16429 registers, and the corresponding memory offsets. */
16430 for (i
= 0; i
< nops
; i
++)
16432 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
,
16438 else if (REGNO (base
) != REGNO (cur_base
))
16441 offsets
[i
] = INTVAL (cur_offset
);
16442 if (GET_CODE (operands
[i
]) == SUBREG
)
16446 if (offsets
[0] > offsets
[1])
16449 gap
= offsets
[1] - offsets
[0];
16450 offset
= offsets
[0];
16452 /* Make sure accesses are to consecutive memory locations. */
16453 if (gap
!= GET_MODE_SIZE (SImode
))
16456 if (!align_ok_ldrd_strd (align
[0], offset
))
16459 return operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
16464 /* Print a symbolic form of X to the debug file, F. */
16466 arm_print_value (FILE *f
, rtx x
)
16468 switch (GET_CODE (x
))
16471 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
16477 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
16478 sizeof (fpstr
), 0, 1);
16488 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
16490 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
16491 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
16499 fprintf (f
, "\"%s\"", XSTR (x
, 0));
16503 fprintf (f
, "`%s'", XSTR (x
, 0));
16507 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
16511 arm_print_value (f
, XEXP (x
, 0));
16515 arm_print_value (f
, XEXP (x
, 0));
16517 arm_print_value (f
, XEXP (x
, 1));
16525 fprintf (f
, "????");
16530 /* Routines for manipulation of the constant pool. */
16532 /* Arm instructions cannot load a large constant directly into a
16533 register; they have to come from a pc relative load. The constant
16534 must therefore be placed in the addressable range of the pc
16535 relative load. Depending on the precise pc relative load
16536 instruction the range is somewhere between 256 bytes and 4k. This
16537 means that we often have to dump a constant inside a function, and
16538 generate code to branch around it.
16540 It is important to minimize this, since the branches will slow
16541 things down and make the code larger.
16543 Normally we can hide the table after an existing unconditional
16544 branch so that there is no interruption of the flow, but in the
16545 worst case the code looks like this:
16563 We fix this by performing a scan after scheduling, which notices
16564 which instructions need to have their operands fetched from the
16565 constant table and builds the table.
16567 The algorithm starts by building a table of all the constants that
16568 need fixing up and all the natural barriers in the function (places
16569 where a constant table can be dropped without breaking the flow).
16570 For each fixup we note how far the pc-relative replacement will be
16571 able to reach and the offset of the instruction into the function.
16573 Having built the table we then group the fixes together to form
16574 tables that are as large as possible (subject to addressing
16575 constraints) and emit each table of constants after the last
16576 barrier that is within range of all the instructions in the group.
16577 If a group does not contain a barrier, then we forcibly create one
16578 by inserting a jump instruction into the flow. Once the table has
16579 been inserted, the insns are then modified to reference the
16580 relevant entry in the pool.
16582 Possible enhancements to the algorithm (not implemented) are:
16584 1) For some processors and object formats, there may be benefit in
16585 aligning the pools to the start of cache lines; this alignment
16586 would need to be taken into account when calculating addressability
16589 /* These typedefs are located at the start of this file, so that
16590 they can be used in the prototypes there. This comment is to
16591 remind readers of that fact so that the following structures
16592 can be understood more easily.
16594 typedef struct minipool_node Mnode;
16595 typedef struct minipool_fixup Mfix; */
16597 struct minipool_node
16599 /* Doubly linked chain of entries. */
16602 /* The maximum offset into the code that this entry can be placed. While
16603 pushing fixes for forward references, all entries are sorted in order
16604 of increasing max_address. */
16605 HOST_WIDE_INT max_address
;
16606 /* Similarly for an entry inserted for a backwards ref. */
16607 HOST_WIDE_INT min_address
;
16608 /* The number of fixes referencing this entry. This can become zero
16609 if we "unpush" an entry. In this case we ignore the entry when we
16610 come to emit the code. */
16612 /* The offset from the start of the minipool. */
16613 HOST_WIDE_INT offset
;
16614 /* The value in table. */
16616 /* The mode of value. */
16618 /* The size of the value. With iWMMXt enabled
16619 sizes > 4 also imply an alignment of 8-bytes. */
16623 struct minipool_fixup
16627 HOST_WIDE_INT address
;
16633 HOST_WIDE_INT forwards
;
16634 HOST_WIDE_INT backwards
;
16637 /* Fixes less than a word need padding out to a word boundary. */
16638 #define MINIPOOL_FIX_SIZE(mode) \
16639 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
16641 static Mnode
* minipool_vector_head
;
16642 static Mnode
* minipool_vector_tail
;
16643 static rtx_code_label
*minipool_vector_label
;
16644 static int minipool_pad
;
16646 /* The linked list of all minipool fixes required for this function. */
16647 Mfix
* minipool_fix_head
;
16648 Mfix
* minipool_fix_tail
;
16649 /* The fix entry for the current minipool, once it has been placed. */
16650 Mfix
* minipool_barrier
;
16652 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16653 #define JUMP_TABLES_IN_TEXT_SECTION 0
16656 static HOST_WIDE_INT
16657 get_jump_table_size (rtx_jump_table_data
*insn
)
16659 /* ADDR_VECs only take room if read-only data does into the text
16661 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
16663 rtx body
= PATTERN (insn
);
16664 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
16665 HOST_WIDE_INT size
;
16666 HOST_WIDE_INT modesize
;
16668 modesize
= GET_MODE_SIZE (GET_MODE (body
));
16669 size
= modesize
* XVECLEN (body
, elt
);
16673 /* Round up size of TBB table to a halfword boundary. */
16674 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
16677 /* No padding necessary for TBH. */
16680 /* Add two bytes for alignment on Thumb. */
16685 gcc_unreachable ();
16693 /* Emit insns to load the function address from FUNCDESC (an FDPIC
16694 function descriptor) into a register and the GOT address into the
16695 FDPIC register, returning an rtx for the register holding the
16696 function address. */
16699 arm_load_function_descriptor (rtx funcdesc
)
16701 rtx fnaddr_reg
= gen_reg_rtx (Pmode
);
16702 rtx pic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
16703 rtx fnaddr
= gen_rtx_MEM (Pmode
, funcdesc
);
16704 rtx gotaddr
= gen_rtx_MEM (Pmode
, plus_constant (Pmode
, funcdesc
, 4));
16706 emit_move_insn (fnaddr_reg
, fnaddr
);
16708 /* The ABI requires the entry point address to be loaded first, but
16709 since we cannot support lazy binding for lack of atomic load of
16710 two 32-bits values, we do not need to bother to prevent the
16711 previous load from being moved after that of the GOT address. */
16712 emit_insn (gen_restore_pic_register_after_call (pic_reg
, gotaddr
));
16717 /* Return the maximum amount of padding that will be inserted before
16719 static HOST_WIDE_INT
16720 get_label_padding (rtx label
)
16722 HOST_WIDE_INT align
, min_insn_size
;
16724 align
= 1 << label_to_alignment (label
).levels
[0].log
;
16725 min_insn_size
= TARGET_THUMB
? 2 : 4;
16726 return align
> min_insn_size
? align
- min_insn_size
: 0;
16729 /* Move a minipool fix MP from its current location to before MAX_MP.
16730 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16731 constraints may need updating. */
16733 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
16734 HOST_WIDE_INT max_address
)
16736 /* The code below assumes these are different. */
16737 gcc_assert (mp
!= max_mp
);
16739 if (max_mp
== NULL
)
16741 if (max_address
< mp
->max_address
)
16742 mp
->max_address
= max_address
;
16746 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16747 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16749 mp
->max_address
= max_address
;
16751 /* Unlink MP from its current position. Since max_mp is non-null,
16752 mp->prev must be non-null. */
16753 mp
->prev
->next
= mp
->next
;
16754 if (mp
->next
!= NULL
)
16755 mp
->next
->prev
= mp
->prev
;
16757 minipool_vector_tail
= mp
->prev
;
16759 /* Re-insert it before MAX_MP. */
16761 mp
->prev
= max_mp
->prev
;
16764 if (mp
->prev
!= NULL
)
16765 mp
->prev
->next
= mp
;
16767 minipool_vector_head
= mp
;
16770 /* Save the new entry. */
16773 /* Scan over the preceding entries and adjust their addresses as
16775 while (mp
->prev
!= NULL
16776 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16778 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16785 /* Add a constant to the minipool for a forward reference. Returns the
16786 node added or NULL if the constant will not fit in this pool. */
16788 add_minipool_forward_ref (Mfix
*fix
)
16790 /* If set, max_mp is the first pool_entry that has a lower
16791 constraint than the one we are trying to add. */
16792 Mnode
* max_mp
= NULL
;
16793 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
16796 /* If the minipool starts before the end of FIX->INSN then this FIX
16797 cannot be placed into the current pool. Furthermore, adding the
16798 new constant pool entry may cause the pool to start FIX_SIZE bytes
16800 if (minipool_vector_head
&&
16801 (fix
->address
+ get_attr_length (fix
->insn
)
16802 >= minipool_vector_head
->max_address
- fix
->fix_size
))
16805 /* Scan the pool to see if a constant with the same value has
16806 already been added. While we are doing this, also note the
16807 location where we must insert the constant if it doesn't already
16809 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16811 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16812 && fix
->mode
== mp
->mode
16813 && (!LABEL_P (fix
->value
)
16814 || (CODE_LABEL_NUMBER (fix
->value
)
16815 == CODE_LABEL_NUMBER (mp
->value
)))
16816 && rtx_equal_p (fix
->value
, mp
->value
))
16818 /* More than one fix references this entry. */
16820 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
16823 /* Note the insertion point if necessary. */
16825 && mp
->max_address
> max_address
)
16828 /* If we are inserting an 8-bytes aligned quantity and
16829 we have not already found an insertion point, then
16830 make sure that all such 8-byte aligned quantities are
16831 placed at the start of the pool. */
16832 if (ARM_DOUBLEWORD_ALIGN
16834 && fix
->fix_size
>= 8
16835 && mp
->fix_size
< 8)
16838 max_address
= mp
->max_address
;
16842 /* The value is not currently in the minipool, so we need to create
16843 a new entry for it. If MAX_MP is NULL, the entry will be put on
16844 the end of the list since the placement is less constrained than
16845 any existing entry. Otherwise, we insert the new fix before
16846 MAX_MP and, if necessary, adjust the constraints on the other
16849 mp
->fix_size
= fix
->fix_size
;
16850 mp
->mode
= fix
->mode
;
16851 mp
->value
= fix
->value
;
16853 /* Not yet required for a backwards ref. */
16854 mp
->min_address
= -65536;
16856 if (max_mp
== NULL
)
16858 mp
->max_address
= max_address
;
16860 mp
->prev
= minipool_vector_tail
;
16862 if (mp
->prev
== NULL
)
16864 minipool_vector_head
= mp
;
16865 minipool_vector_label
= gen_label_rtx ();
16868 mp
->prev
->next
= mp
;
16870 minipool_vector_tail
= mp
;
16874 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16875 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16877 mp
->max_address
= max_address
;
16880 mp
->prev
= max_mp
->prev
;
16882 if (mp
->prev
!= NULL
)
16883 mp
->prev
->next
= mp
;
16885 minipool_vector_head
= mp
;
16888 /* Save the new entry. */
16891 /* Scan over the preceding entries and adjust their addresses as
16893 while (mp
->prev
!= NULL
16894 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16896 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16904 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
16905 HOST_WIDE_INT min_address
)
16907 HOST_WIDE_INT offset
;
16909 /* The code below assumes these are different. */
16910 gcc_assert (mp
!= min_mp
);
16912 if (min_mp
== NULL
)
16914 if (min_address
> mp
->min_address
)
16915 mp
->min_address
= min_address
;
16919 /* We will adjust this below if it is too loose. */
16920 mp
->min_address
= min_address
;
16922 /* Unlink MP from its current position. Since min_mp is non-null,
16923 mp->next must be non-null. */
16924 mp
->next
->prev
= mp
->prev
;
16925 if (mp
->prev
!= NULL
)
16926 mp
->prev
->next
= mp
->next
;
16928 minipool_vector_head
= mp
->next
;
16930 /* Reinsert it after MIN_MP. */
16932 mp
->next
= min_mp
->next
;
16934 if (mp
->next
!= NULL
)
16935 mp
->next
->prev
= mp
;
16937 minipool_vector_tail
= mp
;
16943 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16945 mp
->offset
= offset
;
16946 if (mp
->refcount
> 0)
16947 offset
+= mp
->fix_size
;
16949 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16950 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16956 /* Add a constant to the minipool for a backward reference. Returns the
16957 node added or NULL if the constant will not fit in this pool.
16959 Note that the code for insertion for a backwards reference can be
16960 somewhat confusing because the calculated offsets for each fix do
16961 not take into account the size of the pool (which is still under
16964 add_minipool_backward_ref (Mfix
*fix
)
16966 /* If set, min_mp is the last pool_entry that has a lower constraint
16967 than the one we are trying to add. */
16968 Mnode
*min_mp
= NULL
;
16969 /* This can be negative, since it is only a constraint. */
16970 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16973 /* If we can't reach the current pool from this insn, or if we can't
16974 insert this entry at the end of the pool without pushing other
16975 fixes out of range, then we don't try. This ensures that we
16976 can't fail later on. */
16977 if (min_address
>= minipool_barrier
->address
16978 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16979 >= minipool_barrier
->address
))
16982 /* Scan the pool to see if a constant with the same value has
16983 already been added. While we are doing this, also note the
16984 location where we must insert the constant if it doesn't already
16986 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16988 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16989 && fix
->mode
== mp
->mode
16990 && (!LABEL_P (fix
->value
)
16991 || (CODE_LABEL_NUMBER (fix
->value
)
16992 == CODE_LABEL_NUMBER (mp
->value
)))
16993 && rtx_equal_p (fix
->value
, mp
->value
)
16994 /* Check that there is enough slack to move this entry to the
16995 end of the table (this is conservative). */
16996 && (mp
->max_address
16997 > (minipool_barrier
->address
16998 + minipool_vector_tail
->offset
16999 + minipool_vector_tail
->fix_size
)))
17002 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
17005 if (min_mp
!= NULL
)
17006 mp
->min_address
+= fix
->fix_size
;
17009 /* Note the insertion point if necessary. */
17010 if (mp
->min_address
< min_address
)
17012 /* For now, we do not allow the insertion of 8-byte alignment
17013 requiring nodes anywhere but at the start of the pool. */
17014 if (ARM_DOUBLEWORD_ALIGN
17015 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
17020 else if (mp
->max_address
17021 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
17023 /* Inserting before this entry would push the fix beyond
17024 its maximum address (which can happen if we have
17025 re-located a forwards fix); force the new fix to come
17027 if (ARM_DOUBLEWORD_ALIGN
17028 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
17033 min_address
= mp
->min_address
+ fix
->fix_size
;
17036 /* Do not insert a non-8-byte aligned quantity before 8-byte
17037 aligned quantities. */
17038 else if (ARM_DOUBLEWORD_ALIGN
17039 && fix
->fix_size
< 8
17040 && mp
->fix_size
>= 8)
17043 min_address
= mp
->min_address
+ fix
->fix_size
;
17048 /* We need to create a new entry. */
17050 mp
->fix_size
= fix
->fix_size
;
17051 mp
->mode
= fix
->mode
;
17052 mp
->value
= fix
->value
;
17054 mp
->max_address
= minipool_barrier
->address
+ 65536;
17056 mp
->min_address
= min_address
;
17058 if (min_mp
== NULL
)
17061 mp
->next
= minipool_vector_head
;
17063 if (mp
->next
== NULL
)
17065 minipool_vector_tail
= mp
;
17066 minipool_vector_label
= gen_label_rtx ();
17069 mp
->next
->prev
= mp
;
17071 minipool_vector_head
= mp
;
17075 mp
->next
= min_mp
->next
;
17079 if (mp
->next
!= NULL
)
17080 mp
->next
->prev
= mp
;
17082 minipool_vector_tail
= mp
;
17085 /* Save the new entry. */
17093 /* Scan over the following entries and adjust their offsets. */
17094 while (mp
->next
!= NULL
)
17096 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
17097 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
17100 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
17102 mp
->next
->offset
= mp
->offset
;
17111 assign_minipool_offsets (Mfix
*barrier
)
17113 HOST_WIDE_INT offset
= 0;
17116 minipool_barrier
= barrier
;
17118 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
17120 mp
->offset
= offset
;
17122 if (mp
->refcount
> 0)
17123 offset
+= mp
->fix_size
;
17127 /* Output the literal table */
17129 dump_minipool (rtx_insn
*scan
)
17135 if (ARM_DOUBLEWORD_ALIGN
)
17136 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
17137 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
17144 fprintf (dump_file
,
17145 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17146 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
17148 scan
= emit_label_after (gen_label_rtx (), scan
);
17149 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
17150 scan
= emit_label_after (minipool_vector_label
, scan
);
17152 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
17154 if (mp
->refcount
> 0)
17158 fprintf (dump_file
,
17159 ";; Offset %u, min %ld, max %ld ",
17160 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
17161 (unsigned long) mp
->max_address
);
17162 arm_print_value (dump_file
, mp
->value
);
17163 fputc ('\n', dump_file
);
17166 rtx val
= copy_rtx (mp
->value
);
17168 switch (GET_MODE_SIZE (mp
->mode
))
17170 #ifdef HAVE_consttable_1
17172 scan
= emit_insn_after (gen_consttable_1 (val
), scan
);
17176 #ifdef HAVE_consttable_2
17178 scan
= emit_insn_after (gen_consttable_2 (val
), scan
);
17182 #ifdef HAVE_consttable_4
17184 scan
= emit_insn_after (gen_consttable_4 (val
), scan
);
17188 #ifdef HAVE_consttable_8
17190 scan
= emit_insn_after (gen_consttable_8 (val
), scan
);
17194 #ifdef HAVE_consttable_16
17196 scan
= emit_insn_after (gen_consttable_16 (val
), scan
);
17201 gcc_unreachable ();
17209 minipool_vector_head
= minipool_vector_tail
= NULL
;
17210 scan
= emit_insn_after (gen_consttable_end (), scan
);
17211 scan
= emit_barrier_after (scan
);
17214 /* Return the cost of forcibly inserting a barrier after INSN. */
17216 arm_barrier_cost (rtx_insn
*insn
)
17218 /* Basing the location of the pool on the loop depth is preferable,
17219 but at the moment, the basic block information seems to be
17220 corrupt by this stage of the compilation. */
17221 int base_cost
= 50;
17222 rtx_insn
*next
= next_nonnote_insn (insn
);
17224 if (next
!= NULL
&& LABEL_P (next
))
17227 switch (GET_CODE (insn
))
17230 /* It will always be better to place the table before the label, rather
17239 return base_cost
- 10;
17242 return base_cost
+ 10;
17246 /* Find the best place in the insn stream in the range
17247 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
17248 Create the barrier by inserting a jump and add a new fix entry for
17251 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
17253 HOST_WIDE_INT count
= 0;
17254 rtx_barrier
*barrier
;
17255 rtx_insn
*from
= fix
->insn
;
17256 /* The instruction after which we will insert the jump. */
17257 rtx_insn
*selected
= NULL
;
17259 /* The address at which the jump instruction will be placed. */
17260 HOST_WIDE_INT selected_address
;
17262 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
17263 rtx_code_label
*label
= gen_label_rtx ();
17265 selected_cost
= arm_barrier_cost (from
);
17266 selected_address
= fix
->address
;
17268 while (from
&& count
< max_count
)
17270 rtx_jump_table_data
*tmp
;
17273 /* This code shouldn't have been called if there was a natural barrier
17275 gcc_assert (!BARRIER_P (from
));
17277 /* Count the length of this insn. This must stay in sync with the
17278 code that pushes minipool fixes. */
17279 if (LABEL_P (from
))
17280 count
+= get_label_padding (from
);
17282 count
+= get_attr_length (from
);
17284 /* If there is a jump table, add its length. */
17285 if (tablejump_p (from
, NULL
, &tmp
))
17287 count
+= get_jump_table_size (tmp
);
17289 /* Jump tables aren't in a basic block, so base the cost on
17290 the dispatch insn. If we select this location, we will
17291 still put the pool after the table. */
17292 new_cost
= arm_barrier_cost (from
);
17294 if (count
< max_count
17295 && (!selected
|| new_cost
<= selected_cost
))
17298 selected_cost
= new_cost
;
17299 selected_address
= fix
->address
+ count
;
17302 /* Continue after the dispatch table. */
17303 from
= NEXT_INSN (tmp
);
17307 new_cost
= arm_barrier_cost (from
);
17309 if (count
< max_count
17310 && (!selected
|| new_cost
<= selected_cost
))
17313 selected_cost
= new_cost
;
17314 selected_address
= fix
->address
+ count
;
17317 from
= NEXT_INSN (from
);
17320 /* Make sure that we found a place to insert the jump. */
17321 gcc_assert (selected
);
17323 /* Create a new JUMP_INSN that branches around a barrier. */
17324 from
= emit_jump_insn_after (gen_jump (label
), selected
);
17325 JUMP_LABEL (from
) = label
;
17326 barrier
= emit_barrier_after (from
);
17327 emit_label_after (label
, barrier
);
17329 /* Create a minipool barrier entry for the new barrier. */
17330 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
17331 new_fix
->insn
= barrier
;
17332 new_fix
->address
= selected_address
;
17333 new_fix
->next
= fix
->next
;
17334 fix
->next
= new_fix
;
17339 /* Record that there is a natural barrier in the insn stream at
17342 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
17344 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
17347 fix
->address
= address
;
17350 if (minipool_fix_head
!= NULL
)
17351 minipool_fix_tail
->next
= fix
;
17353 minipool_fix_head
= fix
;
17355 minipool_fix_tail
= fix
;
17358 /* Record INSN, which will need fixing up to load a value from the
17359 minipool. ADDRESS is the offset of the insn since the start of the
17360 function; LOC is a pointer to the part of the insn which requires
17361 fixing; VALUE is the constant that must be loaded, which is of type
17364 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
17365 machine_mode mode
, rtx value
)
17367 gcc_assert (!arm_disable_literal_pool
);
17368 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
17371 fix
->address
= address
;
17374 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
17375 fix
->value
= value
;
17376 fix
->forwards
= get_attr_pool_range (insn
);
17377 fix
->backwards
= get_attr_neg_pool_range (insn
);
17378 fix
->minipool
= NULL
;
17380 /* If an insn doesn't have a range defined for it, then it isn't
17381 expecting to be reworked by this code. Better to stop now than
17382 to generate duff assembly code. */
17383 gcc_assert (fix
->forwards
|| fix
->backwards
);
17385 /* If an entry requires 8-byte alignment then assume all constant pools
17386 require 4 bytes of padding. Trying to do this later on a per-pool
17387 basis is awkward because existing pool entries have to be modified. */
17388 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
17393 fprintf (dump_file
,
17394 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17395 GET_MODE_NAME (mode
),
17396 INSN_UID (insn
), (unsigned long) address
,
17397 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
17398 arm_print_value (dump_file
, fix
->value
);
17399 fprintf (dump_file
, "\n");
17402 /* Add it to the chain of fixes. */
17405 if (minipool_fix_head
!= NULL
)
17406 minipool_fix_tail
->next
= fix
;
17408 minipool_fix_head
= fix
;
17410 minipool_fix_tail
= fix
;
17413 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
17414 Returns the number of insns needed, or 99 if we always want to synthesize
17417 arm_max_const_double_inline_cost ()
17419 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
17422 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17423 Returns the number of insns needed, or 99 if we don't know how to
17426 arm_const_double_inline_cost (rtx val
)
17428 rtx lowpart
, highpart
;
17431 mode
= GET_MODE (val
);
17433 if (mode
== VOIDmode
)
17436 gcc_assert (GET_MODE_SIZE (mode
) == 8);
17438 lowpart
= gen_lowpart (SImode
, val
);
17439 highpart
= gen_highpart_mode (SImode
, mode
, val
);
17441 gcc_assert (CONST_INT_P (lowpart
));
17442 gcc_assert (CONST_INT_P (highpart
));
17444 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
17445 NULL_RTX
, NULL_RTX
, 0, 0)
17446 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
17447 NULL_RTX
, NULL_RTX
, 0, 0));
17450 /* Cost of loading a SImode constant. */
17452 arm_const_inline_cost (enum rtx_code code
, rtx val
)
17454 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
17455 NULL_RTX
, NULL_RTX
, 1, 0);
17458 /* Return true if it is worthwhile to split a 64-bit constant into two
17459 32-bit operations. This is the case if optimizing for size, or
17460 if we have load delay slots, or if one 32-bit part can be done with
17461 a single data operation. */
17463 arm_const_double_by_parts (rtx val
)
17465 machine_mode mode
= GET_MODE (val
);
17468 if (optimize_size
|| arm_ld_sched
)
17471 if (mode
== VOIDmode
)
17474 part
= gen_highpart_mode (SImode
, mode
, val
);
17476 gcc_assert (CONST_INT_P (part
));
17478 if (const_ok_for_arm (INTVAL (part
))
17479 || const_ok_for_arm (~INTVAL (part
)))
17482 part
= gen_lowpart (SImode
, val
);
17484 gcc_assert (CONST_INT_P (part
));
17486 if (const_ok_for_arm (INTVAL (part
))
17487 || const_ok_for_arm (~INTVAL (part
)))
17493 /* Return true if it is possible to inline both the high and low parts
17494 of a 64-bit constant into 32-bit data processing instructions. */
17496 arm_const_double_by_immediates (rtx val
)
17498 machine_mode mode
= GET_MODE (val
);
17501 if (mode
== VOIDmode
)
17504 part
= gen_highpart_mode (SImode
, mode
, val
);
17506 gcc_assert (CONST_INT_P (part
));
17508 if (!const_ok_for_arm (INTVAL (part
)))
17511 part
= gen_lowpart (SImode
, val
);
17513 gcc_assert (CONST_INT_P (part
));
17515 if (!const_ok_for_arm (INTVAL (part
)))
17521 /* Scan INSN and note any of its operands that need fixing.
17522 If DO_PUSHES is false we do not actually push any of the fixups
17525 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
17529 extract_constrain_insn (insn
);
17531 if (recog_data
.n_alternatives
== 0)
17534 /* Fill in recog_op_alt with information about the constraints of
17536 preprocess_constraints (insn
);
17538 const operand_alternative
*op_alt
= which_op_alt ();
17539 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
17541 /* Things we need to fix can only occur in inputs. */
17542 if (recog_data
.operand_type
[opno
] != OP_IN
)
17545 /* If this alternative is a memory reference, then any mention
17546 of constants in this alternative is really to fool reload
17547 into allowing us to accept one there. We need to fix them up
17548 now so that we output the right code. */
17549 if (op_alt
[opno
].memory_ok
)
17551 rtx op
= recog_data
.operand
[opno
];
17553 if (CONSTANT_P (op
))
17556 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
17557 recog_data
.operand_mode
[opno
], op
);
17559 else if (MEM_P (op
)
17560 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
17561 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
17565 rtx cop
= avoid_constant_pool_reference (op
);
17567 /* Casting the address of something to a mode narrower
17568 than a word can cause avoid_constant_pool_reference()
17569 to return the pool reference itself. That's no good to
17570 us here. Lets just hope that we can use the
17571 constant pool value directly. */
17573 cop
= get_pool_constant (XEXP (op
, 0));
17575 push_minipool_fix (insn
, address
,
17576 recog_data
.operand_loc
[opno
],
17577 recog_data
.operand_mode
[opno
], cop
);
17587 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
17588 and unions in the context of ARMv8-M Security Extensions. It is used as a
17589 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
17590 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
17591 or four masks, depending on whether it is being computed for a
17592 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
17593 respectively. The tree for the type of the argument or a field within an
17594 argument is passed in ARG_TYPE, the current register this argument or field
17595 starts in is kept in the pointer REGNO and updated accordingly, the bit this
17596 argument or field starts at is passed in STARTING_BIT and the last used bit
17597 is kept in LAST_USED_BIT which is also updated accordingly. */
17599 static unsigned HOST_WIDE_INT
17600 comp_not_to_clear_mask_str_un (tree arg_type
, int * regno
,
17601 uint32_t * padding_bits_to_clear
,
17602 unsigned starting_bit
, int * last_used_bit
)
17605 unsigned HOST_WIDE_INT not_to_clear_reg_mask
= 0;
17607 if (TREE_CODE (arg_type
) == RECORD_TYPE
)
17609 unsigned current_bit
= starting_bit
;
17611 long int offset
, size
;
17614 field
= TYPE_FIELDS (arg_type
);
17617 /* The offset within a structure is always an offset from
17618 the start of that structure. Make sure we take that into the
17619 calculation of the register based offset that we use here. */
17620 offset
= starting_bit
;
17621 offset
+= TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field
), 0);
17624 /* This is the actual size of the field, for bitfields this is the
17625 bitfield width and not the container size. */
17626 size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
17628 if (*last_used_bit
!= offset
)
17630 if (offset
< *last_used_bit
)
17632 /* This field's offset is before the 'last_used_bit', that
17633 means this field goes on the next register. So we need to
17634 pad the rest of the current register and increase the
17635 register number. */
17637 mask
= ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit
);
17640 padding_bits_to_clear
[*regno
] |= mask
;
17641 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
17646 /* Otherwise we pad the bits between the last field's end and
17647 the start of the new field. */
17650 mask
= ((uint32_t)-1) >> (32 - offset
);
17651 mask
-= ((uint32_t) 1 << *last_used_bit
) - 1;
17652 padding_bits_to_clear
[*regno
] |= mask
;
17654 current_bit
= offset
;
17657 /* Calculate further padding bits for inner structs/unions too. */
17658 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field
)))
17660 *last_used_bit
= current_bit
;
17661 not_to_clear_reg_mask
17662 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field
), regno
,
17663 padding_bits_to_clear
, offset
,
17668 /* Update 'current_bit' with this field's size. If the
17669 'current_bit' lies in a subsequent register, update 'regno' and
17670 reset 'current_bit' to point to the current bit in that new
17672 current_bit
+= size
;
17673 while (current_bit
>= 32)
17676 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
17679 *last_used_bit
= current_bit
;
17682 field
= TREE_CHAIN (field
);
17684 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
17686 else if (TREE_CODE (arg_type
) == UNION_TYPE
)
17688 tree field
, field_t
;
17689 int i
, regno_t
, field_size
;
17693 uint32_t padding_bits_to_clear_res
[NUM_ARG_REGS
]
17694 = {-1, -1, -1, -1};
17696 /* To compute the padding bits in a union we only consider bits as
17697 padding bits if they are always either a padding bit or fall outside a
17698 fields size for all fields in the union. */
17699 field
= TYPE_FIELDS (arg_type
);
17702 uint32_t padding_bits_to_clear_t
[NUM_ARG_REGS
]
17703 = {0U, 0U, 0U, 0U};
17704 int last_used_bit_t
= *last_used_bit
;
17706 field_t
= TREE_TYPE (field
);
17708 /* If the field's type is either a record or a union make sure to
17709 compute their padding bits too. */
17710 if (RECORD_OR_UNION_TYPE_P (field_t
))
17711 not_to_clear_reg_mask
17712 |= comp_not_to_clear_mask_str_un (field_t
, ®no_t
,
17713 &padding_bits_to_clear_t
[0],
17714 starting_bit
, &last_used_bit_t
);
17717 field_size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
17718 regno_t
= (field_size
/ 32) + *regno
;
17719 last_used_bit_t
= (starting_bit
+ field_size
) % 32;
17722 for (i
= *regno
; i
< regno_t
; i
++)
17724 /* For all but the last register used by this field only keep the
17725 padding bits that were padding bits in this field. */
17726 padding_bits_to_clear_res
[i
] &= padding_bits_to_clear_t
[i
];
17729 /* For the last register, keep all padding bits that were padding
17730 bits in this field and any padding bits that are still valid
17731 as padding bits but fall outside of this field's size. */
17732 mask
= (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t
)) + 1;
17733 padding_bits_to_clear_res
[regno_t
]
17734 &= padding_bits_to_clear_t
[regno_t
] | mask
;
17736 /* Update the maximum size of the fields in terms of registers used
17737 ('max_reg') and the 'last_used_bit' in said register. */
17738 if (max_reg
< regno_t
)
17741 max_bit
= last_used_bit_t
;
17743 else if (max_reg
== regno_t
&& max_bit
< last_used_bit_t
)
17744 max_bit
= last_used_bit_t
;
17746 field
= TREE_CHAIN (field
);
17749 /* Update the current padding_bits_to_clear using the intersection of the
17750 padding bits of all the fields. */
17751 for (i
=*regno
; i
< max_reg
; i
++)
17752 padding_bits_to_clear
[i
] |= padding_bits_to_clear_res
[i
];
17754 /* Do not keep trailing padding bits, we do not know yet whether this
17755 is the end of the argument. */
17756 mask
= ((uint32_t) 1 << max_bit
) - 1;
17757 padding_bits_to_clear
[max_reg
]
17758 |= padding_bits_to_clear_res
[max_reg
] & mask
;
17761 *last_used_bit
= max_bit
;
17764 /* This function should only be used for structs and unions. */
17765 gcc_unreachable ();
17767 return not_to_clear_reg_mask
;
17770 /* In the context of ARMv8-M Security Extensions, this function is used for both
17771 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17772 registers are used when returning or passing arguments, which is then
17773 returned as a mask. It will also compute a mask to indicate padding/unused
17774 bits for each of these registers, and passes this through the
17775 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17776 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17777 the starting register used to pass this argument or return value is passed
17778 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17779 for struct and union types. */
17781 static unsigned HOST_WIDE_INT
17782 compute_not_to_clear_mask (tree arg_type
, rtx arg_rtx
, int regno
,
17783 uint32_t * padding_bits_to_clear
)
17786 int last_used_bit
= 0;
17787 unsigned HOST_WIDE_INT not_to_clear_mask
;
17789 if (RECORD_OR_UNION_TYPE_P (arg_type
))
17792 = comp_not_to_clear_mask_str_un (arg_type
, ®no
,
17793 padding_bits_to_clear
, 0,
17797 /* If the 'last_used_bit' is not zero, that means we are still using a
17798 part of the last 'regno'. In such cases we must clear the trailing
17799 bits. Otherwise we are not using regno and we should mark it as to
17801 if (last_used_bit
!= 0)
17802 padding_bits_to_clear
[regno
]
17803 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit
) + 1;
17805 not_to_clear_mask
&= ~(HOST_WIDE_INT_1U
<< regno
);
17809 not_to_clear_mask
= 0;
17810 /* We are not dealing with structs nor unions. So these arguments may be
17811 passed in floating point registers too. In some cases a BLKmode is
17812 used when returning or passing arguments in multiple VFP registers. */
17813 if (GET_MODE (arg_rtx
) == BLKmode
)
17818 /* This should really only occur when dealing with the hard-float
17820 gcc_assert (TARGET_HARD_FLOAT_ABI
);
17822 for (i
= 0; i
< XVECLEN (arg_rtx
, 0); i
++)
17824 reg
= XEXP (XVECEXP (arg_rtx
, 0, i
), 0);
17825 gcc_assert (REG_P (reg
));
17827 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (reg
);
17829 /* If we are dealing with DF mode, make sure we don't
17830 clear either of the registers it addresses. */
17831 arg_regs
= ARM_NUM_REGS (GET_MODE (reg
));
17834 unsigned HOST_WIDE_INT mask
;
17835 mask
= HOST_WIDE_INT_1U
<< (REGNO (reg
) + arg_regs
);
17836 mask
-= HOST_WIDE_INT_1U
<< REGNO (reg
);
17837 not_to_clear_mask
|= mask
;
17843 /* Otherwise we can rely on the MODE to determine how many registers
17844 are being used by this argument. */
17845 int arg_regs
= ARM_NUM_REGS (GET_MODE (arg_rtx
));
17846 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
17849 unsigned HOST_WIDE_INT
17850 mask
= HOST_WIDE_INT_1U
<< (REGNO (arg_rtx
) + arg_regs
);
17851 mask
-= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
17852 not_to_clear_mask
|= mask
;
17857 return not_to_clear_mask
;
17860 /* Clear registers secret before doing a cmse_nonsecure_call or returning from
17861 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
17862 are to be fully cleared, using the value in register CLEARING_REG if more
17863 efficient. The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
17864 the bits that needs to be cleared in caller-saved core registers, with
17865 SCRATCH_REG used as a scratch register for that clearing.
17867 NOTE: one of three following assertions must hold:
17868 - SCRATCH_REG is a low register
17869 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17870 in TO_CLEAR_BITMAP)
17871 - CLEARING_REG is a low register. */
17874 cmse_clear_registers (sbitmap to_clear_bitmap
, uint32_t *padding_bits_to_clear
,
17875 int padding_bits_len
, rtx scratch_reg
, rtx clearing_reg
)
17877 bool saved_clearing
= false;
17878 rtx saved_clearing_reg
= NULL_RTX
;
17879 int i
, regno
, clearing_regno
, minregno
= R0_REGNUM
, maxregno
= minregno
- 1;
17881 gcc_assert (arm_arch_cmse
);
17883 if (!bitmap_empty_p (to_clear_bitmap
))
17885 minregno
= bitmap_first_set_bit (to_clear_bitmap
);
17886 maxregno
= bitmap_last_set_bit (to_clear_bitmap
);
17888 clearing_regno
= REGNO (clearing_reg
);
17890 /* Clear padding bits. */
17891 gcc_assert (padding_bits_len
<= NUM_ARG_REGS
);
17892 for (i
= 0, regno
= R0_REGNUM
; i
< padding_bits_len
; i
++, regno
++)
17895 rtx rtx16
, dest
, cleared_reg
= gen_rtx_REG (SImode
, regno
);
17897 if (padding_bits_to_clear
[i
] == 0)
17900 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17901 CLEARING_REG as scratch. */
17903 && REGNO (scratch_reg
) > LAST_LO_REGNUM
)
17905 /* clearing_reg is not to be cleared, copy its value into scratch_reg
17906 such that we can use clearing_reg to clear the unused bits in the
17908 if ((clearing_regno
> maxregno
17909 || !bitmap_bit_p (to_clear_bitmap
, clearing_regno
))
17910 && !saved_clearing
)
17912 gcc_assert (clearing_regno
<= LAST_LO_REGNUM
);
17913 emit_move_insn (scratch_reg
, clearing_reg
);
17914 saved_clearing
= true;
17915 saved_clearing_reg
= scratch_reg
;
17917 scratch_reg
= clearing_reg
;
17920 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17921 mask
= (~padding_bits_to_clear
[i
]) & 0xFFFF;
17922 emit_move_insn (scratch_reg
, gen_int_mode (mask
, SImode
));
17924 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17925 mask
= (~padding_bits_to_clear
[i
]) >> 16;
17926 rtx16
= gen_int_mode (16, SImode
);
17927 dest
= gen_rtx_ZERO_EXTRACT (SImode
, scratch_reg
, rtx16
, rtx16
);
17929 emit_insn (gen_rtx_SET (dest
, gen_int_mode (mask
, SImode
)));
17931 emit_insn (gen_andsi3 (cleared_reg
, cleared_reg
, scratch_reg
));
17933 if (saved_clearing
)
17934 emit_move_insn (clearing_reg
, saved_clearing_reg
);
17937 /* Clear full registers. */
17939 /* If not marked for clearing, clearing_reg already does not contain
17941 if (clearing_regno
<= maxregno
17942 && bitmap_bit_p (to_clear_bitmap
, clearing_regno
))
17944 emit_move_insn (clearing_reg
, const0_rtx
);
17945 emit_use (clearing_reg
);
17946 bitmap_clear_bit (to_clear_bitmap
, clearing_regno
);
17949 for (regno
= minregno
; regno
<= maxregno
; regno
++)
17951 if (!bitmap_bit_p (to_clear_bitmap
, regno
))
17954 if (IS_VFP_REGNUM (regno
))
17956 /* If regno is an even vfp register and its successor is also to
17957 be cleared, use vmov. */
17958 if (TARGET_VFP_DOUBLE
17959 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
17960 && bitmap_bit_p (to_clear_bitmap
, regno
+ 1))
17962 emit_move_insn (gen_rtx_REG (DFmode
, regno
),
17963 CONST1_RTX (DFmode
));
17964 emit_use (gen_rtx_REG (DFmode
, regno
));
17969 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
17970 CONST1_RTX (SFmode
));
17971 emit_use (gen_rtx_REG (SFmode
, regno
));
17976 emit_move_insn (gen_rtx_REG (SImode
, regno
), clearing_reg
);
17977 emit_use (gen_rtx_REG (SImode
, regno
));
17982 /* Clears caller saved registers not used to pass arguments before a
17983 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
17984 registers is done in __gnu_cmse_nonsecure_call libcall.
17985 See libgcc/config/arm/cmse_nonsecure_call.S. */
17988 cmse_nonsecure_call_clear_caller_saved (void)
17992 FOR_EACH_BB_FN (bb
, cfun
)
17996 FOR_BB_INSNS (bb
, insn
)
17998 unsigned address_regnum
, regno
, maxregno
=
17999 TARGET_HARD_FLOAT_ABI
? D7_VFP_REGNUM
: NUM_ARG_REGS
- 1;
18000 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
18002 rtx pat
, call
, unspec
, clearing_reg
, ip_reg
, shift
;
18004 CUMULATIVE_ARGS args_so_far_v
;
18005 cumulative_args_t args_so_far
;
18006 tree arg_type
, fntype
;
18007 bool first_param
= true;
18008 function_args_iterator args_iter
;
18009 uint32_t padding_bits_to_clear
[4] = {0U, 0U, 0U, 0U};
18011 if (!NONDEBUG_INSN_P (insn
))
18014 if (!CALL_P (insn
))
18017 pat
= PATTERN (insn
);
18018 gcc_assert (GET_CODE (pat
) == PARALLEL
&& XVECLEN (pat
, 0) > 0);
18019 call
= XVECEXP (pat
, 0, 0);
18021 /* Get the real call RTX if the insn sets a value, ie. returns. */
18022 if (GET_CODE (call
) == SET
)
18023 call
= SET_SRC (call
);
18025 /* Check if it is a cmse_nonsecure_call. */
18026 unspec
= XEXP (call
, 0);
18027 if (GET_CODE (unspec
) != UNSPEC
18028 || XINT (unspec
, 1) != UNSPEC_NONSECURE_MEM
)
18031 /* Determine the caller-saved registers we need to clear. */
18032 bitmap_clear (to_clear_bitmap
);
18033 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
, NUM_ARG_REGS
);
18035 /* Only look at the caller-saved floating point registers in case of
18036 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
18037 lazy store and loads which clear both caller- and callee-saved
18039 if (TARGET_HARD_FLOAT_ABI
)
18041 auto_sbitmap
float_bitmap (maxregno
+ 1);
18043 bitmap_clear (float_bitmap
);
18044 bitmap_set_range (float_bitmap
, FIRST_VFP_REGNUM
,
18045 D7_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1);
18046 bitmap_ior (to_clear_bitmap
, to_clear_bitmap
, float_bitmap
);
18049 /* Make sure the register used to hold the function address is not
18051 address
= RTVEC_ELT (XVEC (unspec
, 0), 0);
18052 gcc_assert (MEM_P (address
));
18053 gcc_assert (REG_P (XEXP (address
, 0)));
18054 address_regnum
= REGNO (XEXP (address
, 0));
18055 if (address_regnum
< R0_REGNUM
+ NUM_ARG_REGS
)
18056 bitmap_clear_bit (to_clear_bitmap
, address_regnum
);
18058 /* Set basic block of call insn so that df rescan is performed on
18059 insns inserted here. */
18060 set_block_for_insn (insn
, bb
);
18061 df_set_flags (DF_DEFER_INSN_RESCAN
);
18064 /* Make sure the scheduler doesn't schedule other insns beyond
18066 emit_insn (gen_blockage ());
18068 /* Walk through all arguments and clear registers appropriately.
18070 fntype
= TREE_TYPE (MEM_EXPR (address
));
18071 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
,
18073 args_so_far
= pack_cumulative_args (&args_so_far_v
);
18074 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
18077 uint64_t to_clear_args_mask
;
18079 if (VOID_TYPE_P (arg_type
))
18082 function_arg_info
arg (arg_type
, /*named=*/true);
18084 /* ??? We should advance after processing the argument and pass
18085 the argument we're advancing past. */
18086 arm_function_arg_advance (args_so_far
, arg
);
18088 arg_rtx
= arm_function_arg (args_so_far
, arg
);
18089 gcc_assert (REG_P (arg_rtx
));
18091 = compute_not_to_clear_mask (arg_type
, arg_rtx
,
18093 &padding_bits_to_clear
[0]);
18094 if (to_clear_args_mask
)
18096 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
18098 if (to_clear_args_mask
& (1ULL << regno
))
18099 bitmap_clear_bit (to_clear_bitmap
, regno
);
18103 first_param
= false;
18106 /* We use right shift and left shift to clear the LSB of the address
18107 we jump to instead of using bic, to avoid having to use an extra
18108 register on Thumb-1. */
18109 clearing_reg
= XEXP (address
, 0);
18110 shift
= gen_rtx_LSHIFTRT (SImode
, clearing_reg
, const1_rtx
);
18111 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
18112 shift
= gen_rtx_ASHIFT (SImode
, clearing_reg
, const1_rtx
);
18113 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
18115 /* Clear caller-saved registers that leak before doing a non-secure
18117 ip_reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
18118 cmse_clear_registers (to_clear_bitmap
, padding_bits_to_clear
,
18119 NUM_ARG_REGS
, ip_reg
, clearing_reg
);
18121 seq
= get_insns ();
18123 emit_insn_before (seq
, insn
);
18128 /* Rewrite move insn into subtract of 0 if the condition codes will
18129 be useful in next conditional jump insn. */
18132 thumb1_reorg (void)
18136 FOR_EACH_BB_FN (bb
, cfun
)
18139 rtx cmp
, op0
, op1
, set
= NULL
;
18140 rtx_insn
*prev
, *insn
= BB_END (bb
);
18141 bool insn_clobbered
= false;
18143 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
18144 insn
= PREV_INSN (insn
);
18146 /* Find the last cbranchsi4_insn in basic block BB. */
18147 if (insn
== BB_HEAD (bb
)
18148 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
18151 /* Get the register with which we are comparing. */
18152 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
18153 op0
= XEXP (cmp
, 0);
18154 op1
= XEXP (cmp
, 1);
18156 /* Check that comparison is against ZERO. */
18157 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
18160 /* Find the first flag setting insn before INSN in basic block BB. */
18161 gcc_assert (insn
!= BB_HEAD (bb
));
18162 for (prev
= PREV_INSN (insn
);
18164 && prev
!= BB_HEAD (bb
)
18166 || DEBUG_INSN_P (prev
)
18167 || ((set
= single_set (prev
)) != NULL
18168 && get_attr_conds (prev
) == CONDS_NOCOND
)));
18169 prev
= PREV_INSN (prev
))
18171 if (reg_set_p (op0
, prev
))
18172 insn_clobbered
= true;
18175 /* Skip if op0 is clobbered by insn other than prev. */
18176 if (insn_clobbered
)
18182 dest
= SET_DEST (set
);
18183 src
= SET_SRC (set
);
18184 if (!low_register_operand (dest
, SImode
)
18185 || !low_register_operand (src
, SImode
))
18188 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
18189 in INSN. Both src and dest of the move insn are checked. */
18190 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
18192 dest
= copy_rtx (dest
);
18193 src
= copy_rtx (src
);
18194 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
18195 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
18196 INSN_CODE (prev
) = -1;
18197 /* Set test register in INSN to dest. */
18198 XEXP (cmp
, 0) = copy_rtx (dest
);
18199 INSN_CODE (insn
) = -1;
18204 /* Convert instructions to their cc-clobbering variant if possible, since
18205 that allows us to use smaller encodings. */
18208 thumb2_reorg (void)
18213 INIT_REG_SET (&live
);
18215 /* We are freeing block_for_insn in the toplev to keep compatibility
18216 with old MDEP_REORGS that are not CFG based. Recompute it now. */
18217 compute_bb_for_insn ();
18220 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
18222 FOR_EACH_BB_FN (bb
, cfun
)
18224 if ((current_tune
->disparage_flag_setting_t16_encodings
18225 == tune_params::DISPARAGE_FLAGS_ALL
)
18226 && optimize_bb_for_speed_p (bb
))
18230 Convert_Action action
= SKIP
;
18231 Convert_Action action_for_partial_flag_setting
18232 = ((current_tune
->disparage_flag_setting_t16_encodings
18233 != tune_params::DISPARAGE_FLAGS_NEITHER
)
18234 && optimize_bb_for_speed_p (bb
))
18237 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
18238 df_simulate_initialize_backwards (bb
, &live
);
18239 FOR_BB_INSNS_REVERSE (bb
, insn
)
18241 if (NONJUMP_INSN_P (insn
)
18242 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
18243 && GET_CODE (PATTERN (insn
)) == SET
)
18246 rtx pat
= PATTERN (insn
);
18247 rtx dst
= XEXP (pat
, 0);
18248 rtx src
= XEXP (pat
, 1);
18249 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
18251 if (UNARY_P (src
) || BINARY_P (src
))
18252 op0
= XEXP (src
, 0);
18254 if (BINARY_P (src
))
18255 op1
= XEXP (src
, 1);
18257 if (low_register_operand (dst
, SImode
))
18259 switch (GET_CODE (src
))
18262 /* Adding two registers and storing the result
18263 in the first source is already a 16-bit
18265 if (rtx_equal_p (dst
, op0
)
18266 && register_operand (op1
, SImode
))
18269 if (low_register_operand (op0
, SImode
))
18271 /* ADDS <Rd>,<Rn>,<Rm> */
18272 if (low_register_operand (op1
, SImode
))
18274 /* ADDS <Rdn>,#<imm8> */
18275 /* SUBS <Rdn>,#<imm8> */
18276 else if (rtx_equal_p (dst
, op0
)
18277 && CONST_INT_P (op1
)
18278 && IN_RANGE (INTVAL (op1
), -255, 255))
18280 /* ADDS <Rd>,<Rn>,#<imm3> */
18281 /* SUBS <Rd>,<Rn>,#<imm3> */
18282 else if (CONST_INT_P (op1
)
18283 && IN_RANGE (INTVAL (op1
), -7, 7))
18286 /* ADCS <Rd>, <Rn> */
18287 else if (GET_CODE (XEXP (src
, 0)) == PLUS
18288 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
18289 && low_register_operand (XEXP (XEXP (src
, 0), 1),
18291 && COMPARISON_P (op1
)
18292 && cc_register (XEXP (op1
, 0), VOIDmode
)
18293 && maybe_get_arm_condition_code (op1
) == ARM_CS
18294 && XEXP (op1
, 1) == const0_rtx
)
18299 /* RSBS <Rd>,<Rn>,#0
18300 Not handled here: see NEG below. */
18301 /* SUBS <Rd>,<Rn>,#<imm3>
18303 Not handled here: see PLUS above. */
18304 /* SUBS <Rd>,<Rn>,<Rm> */
18305 if (low_register_operand (op0
, SImode
)
18306 && low_register_operand (op1
, SImode
))
18311 /* MULS <Rdm>,<Rn>,<Rdm>
18312 As an exception to the rule, this is only used
18313 when optimizing for size since MULS is slow on all
18314 known implementations. We do not even want to use
18315 MULS in cold code, if optimizing for speed, so we
18316 test the global flag here. */
18317 if (!optimize_size
)
18319 /* Fall through. */
18323 /* ANDS <Rdn>,<Rm> */
18324 if (rtx_equal_p (dst
, op0
)
18325 && low_register_operand (op1
, SImode
))
18326 action
= action_for_partial_flag_setting
;
18327 else if (rtx_equal_p (dst
, op1
)
18328 && low_register_operand (op0
, SImode
))
18329 action
= action_for_partial_flag_setting
== SKIP
18330 ? SKIP
: SWAP_CONV
;
18336 /* ASRS <Rdn>,<Rm> */
18337 /* LSRS <Rdn>,<Rm> */
18338 /* LSLS <Rdn>,<Rm> */
18339 if (rtx_equal_p (dst
, op0
)
18340 && low_register_operand (op1
, SImode
))
18341 action
= action_for_partial_flag_setting
;
18342 /* ASRS <Rd>,<Rm>,#<imm5> */
18343 /* LSRS <Rd>,<Rm>,#<imm5> */
18344 /* LSLS <Rd>,<Rm>,#<imm5> */
18345 else if (low_register_operand (op0
, SImode
)
18346 && CONST_INT_P (op1
)
18347 && IN_RANGE (INTVAL (op1
), 0, 31))
18348 action
= action_for_partial_flag_setting
;
18352 /* RORS <Rdn>,<Rm> */
18353 if (rtx_equal_p (dst
, op0
)
18354 && low_register_operand (op1
, SImode
))
18355 action
= action_for_partial_flag_setting
;
18359 /* MVNS <Rd>,<Rm> */
18360 if (low_register_operand (op0
, SImode
))
18361 action
= action_for_partial_flag_setting
;
18365 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
18366 if (low_register_operand (op0
, SImode
))
18371 /* MOVS <Rd>,#<imm8> */
18372 if (CONST_INT_P (src
)
18373 && IN_RANGE (INTVAL (src
), 0, 255))
18374 action
= action_for_partial_flag_setting
;
18378 /* MOVS and MOV<c> with registers have different
18379 encodings, so are not relevant here. */
18387 if (action
!= SKIP
)
18389 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
18390 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
18393 if (action
== SWAP_CONV
)
18395 src
= copy_rtx (src
);
18396 XEXP (src
, 0) = op1
;
18397 XEXP (src
, 1) = op0
;
18398 pat
= gen_rtx_SET (dst
, src
);
18399 vec
= gen_rtvec (2, pat
, clobber
);
18401 else /* action == CONV */
18402 vec
= gen_rtvec (2, pat
, clobber
);
18404 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
18405 INSN_CODE (insn
) = -1;
18409 if (NONDEBUG_INSN_P (insn
))
18410 df_simulate_one_insn_backwards (bb
, insn
, &live
);
18414 CLEAR_REG_SET (&live
);
18417 /* Gcc puts the pool in the wrong place for ARM, since we can only
18418 load addresses a limited distance around the pc. We do some
18419 special munging to move the constant pool values to the correct
18420 point in the code. */
18425 HOST_WIDE_INT address
= 0;
18429 cmse_nonsecure_call_clear_caller_saved ();
18431 /* We cannot run the Thumb passes for thunks because there is no CFG. */
18432 if (cfun
->is_thunk
)
18434 else if (TARGET_THUMB1
)
18436 else if (TARGET_THUMB2
)
18439 /* Ensure all insns that must be split have been split at this point.
18440 Otherwise, the pool placement code below may compute incorrect
18441 insn lengths. Note that when optimizing, all insns have already
18442 been split at this point. */
18444 split_all_insns_noflow ();
18446 /* Make sure we do not attempt to create a literal pool even though it should
18447 no longer be necessary to create any. */
18448 if (arm_disable_literal_pool
)
18451 minipool_fix_head
= minipool_fix_tail
= NULL
;
18453 /* The first insn must always be a note, or the code below won't
18454 scan it properly. */
18455 insn
= get_insns ();
18456 gcc_assert (NOTE_P (insn
));
18459 /* Scan all the insns and record the operands that will need fixing. */
18460 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
18462 if (BARRIER_P (insn
))
18463 push_minipool_barrier (insn
, address
);
18464 else if (INSN_P (insn
))
18466 rtx_jump_table_data
*table
;
18468 note_invalid_constants (insn
, address
, true);
18469 address
+= get_attr_length (insn
);
18471 /* If the insn is a vector jump, add the size of the table
18472 and skip the table. */
18473 if (tablejump_p (insn
, NULL
, &table
))
18475 address
+= get_jump_table_size (table
);
18479 else if (LABEL_P (insn
))
18480 /* Add the worst-case padding due to alignment. We don't add
18481 the _current_ padding because the minipool insertions
18482 themselves might change it. */
18483 address
+= get_label_padding (insn
);
18486 fix
= minipool_fix_head
;
18488 /* Now scan the fixups and perform the required changes. */
18493 Mfix
* last_added_fix
;
18494 Mfix
* last_barrier
= NULL
;
18497 /* Skip any further barriers before the next fix. */
18498 while (fix
&& BARRIER_P (fix
->insn
))
18501 /* No more fixes. */
18505 last_added_fix
= NULL
;
18507 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
18509 if (BARRIER_P (ftmp
->insn
))
18511 if (ftmp
->address
>= minipool_vector_head
->max_address
)
18514 last_barrier
= ftmp
;
18516 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
18519 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
18522 /* If we found a barrier, drop back to that; any fixes that we
18523 could have reached but come after the barrier will now go in
18524 the next mini-pool. */
18525 if (last_barrier
!= NULL
)
18527 /* Reduce the refcount for those fixes that won't go into this
18529 for (fdel
= last_barrier
->next
;
18530 fdel
&& fdel
!= ftmp
;
18533 fdel
->minipool
->refcount
--;
18534 fdel
->minipool
= NULL
;
18537 ftmp
= last_barrier
;
18541 /* ftmp is first fix that we can't fit into this pool and
18542 there no natural barriers that we could use. Insert a
18543 new barrier in the code somewhere between the previous
18544 fix and this one, and arrange to jump around it. */
18545 HOST_WIDE_INT max_address
;
18547 /* The last item on the list of fixes must be a barrier, so
18548 we can never run off the end of the list of fixes without
18549 last_barrier being set. */
18552 max_address
= minipool_vector_head
->max_address
;
18553 /* Check that there isn't another fix that is in range that
18554 we couldn't fit into this pool because the pool was
18555 already too large: we need to put the pool before such an
18556 instruction. The pool itself may come just after the
18557 fix because create_fix_barrier also allows space for a
18558 jump instruction. */
18559 if (ftmp
->address
< max_address
)
18560 max_address
= ftmp
->address
+ 1;
18562 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
18565 assign_minipool_offsets (last_barrier
);
18569 if (!BARRIER_P (ftmp
->insn
)
18570 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
18577 /* Scan over the fixes we have identified for this pool, fixing them
18578 up and adding the constants to the pool itself. */
18579 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
18580 this_fix
= this_fix
->next
)
18581 if (!BARRIER_P (this_fix
->insn
))
18584 = plus_constant (Pmode
,
18585 gen_rtx_LABEL_REF (VOIDmode
,
18586 minipool_vector_label
),
18587 this_fix
->minipool
->offset
);
18588 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
18591 dump_minipool (last_barrier
->insn
);
18595 /* From now on we must synthesize any constants that we can't handle
18596 directly. This can happen if the RTL gets split during final
18597 instruction generation. */
18598 cfun
->machine
->after_arm_reorg
= 1;
18600 /* Free the minipool memory. */
18601 obstack_free (&minipool_obstack
, minipool_startobj
);
18604 /* Routines to output assembly language. */
18606 /* Return string representation of passed in real value. */
18607 static const char *
18608 fp_const_from_val (REAL_VALUE_TYPE
*r
)
18610 if (!fp_consts_inited
)
18613 gcc_assert (real_equal (r
, &value_fp0
));
18617 /* OPERANDS[0] is the entire list of insns that constitute pop,
18618 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
18619 is in the list, UPDATE is true iff the list contains explicit
18620 update of base register. */
18622 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
18628 const char *conditional
;
18629 int num_saves
= XVECLEN (operands
[0], 0);
18630 unsigned int regno
;
18631 unsigned int regno_base
= REGNO (operands
[1]);
18632 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
18635 offset
+= update
? 1 : 0;
18636 offset
+= return_pc
? 1 : 0;
18638 /* Is the base register in the list? */
18639 for (i
= offset
; i
< num_saves
; i
++)
18641 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
18642 /* If SP is in the list, then the base register must be SP. */
18643 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
18644 /* If base register is in the list, there must be no explicit update. */
18645 if (regno
== regno_base
)
18646 gcc_assert (!update
);
18649 conditional
= reverse
? "%?%D0" : "%?%d0";
18650 /* Can't use POP if returning from an interrupt. */
18651 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
18652 sprintf (pattern
, "pop%s\t{", conditional
);
18655 /* Output ldmfd when the base register is SP, otherwise output ldmia.
18656 It's just a convention, their semantics are identical. */
18657 if (regno_base
== SP_REGNUM
)
18658 sprintf (pattern
, "ldmfd%s\t", conditional
);
18660 sprintf (pattern
, "ldmia%s\t", conditional
);
18662 sprintf (pattern
, "ldm%s\t", conditional
);
18664 strcat (pattern
, reg_names
[regno_base
]);
18666 strcat (pattern
, "!, {");
18668 strcat (pattern
, ", {");
18671 /* Output the first destination register. */
18673 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
18675 /* Output the rest of the destination registers. */
18676 for (i
= offset
+ 1; i
< num_saves
; i
++)
18678 strcat (pattern
, ", ");
18680 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
18683 strcat (pattern
, "}");
18685 if (interrupt_p
&& return_pc
)
18686 strcat (pattern
, "^");
18688 output_asm_insn (pattern
, &cond
);
18692 /* Output the assembly for a store multiple. */
18695 vfp_output_vstmd (rtx
* operands
)
18701 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
18702 ? XEXP (operands
[0], 0)
18703 : XEXP (XEXP (operands
[0], 0), 0);
18704 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
18707 strcpy (pattern
, "vpush%?.64\t{%P1");
18709 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
18711 p
= strlen (pattern
);
18713 gcc_assert (REG_P (operands
[1]));
18715 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
18716 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
18718 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
18720 strcpy (&pattern
[p
], "}");
18722 output_asm_insn (pattern
, operands
);
18727 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
18728 number of bytes pushed. */
18731 vfp_emit_fstmd (int base_reg
, int count
)
18738 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
18739 register pairs are stored by a store multiple insn. We avoid this
18740 by pushing an extra pair. */
18741 if (count
== 2 && !arm_arch6
)
18743 if (base_reg
== LAST_VFP_REGNUM
- 3)
18748 /* FSTMD may not store more than 16 doubleword registers at once. Split
18749 larger stores into multiple parts (up to a maximum of two, in
18754 /* NOTE: base_reg is an internal register number, so each D register
18756 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
18757 saved
+= vfp_emit_fstmd (base_reg
, 16);
18761 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
18762 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
18764 reg
= gen_rtx_REG (DFmode
, base_reg
);
18767 XVECEXP (par
, 0, 0)
18768 = gen_rtx_SET (gen_frame_mem
18770 gen_rtx_PRE_MODIFY (Pmode
,
18773 (Pmode
, stack_pointer_rtx
,
18776 gen_rtx_UNSPEC (BLKmode
,
18777 gen_rtvec (1, reg
),
18778 UNSPEC_PUSH_MULT
));
18780 tmp
= gen_rtx_SET (stack_pointer_rtx
,
18781 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
18782 RTX_FRAME_RELATED_P (tmp
) = 1;
18783 XVECEXP (dwarf
, 0, 0) = tmp
;
18785 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
18786 RTX_FRAME_RELATED_P (tmp
) = 1;
18787 XVECEXP (dwarf
, 0, 1) = tmp
;
18789 for (i
= 1; i
< count
; i
++)
18791 reg
= gen_rtx_REG (DFmode
, base_reg
);
18793 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
18795 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
18796 plus_constant (Pmode
,
18800 RTX_FRAME_RELATED_P (tmp
) = 1;
18801 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
18804 par
= emit_insn (par
);
18805 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
18806 RTX_FRAME_RELATED_P (par
) = 1;
18811 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
18812 has the cmse_nonsecure_call attribute and returns false otherwise. */
18815 detect_cmse_nonsecure_call (tree addr
)
18820 tree fntype
= TREE_TYPE (addr
);
18821 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_call",
18822 TYPE_ATTRIBUTES (fntype
)))
18828 /* Emit a call instruction with pattern PAT. ADDR is the address of
18829 the call target. */
18832 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
18836 insn
= emit_call_insn (pat
);
18838 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18839 If the call might use such an entry, add a use of the PIC register
18840 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18841 if (TARGET_VXWORKS_RTP
18844 && GET_CODE (addr
) == SYMBOL_REF
18845 && (SYMBOL_REF_DECL (addr
)
18846 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
18847 : !SYMBOL_REF_LOCAL_P (addr
)))
18849 require_pic_register (NULL_RTX
, false /*compute_now*/);
18850 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
18855 rtx fdpic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
18856 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), fdpic_reg
);
18859 if (TARGET_AAPCS_BASED
)
18861 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18862 linker. We need to add an IP clobber to allow setting
18863 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18864 is not needed since it's a fixed register. */
18865 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
18866 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
18870 /* Output a 'call' insn. */
18872 output_call (rtx
*operands
)
18874 gcc_assert (!arm_arch5t
); /* Patterns should call blx <reg> directly. */
18876 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18877 if (REGNO (operands
[0]) == LR_REGNUM
)
18879 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
18880 output_asm_insn ("mov%?\t%0, %|lr", operands
);
18883 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
18885 if (TARGET_INTERWORK
|| arm_arch4t
)
18886 output_asm_insn ("bx%?\t%0", operands
);
18888 output_asm_insn ("mov%?\t%|pc, %0", operands
);
18893 /* Output a move from arm registers to arm registers of a long double
18894 OPERANDS[0] is the destination.
18895 OPERANDS[1] is the source. */
18897 output_mov_long_double_arm_from_arm (rtx
*operands
)
18899 /* We have to be careful here because the two might overlap. */
18900 int dest_start
= REGNO (operands
[0]);
18901 int src_start
= REGNO (operands
[1]);
18905 if (dest_start
< src_start
)
18907 for (i
= 0; i
< 3; i
++)
18909 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
18910 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
18911 output_asm_insn ("mov%?\t%0, %1", ops
);
18916 for (i
= 2; i
>= 0; i
--)
18918 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
18919 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
18920 output_asm_insn ("mov%?\t%0, %1", ops
);
18928 arm_emit_movpair (rtx dest
, rtx src
)
18930 /* If the src is an immediate, simplify it. */
18931 if (CONST_INT_P (src
))
18933 HOST_WIDE_INT val
= INTVAL (src
);
18934 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
18935 if ((val
>> 16) & 0x0000ffff)
18937 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
18939 GEN_INT ((val
>> 16) & 0x0000ffff));
18940 rtx_insn
*insn
= get_last_insn ();
18941 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
18945 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
18946 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
18947 rtx_insn
*insn
= get_last_insn ();
18948 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
18951 /* Output a move between double words. It must be REG<-MEM
18954 output_move_double (rtx
*operands
, bool emit
, int *count
)
18956 enum rtx_code code0
= GET_CODE (operands
[0]);
18957 enum rtx_code code1
= GET_CODE (operands
[1]);
18962 /* The only case when this might happen is when
18963 you are looking at the length of a DImode instruction
18964 that has an invalid constant in it. */
18965 if (code0
== REG
&& code1
!= MEM
)
18967 gcc_assert (!emit
);
18974 unsigned int reg0
= REGNO (operands
[0]);
18976 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
18978 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
18980 switch (GET_CODE (XEXP (operands
[1], 0)))
18987 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
18988 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
18990 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18995 gcc_assert (TARGET_LDRD
);
18997 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
19004 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
19006 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
19014 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
19016 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
19021 gcc_assert (TARGET_LDRD
);
19023 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
19028 /* Autoicrement addressing modes should never have overlapping
19029 base and destination registers, and overlapping index registers
19030 are already prohibited, so this doesn't need to worry about
19032 otherops
[0] = operands
[0];
19033 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
19034 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
19036 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
19038 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
19040 /* Registers overlap so split out the increment. */
19043 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
19044 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
19051 /* Use a single insn if we can.
19052 FIXME: IWMMXT allows offsets larger than ldrd can
19053 handle, fix these up with a pair of ldr. */
19055 || !CONST_INT_P (otherops
[2])
19056 || (INTVAL (otherops
[2]) > -256
19057 && INTVAL (otherops
[2]) < 256))
19060 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
19066 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
19067 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
19077 /* Use a single insn if we can.
19078 FIXME: IWMMXT allows offsets larger than ldrd can handle,
19079 fix these up with a pair of ldr. */
19081 || !CONST_INT_P (otherops
[2])
19082 || (INTVAL (otherops
[2]) > -256
19083 && INTVAL (otherops
[2]) < 256))
19086 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
19092 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
19093 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
19103 /* We might be able to use ldrd %0, %1 here. However the range is
19104 different to ldr/adr, and it is broken on some ARMv7-M
19105 implementations. */
19106 /* Use the second register of the pair to avoid problematic
19108 otherops
[1] = operands
[1];
19110 output_asm_insn ("adr%?\t%0, %1", otherops
);
19111 operands
[1] = otherops
[0];
19115 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
19117 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
19124 /* ??? This needs checking for thumb2. */
19126 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
19127 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
19129 otherops
[0] = operands
[0];
19130 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
19131 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
19133 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
19135 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
19137 switch ((int) INTVAL (otherops
[2]))
19141 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
19147 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
19153 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
19157 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
19158 operands
[1] = otherops
[0];
19160 && (REG_P (otherops
[2])
19162 || (CONST_INT_P (otherops
[2])
19163 && INTVAL (otherops
[2]) > -256
19164 && INTVAL (otherops
[2]) < 256)))
19166 if (reg_overlap_mentioned_p (operands
[0],
19169 /* Swap base and index registers over to
19170 avoid a conflict. */
19171 std::swap (otherops
[1], otherops
[2]);
19173 /* If both registers conflict, it will usually
19174 have been fixed by a splitter. */
19175 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
19176 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
19180 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
19181 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
19188 otherops
[0] = operands
[0];
19190 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
19195 if (CONST_INT_P (otherops
[2]))
19199 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
19200 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
19202 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
19208 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
19214 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
19221 return "ldrd%?\t%0, [%1]";
19223 return "ldmia%?\t%1, %M0";
19227 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
19228 /* Take care of overlapping base/data reg. */
19229 if (reg_mentioned_p (operands
[0], operands
[1]))
19233 output_asm_insn ("ldr%?\t%0, %1", otherops
);
19234 output_asm_insn ("ldr%?\t%0, %1", operands
);
19244 output_asm_insn ("ldr%?\t%0, %1", operands
);
19245 output_asm_insn ("ldr%?\t%0, %1", otherops
);
19255 /* Constraints should ensure this. */
19256 gcc_assert (code0
== MEM
&& code1
== REG
);
19257 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
19258 || (TARGET_ARM
&& TARGET_LDRD
));
19260 /* For TARGET_ARM the first source register of an STRD
19261 must be even. This is usually the case for double-word
19262 values but user assembly constraints can force an odd
19263 starting register. */
19264 bool allow_strd
= TARGET_LDRD
19265 && !(TARGET_ARM
&& (REGNO (operands
[1]) & 1) == 1);
19266 switch (GET_CODE (XEXP (operands
[0], 0)))
19272 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
19274 output_asm_insn ("stm%?\t%m0, %M1", operands
);
19279 gcc_assert (allow_strd
);
19281 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
19288 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
19290 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
19298 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
19300 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
19305 gcc_assert (allow_strd
);
19307 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
19312 otherops
[0] = operands
[1];
19313 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
19314 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
19316 /* IWMMXT allows offsets larger than strd can handle,
19317 fix these up with a pair of str. */
19319 && CONST_INT_P (otherops
[2])
19320 && (INTVAL(otherops
[2]) <= -256
19321 || INTVAL(otherops
[2]) >= 256))
19323 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
19327 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
19328 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
19337 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
19338 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
19344 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
19347 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
19352 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
19357 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
19358 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
19360 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
19364 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
19371 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
19378 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
19383 && (REG_P (otherops
[2])
19385 || (CONST_INT_P (otherops
[2])
19386 && INTVAL (otherops
[2]) > -256
19387 && INTVAL (otherops
[2]) < 256)))
19389 otherops
[0] = operands
[1];
19390 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
19392 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
19398 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
19399 otherops
[1] = operands
[1];
19402 output_asm_insn ("str%?\t%1, %0", operands
);
19403 output_asm_insn ("str%?\t%H1, %0", otherops
);
19413 /* Output a move, load or store for quad-word vectors in ARM registers. Only
19414 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
19417 output_move_quad (rtx
*operands
)
19419 if (REG_P (operands
[0]))
19421 /* Load, or reg->reg move. */
19423 if (MEM_P (operands
[1]))
19425 switch (GET_CODE (XEXP (operands
[1], 0)))
19428 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
19433 output_asm_insn ("adr%?\t%0, %1", operands
);
19434 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
19438 gcc_unreachable ();
19446 gcc_assert (REG_P (operands
[1]));
19448 dest
= REGNO (operands
[0]);
19449 src
= REGNO (operands
[1]);
19451 /* This seems pretty dumb, but hopefully GCC won't try to do it
19454 for (i
= 0; i
< 4; i
++)
19456 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
19457 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
19458 output_asm_insn ("mov%?\t%0, %1", ops
);
19461 for (i
= 3; i
>= 0; i
--)
19463 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
19464 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
19465 output_asm_insn ("mov%?\t%0, %1", ops
);
19471 gcc_assert (MEM_P (operands
[0]));
19472 gcc_assert (REG_P (operands
[1]));
19473 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
19475 switch (GET_CODE (XEXP (operands
[0], 0)))
19478 output_asm_insn ("stm%?\t%m0, %M1", operands
);
19482 gcc_unreachable ();
19489 /* Output a VFP load or store instruction. */
19492 output_move_vfp (rtx
*operands
)
19494 rtx reg
, mem
, addr
, ops
[2];
19495 int load
= REG_P (operands
[0]);
19496 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
19497 int sp
= (!TARGET_VFP_FP16INST
19498 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
19499 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
19504 reg
= operands
[!load
];
19505 mem
= operands
[load
];
19507 mode
= GET_MODE (reg
);
19509 gcc_assert (REG_P (reg
));
19510 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
19511 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
19517 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
19518 gcc_assert (MEM_P (mem
));
19520 addr
= XEXP (mem
, 0);
19522 switch (GET_CODE (addr
))
19525 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
19526 ops
[0] = XEXP (addr
, 0);
19531 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
19532 ops
[0] = XEXP (addr
, 0);
19537 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
19543 sprintf (buff
, templ
,
19544 load
? "ld" : "st",
19545 dp
? "64" : sp
? "32" : "16",
19547 integer_p
? "\t%@ int" : "");
19548 output_asm_insn (buff
, ops
);
19553 /* Output a Neon double-word or quad-word load or store, or a load
19554 or store for larger structure modes.
19556 WARNING: The ordering of elements is weird in big-endian mode,
19557 because the EABI requires that vectors stored in memory appear
19558 as though they were stored by a VSTM, as required by the EABI.
19559 GCC RTL defines element ordering based on in-memory order.
19560 This can be different from the architectural ordering of elements
19561 within a NEON register. The intrinsics defined in arm_neon.h use the
19562 NEON register element ordering, not the GCC RTL element ordering.
19564 For example, the in-memory ordering of a big-endian a quadword
19565 vector with 16-bit elements when stored from register pair {d0,d1}
19566 will be (lowest address first, d0[N] is NEON register element N):
19568 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
19570 When necessary, quadword registers (dN, dN+1) are moved to ARM
19571 registers from rN in the order:
19573 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
19575 So that STM/LDM can be used on vectors in ARM registers, and the
19576 same memory layout will result as if VSTM/VLDM were used.
19578 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
19579 possible, which allows use of appropriate alignment tags.
19580 Note that the choice of "64" is independent of the actual vector
19581 element size; this size simply ensures that the behavior is
19582 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
19584 Due to limitations of those instructions, use of VST1.64/VLD1.64
19585 is not possible if:
19586 - the address contains PRE_DEC, or
19587 - the mode refers to more than 4 double-word registers
19589 In those cases, it would be possible to replace VSTM/VLDM by a
19590 sequence of instructions; this is not currently implemented since
19591 this is not certain to actually improve performance. */
19594 output_move_neon (rtx
*operands
)
19596 rtx reg
, mem
, addr
, ops
[2];
19597 int regno
, nregs
, load
= REG_P (operands
[0]);
19602 reg
= operands
[!load
];
19603 mem
= operands
[load
];
19605 mode
= GET_MODE (reg
);
19607 gcc_assert (REG_P (reg
));
19608 regno
= REGNO (reg
);
19609 nregs
= REG_NREGS (reg
) / 2;
19610 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
19611 || NEON_REGNO_OK_FOR_QUAD (regno
));
19612 gcc_assert (VALID_NEON_DREG_MODE (mode
)
19613 || VALID_NEON_QREG_MODE (mode
)
19614 || VALID_NEON_STRUCT_MODE (mode
));
19615 gcc_assert (MEM_P (mem
));
19617 addr
= XEXP (mem
, 0);
19619 /* Strip off const from addresses like (const (plus (...))). */
19620 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
19621 addr
= XEXP (addr
, 0);
19623 switch (GET_CODE (addr
))
19626 /* We have to use vldm / vstm for too-large modes. */
19629 templ
= "v%smia%%?\t%%0!, %%h1";
19630 ops
[0] = XEXP (addr
, 0);
19634 templ
= "v%s1.64\t%%h1, %%A0";
19641 /* We have to use vldm / vstm in this case, since there is no
19642 pre-decrement form of the vld1 / vst1 instructions. */
19643 templ
= "v%smdb%%?\t%%0!, %%h1";
19644 ops
[0] = XEXP (addr
, 0);
19649 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
19650 gcc_unreachable ();
19653 /* We have to use vldm / vstm for too-large modes. */
19657 templ
= "v%smia%%?\t%%m0, %%h1";
19659 templ
= "v%s1.64\t%%h1, %%A0";
19665 /* Fall through. */
19671 for (i
= 0; i
< nregs
; i
++)
19673 /* We're only using DImode here because it's a convenient size. */
19674 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
19675 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
19676 if (reg_overlap_mentioned_p (ops
[0], mem
))
19678 gcc_assert (overlap
== -1);
19683 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
19684 output_asm_insn (buff
, ops
);
19689 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
19690 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
19691 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
19692 output_asm_insn (buff
, ops
);
19699 gcc_unreachable ();
19702 sprintf (buff
, templ
, load
? "ld" : "st");
19703 output_asm_insn (buff
, ops
);
19708 /* Compute and return the length of neon_mov<mode>, where <mode> is
19709 one of VSTRUCT modes: EI, OI, CI or XI. */
19711 arm_attr_length_move_neon (rtx_insn
*insn
)
19713 rtx reg
, mem
, addr
;
19717 extract_insn_cached (insn
);
19719 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
19721 mode
= GET_MODE (recog_data
.operand
[0]);
19732 gcc_unreachable ();
19736 load
= REG_P (recog_data
.operand
[0]);
19737 reg
= recog_data
.operand
[!load
];
19738 mem
= recog_data
.operand
[load
];
19740 gcc_assert (MEM_P (mem
));
19742 addr
= XEXP (mem
, 0);
19744 /* Strip off const from addresses like (const (plus (...))). */
19745 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
19746 addr
= XEXP (addr
, 0);
19748 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
19750 int insns
= REG_NREGS (reg
) / 2;
19757 /* Return nonzero if the offset in the address is an immediate. Otherwise,
19761 arm_address_offset_is_imm (rtx_insn
*insn
)
19765 extract_insn_cached (insn
);
19767 if (REG_P (recog_data
.operand
[0]))
19770 mem
= recog_data
.operand
[0];
19772 gcc_assert (MEM_P (mem
));
19774 addr
= XEXP (mem
, 0);
19777 || (GET_CODE (addr
) == PLUS
19778 && REG_P (XEXP (addr
, 0))
19779 && CONST_INT_P (XEXP (addr
, 1))))
19785 /* Output an ADD r, s, #n where n may be too big for one instruction.
19786 If adding zero to one register, output nothing. */
19788 output_add_immediate (rtx
*operands
)
19790 HOST_WIDE_INT n
= INTVAL (operands
[2]);
19792 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
19795 output_multi_immediate (operands
,
19796 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19799 output_multi_immediate (operands
,
19800 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19807 /* Output a multiple immediate operation.
19808 OPERANDS is the vector of operands referred to in the output patterns.
19809 INSTR1 is the output pattern to use for the first constant.
19810 INSTR2 is the output pattern to use for subsequent constants.
19811 IMMED_OP is the index of the constant slot in OPERANDS.
19812 N is the constant value. */
19813 static const char *
19814 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
19815 int immed_op
, HOST_WIDE_INT n
)
19817 #if HOST_BITS_PER_WIDE_INT > 32
19823 /* Quick and easy output. */
19824 operands
[immed_op
] = const0_rtx
;
19825 output_asm_insn (instr1
, operands
);
19830 const char * instr
= instr1
;
19832 /* Note that n is never zero here (which would give no output). */
19833 for (i
= 0; i
< 32; i
+= 2)
19837 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
19838 output_asm_insn (instr
, operands
);
19848 /* Return the name of a shifter operation. */
19849 static const char *
19850 arm_shift_nmem(enum rtx_code code
)
19855 return ARM_LSL_NAME
;
19871 /* Return the appropriate ARM instruction for the operation code.
19872 The returned result should not be overwritten. OP is the rtx of the
19873 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19876 arithmetic_instr (rtx op
, int shift_first_arg
)
19878 switch (GET_CODE (op
))
19884 return shift_first_arg
? "rsb" : "sub";
19899 return arm_shift_nmem(GET_CODE(op
));
19902 gcc_unreachable ();
19906 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19907 for the operation code. The returned result should not be overwritten.
19908 OP is the rtx code of the shift.
19909 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
19911 static const char *
19912 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
19915 enum rtx_code code
= GET_CODE (op
);
19920 if (!CONST_INT_P (XEXP (op
, 1)))
19922 output_operand_lossage ("invalid shift operand");
19927 *amountp
= 32 - INTVAL (XEXP (op
, 1));
19935 mnem
= arm_shift_nmem(code
);
19936 if (CONST_INT_P (XEXP (op
, 1)))
19938 *amountp
= INTVAL (XEXP (op
, 1));
19940 else if (REG_P (XEXP (op
, 1)))
19947 output_operand_lossage ("invalid shift operand");
19953 /* We never have to worry about the amount being other than a
19954 power of 2, since this case can never be reloaded from a reg. */
19955 if (!CONST_INT_P (XEXP (op
, 1)))
19957 output_operand_lossage ("invalid shift operand");
19961 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
19963 /* Amount must be a power of two. */
19964 if (*amountp
& (*amountp
- 1))
19966 output_operand_lossage ("invalid shift operand");
19970 *amountp
= exact_log2 (*amountp
);
19971 gcc_assert (IN_RANGE (*amountp
, 0, 31));
19972 return ARM_LSL_NAME
;
19975 output_operand_lossage ("invalid shift operand");
19979 /* This is not 100% correct, but follows from the desire to merge
19980 multiplication by a power of 2 with the recognizer for a
19981 shift. >=32 is not a valid shift for "lsl", so we must try and
19982 output a shift that produces the correct arithmetical result.
19983 Using lsr #32 is identical except for the fact that the carry bit
19984 is not set correctly if we set the flags; but we never use the
19985 carry bit from such an operation, so we can ignore that. */
19986 if (code
== ROTATERT
)
19987 /* Rotate is just modulo 32. */
19989 else if (*amountp
!= (*amountp
& 31))
19991 if (code
== ASHIFT
)
19996 /* Shifts of 0 are no-ops. */
20003 /* Output a .ascii pseudo-op, keeping track of lengths. This is
20004 because /bin/as is horribly restrictive. The judgement about
20005 whether or not each character is 'printable' (and can be output as
20006 is) or not (and must be printed with an octal escape) must be made
20007 with reference to the *host* character set -- the situation is
20008 similar to that discussed in the comments above pp_c_char in
20009 c-pretty-print.c. */
20011 #define MAX_ASCII_LEN 51
20014 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
20017 int len_so_far
= 0;
20019 fputs ("\t.ascii\t\"", stream
);
20021 for (i
= 0; i
< len
; i
++)
20025 if (len_so_far
>= MAX_ASCII_LEN
)
20027 fputs ("\"\n\t.ascii\t\"", stream
);
20033 if (c
== '\\' || c
== '\"')
20035 putc ('\\', stream
);
20043 fprintf (stream
, "\\%03o", c
);
20048 fputs ("\"\n", stream
);
20052 /* Compute the register save mask for registers 0 through 12
20053 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
20055 static unsigned long
20056 arm_compute_save_reg0_reg12_mask (void)
20058 unsigned long func_type
= arm_current_func_type ();
20059 unsigned long save_reg_mask
= 0;
20062 if (IS_INTERRUPT (func_type
))
20064 unsigned int max_reg
;
20065 /* Interrupt functions must not corrupt any registers,
20066 even call clobbered ones. If this is a leaf function
20067 we can just examine the registers used by the RTL, but
20068 otherwise we have to assume that whatever function is
20069 called might clobber anything, and so we have to save
20070 all the call-clobbered registers as well. */
20071 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
20072 /* FIQ handlers have registers r8 - r12 banked, so
20073 we only need to check r0 - r7, Normal ISRs only
20074 bank r14 and r15, so we must check up to r12.
20075 r13 is the stack pointer which is always preserved,
20076 so we do not need to consider it here. */
20081 for (reg
= 0; reg
<= max_reg
; reg
++)
20082 if (df_regs_ever_live_p (reg
)
20083 || (! crtl
->is_leaf
&& call_used_or_fixed_reg_p (reg
)))
20084 save_reg_mask
|= (1 << reg
);
20086 /* Also save the pic base register if necessary. */
20087 if (PIC_REGISTER_MAY_NEED_SAVING
20088 && crtl
->uses_pic_offset_table
)
20089 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
20091 else if (IS_VOLATILE(func_type
))
20093 /* For noreturn functions we historically omitted register saves
20094 altogether. However this really messes up debugging. As a
20095 compromise save just the frame pointers. Combined with the link
20096 register saved elsewhere this should be sufficient to get
20098 if (frame_pointer_needed
)
20099 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
20100 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
20101 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
20102 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
20103 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
20107 /* In the normal case we only need to save those registers
20108 which are call saved and which are used by this function. */
20109 for (reg
= 0; reg
<= 11; reg
++)
20110 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
20111 save_reg_mask
|= (1 << reg
);
20113 /* Handle the frame pointer as a special case. */
20114 if (frame_pointer_needed
)
20115 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
20117 /* If we aren't loading the PIC register,
20118 don't stack it even though it may be live. */
20119 if (PIC_REGISTER_MAY_NEED_SAVING
20120 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
20121 || crtl
->uses_pic_offset_table
))
20122 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
20124 /* The prologue will copy SP into R0, so save it. */
20125 if (IS_STACKALIGN (func_type
))
20126 save_reg_mask
|= 1;
20129 /* Save registers so the exception handler can modify them. */
20130 if (crtl
->calls_eh_return
)
20136 reg
= EH_RETURN_DATA_REGNO (i
);
20137 if (reg
== INVALID_REGNUM
)
20139 save_reg_mask
|= 1 << reg
;
20143 return save_reg_mask
;
20146 /* Return true if r3 is live at the start of the function. */
20149 arm_r3_live_at_start_p (void)
20151 /* Just look at cfg info, which is still close enough to correct at this
20152 point. This gives false positives for broken functions that might use
20153 uninitialized data that happens to be allocated in r3, but who cares? */
20154 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
20157 /* Compute the number of bytes used to store the static chain register on the
20158 stack, above the stack frame. We need to know this accurately to get the
20159 alignment of the rest of the stack frame correct. */
20162 arm_compute_static_chain_stack_bytes (void)
20164 /* Once the value is updated from the init value of -1, do not
20166 if (cfun
->machine
->static_chain_stack_bytes
!= -1)
20167 return cfun
->machine
->static_chain_stack_bytes
;
20169 /* See the defining assertion in arm_expand_prologue. */
20170 if (IS_NESTED (arm_current_func_type ())
20171 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
20172 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
20173 || flag_stack_clash_protection
)
20174 && !df_regs_ever_live_p (LR_REGNUM
)))
20175 && arm_r3_live_at_start_p ()
20176 && crtl
->args
.pretend_args_size
== 0)
20182 /* Compute a bit mask of which core registers need to be
20183 saved on the stack for the current function.
20184 This is used by arm_compute_frame_layout, which may add extra registers. */
20186 static unsigned long
20187 arm_compute_save_core_reg_mask (void)
20189 unsigned int save_reg_mask
= 0;
20190 unsigned long func_type
= arm_current_func_type ();
20193 if (IS_NAKED (func_type
))
20194 /* This should never really happen. */
20197 /* If we are creating a stack frame, then we must save the frame pointer,
20198 IP (which will hold the old stack pointer), LR and the PC. */
20199 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
20201 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
20204 | (1 << PC_REGNUM
);
20206 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
20208 /* Decide if we need to save the link register.
20209 Interrupt routines have their own banked link register,
20210 so they never need to save it.
20211 Otherwise if we do not use the link register we do not need to save
20212 it. If we are pushing other registers onto the stack however, we
20213 can save an instruction in the epilogue by pushing the link register
20214 now and then popping it back into the PC. This incurs extra memory
20215 accesses though, so we only do it when optimizing for size, and only
20216 if we know that we will not need a fancy return sequence. */
20217 if (df_regs_ever_live_p (LR_REGNUM
)
20220 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
20221 && !crtl
->tail_call_emit
20222 && !crtl
->calls_eh_return
))
20223 save_reg_mask
|= 1 << LR_REGNUM
;
20225 if (cfun
->machine
->lr_save_eliminated
)
20226 save_reg_mask
&= ~ (1 << LR_REGNUM
);
20228 if (TARGET_REALLY_IWMMXT
20229 && ((bit_count (save_reg_mask
)
20230 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
20231 arm_compute_static_chain_stack_bytes())
20234 /* The total number of registers that are going to be pushed
20235 onto the stack is odd. We need to ensure that the stack
20236 is 64-bit aligned before we start to save iWMMXt registers,
20237 and also before we start to create locals. (A local variable
20238 might be a double or long long which we will load/store using
20239 an iWMMXt instruction). Therefore we need to push another
20240 ARM register, so that the stack will be 64-bit aligned. We
20241 try to avoid using the arg registers (r0 -r3) as they might be
20242 used to pass values in a tail call. */
20243 for (reg
= 4; reg
<= 12; reg
++)
20244 if ((save_reg_mask
& (1 << reg
)) == 0)
20248 save_reg_mask
|= (1 << reg
);
20251 cfun
->machine
->sibcall_blocked
= 1;
20252 save_reg_mask
|= (1 << 3);
20256 /* We may need to push an additional register for use initializing the
20257 PIC base register. */
20258 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
20259 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
20261 reg
= thumb_find_work_register (1 << 4);
20262 if (!call_used_or_fixed_reg_p (reg
))
20263 save_reg_mask
|= (1 << reg
);
20266 return save_reg_mask
;
20269 /* Compute a bit mask of which core registers need to be
20270 saved on the stack for the current function. */
20271 static unsigned long
20272 thumb1_compute_save_core_reg_mask (void)
20274 unsigned long mask
;
20278 for (reg
= 0; reg
< 12; reg
++)
20279 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
20282 /* Handle the frame pointer as a special case. */
20283 if (frame_pointer_needed
)
20284 mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
20287 && !TARGET_SINGLE_PIC_BASE
20288 && arm_pic_register
!= INVALID_REGNUM
20289 && crtl
->uses_pic_offset_table
)
20290 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
20292 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
20293 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
20294 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
20296 /* LR will also be pushed if any lo regs are pushed. */
20297 if (mask
& 0xff || thumb_force_lr_save ())
20298 mask
|= (1 << LR_REGNUM
);
20300 bool call_clobbered_scratch
20301 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
20302 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
20304 /* Make sure we have a low work register if we need one. We will
20305 need one if we are going to push a high register, but we are not
20306 currently intending to push a low register. However if both the
20307 prologue and epilogue have a spare call-clobbered low register,
20308 then we won't need to find an additional work register. It does
20309 not need to be the same register in the prologue and
20311 if ((mask
& 0xff) == 0
20312 && !call_clobbered_scratch
20313 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
20315 /* Use thumb_find_work_register to choose which register
20316 we will use. If the register is live then we will
20317 have to push it. Use LAST_LO_REGNUM as our fallback
20318 choice for the register to select. */
20319 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
20320 /* Make sure the register returned by thumb_find_work_register is
20321 not part of the return value. */
20322 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
20323 reg
= LAST_LO_REGNUM
;
20325 if (callee_saved_reg_p (reg
))
20329 /* The 504 below is 8 bytes less than 512 because there are two possible
20330 alignment words. We can't tell here if they will be present or not so we
20331 have to play it safe and assume that they are. */
20332 if ((CALLER_INTERWORKING_SLOT_SIZE
+
20333 ROUND_UP_WORD (get_frame_size ()) +
20334 crtl
->outgoing_args_size
) >= 504)
20336 /* This is the same as the code in thumb1_expand_prologue() which
20337 determines which register to use for stack decrement. */
20338 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
20339 if (mask
& (1 << reg
))
20342 if (reg
> LAST_LO_REGNUM
)
20344 /* Make sure we have a register available for stack decrement. */
20345 mask
|= 1 << LAST_LO_REGNUM
;
20353 /* Return the number of bytes required to save VFP registers. */
20355 arm_get_vfp_saved_size (void)
20357 unsigned int regno
;
20362 /* Space for saved VFP registers. */
20363 if (TARGET_HARD_FLOAT
)
20366 for (regno
= FIRST_VFP_REGNUM
;
20367 regno
< LAST_VFP_REGNUM
;
20370 if ((!df_regs_ever_live_p (regno
)
20371 || call_used_or_fixed_reg_p (regno
))
20372 && (!df_regs_ever_live_p (regno
+ 1)
20373 || call_used_or_fixed_reg_p (regno
+ 1)))
20377 /* Workaround ARM10 VFPr1 bug. */
20378 if (count
== 2 && !arm_arch6
)
20380 saved
+= count
* 8;
20389 if (count
== 2 && !arm_arch6
)
20391 saved
+= count
* 8;
20398 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
20399 everything bar the final return instruction. If simple_return is true,
20400 then do not output epilogue, because it has already been emitted in RTL.
20402 Note: do not forget to update length attribute of corresponding insn pattern
20403 when changing assembly output (eg. length attribute of
20404 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
20405 register clearing sequences). */
20407 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
20408 bool simple_return
)
20410 char conditional
[10];
20413 unsigned long live_regs_mask
;
20414 unsigned long func_type
;
20415 arm_stack_offsets
*offsets
;
20417 func_type
= arm_current_func_type ();
20419 if (IS_NAKED (func_type
))
20422 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
20424 /* If this function was declared non-returning, and we have
20425 found a tail call, then we have to trust that the called
20426 function won't return. */
20431 /* Otherwise, trap an attempted return by aborting. */
20433 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
20435 assemble_external_libcall (ops
[1]);
20436 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
20442 gcc_assert (!cfun
->calls_alloca
|| really_return
);
20444 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
20446 cfun
->machine
->return_used_this_function
= 1;
20448 offsets
= arm_get_frame_offsets ();
20449 live_regs_mask
= offsets
->saved_regs_mask
;
20451 if (!simple_return
&& live_regs_mask
)
20453 const char * return_reg
;
20455 /* If we do not have any special requirements for function exit
20456 (e.g. interworking) then we can load the return address
20457 directly into the PC. Otherwise we must load it into LR. */
20459 && !IS_CMSE_ENTRY (func_type
)
20460 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
20461 return_reg
= reg_names
[PC_REGNUM
];
20463 return_reg
= reg_names
[LR_REGNUM
];
20465 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
20467 /* There are three possible reasons for the IP register
20468 being saved. 1) a stack frame was created, in which case
20469 IP contains the old stack pointer, or 2) an ISR routine
20470 corrupted it, or 3) it was saved to align the stack on
20471 iWMMXt. In case 1, restore IP into SP, otherwise just
20473 if (frame_pointer_needed
)
20475 live_regs_mask
&= ~ (1 << IP_REGNUM
);
20476 live_regs_mask
|= (1 << SP_REGNUM
);
20479 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
20482 /* On some ARM architectures it is faster to use LDR rather than
20483 LDM to load a single register. On other architectures, the
20484 cost is the same. In 26 bit mode, or for exception handlers,
20485 we have to use LDM to load the PC so that the CPSR is also
20487 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
20488 if (live_regs_mask
== (1U << reg
))
20491 if (reg
<= LAST_ARM_REGNUM
20492 && (reg
!= LR_REGNUM
20494 || ! IS_INTERRUPT (func_type
)))
20496 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
20497 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
20504 /* Generate the load multiple instruction to restore the
20505 registers. Note we can get here, even if
20506 frame_pointer_needed is true, but only if sp already
20507 points to the base of the saved core registers. */
20508 if (live_regs_mask
& (1 << SP_REGNUM
))
20510 unsigned HOST_WIDE_INT stack_adjust
;
20512 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
20513 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
20515 if (stack_adjust
&& arm_arch5t
&& TARGET_ARM
)
20516 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
20519 /* If we can't use ldmib (SA110 bug),
20520 then try to pop r3 instead. */
20522 live_regs_mask
|= 1 << 3;
20524 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
20527 /* For interrupt returns we have to use an LDM rather than
20528 a POP so that we can use the exception return variant. */
20529 else if (IS_INTERRUPT (func_type
))
20530 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
20532 sprintf (instr
, "pop%s\t{", conditional
);
20534 p
= instr
+ strlen (instr
);
20536 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
20537 if (live_regs_mask
& (1 << reg
))
20539 int l
= strlen (reg_names
[reg
]);
20545 memcpy (p
, ", ", 2);
20549 memcpy (p
, "%|", 2);
20550 memcpy (p
+ 2, reg_names
[reg
], l
);
20554 if (live_regs_mask
& (1 << LR_REGNUM
))
20556 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
20557 /* If returning from an interrupt, restore the CPSR. */
20558 if (IS_INTERRUPT (func_type
))
20565 output_asm_insn (instr
, & operand
);
20567 /* See if we need to generate an extra instruction to
20568 perform the actual function return. */
20570 && func_type
!= ARM_FT_INTERWORKED
20571 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
20573 /* The return has already been handled
20574 by loading the LR into the PC. */
20581 switch ((int) ARM_FUNC_TYPE (func_type
))
20585 /* ??? This is wrong for unified assembly syntax. */
20586 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
20589 case ARM_FT_INTERWORKED
:
20590 gcc_assert (arm_arch5t
|| arm_arch4t
);
20591 sprintf (instr
, "bx%s\t%%|lr", conditional
);
20594 case ARM_FT_EXCEPTION
:
20595 /* ??? This is wrong for unified assembly syntax. */
20596 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
20600 if (IS_CMSE_ENTRY (func_type
))
20602 /* Check if we have to clear the 'GE bits' which is only used if
20603 parallel add and subtraction instructions are available. */
20604 if (TARGET_INT_SIMD
)
20605 snprintf (instr
, sizeof (instr
),
20606 "msr%s\tAPSR_nzcvqg, %%|lr", conditional
);
20608 snprintf (instr
, sizeof (instr
),
20609 "msr%s\tAPSR_nzcvq, %%|lr", conditional
);
20611 output_asm_insn (instr
, & operand
);
20612 if (TARGET_HARD_FLOAT
)
20614 /* Clear the cumulative exception-status bits (0-4,7) and the
20615 condition code bits (28-31) of the FPSCR. We need to
20616 remember to clear the first scratch register used (IP) and
20617 save and restore the second (r4). */
20618 snprintf (instr
, sizeof (instr
), "push\t{%%|r4}");
20619 output_asm_insn (instr
, & operand
);
20620 snprintf (instr
, sizeof (instr
), "vmrs\t%%|ip, fpscr");
20621 output_asm_insn (instr
, & operand
);
20622 snprintf (instr
, sizeof (instr
), "movw\t%%|r4, #65376");
20623 output_asm_insn (instr
, & operand
);
20624 snprintf (instr
, sizeof (instr
), "movt\t%%|r4, #4095");
20625 output_asm_insn (instr
, & operand
);
20626 snprintf (instr
, sizeof (instr
), "and\t%%|ip, %%|r4");
20627 output_asm_insn (instr
, & operand
);
20628 snprintf (instr
, sizeof (instr
), "vmsr\tfpscr, %%|ip");
20629 output_asm_insn (instr
, & operand
);
20630 snprintf (instr
, sizeof (instr
), "pop\t{%%|r4}");
20631 output_asm_insn (instr
, & operand
);
20632 snprintf (instr
, sizeof (instr
), "mov\t%%|ip, %%|lr");
20633 output_asm_insn (instr
, & operand
);
20635 snprintf (instr
, sizeof (instr
), "bxns\t%%|lr");
20637 /* Use bx if it's available. */
20638 else if (arm_arch5t
|| arm_arch4t
)
20639 sprintf (instr
, "bx%s\t%%|lr", conditional
);
20641 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
20645 output_asm_insn (instr
, & operand
);
20651 /* Output in FILE asm statements needed to declare the NAME of the function
20652 defined by its DECL node. */
20655 arm_asm_declare_function_name (FILE *file
, const char *name
, tree decl
)
20657 size_t cmse_name_len
;
20658 char *cmse_name
= 0;
20659 char cmse_prefix
[] = "__acle_se_";
20661 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
20662 extra function label for each function with the 'cmse_nonsecure_entry'
20663 attribute. This extra function label should be prepended with
20664 '__acle_se_', telling the linker that it needs to create secure gateway
20665 veneers for this function. */
20666 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_entry",
20667 DECL_ATTRIBUTES (decl
)))
20669 cmse_name_len
= sizeof (cmse_prefix
) + strlen (name
);
20670 cmse_name
= XALLOCAVEC (char, cmse_name_len
);
20671 snprintf (cmse_name
, cmse_name_len
, "%s%s", cmse_prefix
, name
);
20672 targetm
.asm_out
.globalize_label (file
, cmse_name
);
20674 ARM_DECLARE_FUNCTION_NAME (file
, cmse_name
, decl
);
20675 ASM_OUTPUT_TYPE_DIRECTIVE (file
, cmse_name
, "function");
20678 ARM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
20679 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
20680 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
20681 ASM_OUTPUT_LABEL (file
, name
);
20684 ASM_OUTPUT_LABEL (file
, cmse_name
);
20686 ARM_OUTPUT_FN_UNWIND (file
, TRUE
);
20689 /* Write the function name into the code section, directly preceding
20690 the function prologue.
20692 Code will be output similar to this:
20694 .ascii "arm_poke_function_name", 0
20697 .word 0xff000000 + (t1 - t0)
20698 arm_poke_function_name
20700 stmfd sp!, {fp, ip, lr, pc}
20703 When performing a stack backtrace, code can inspect the value
20704 of 'pc' stored at 'fp' + 0. If the trace function then looks
20705 at location pc - 12 and the top 8 bits are set, then we know
20706 that there is a function name embedded immediately preceding this
20707 location and has length ((pc[-3]) & 0xff000000).
20709 We assume that pc is declared as a pointer to an unsigned long.
20711 It is of no benefit to output the function name if we are assembling
20712 a leaf function. These function types will not contain a stack
20713 backtrace structure, therefore it is not possible to determine the
20716 arm_poke_function_name (FILE *stream
, const char *name
)
20718 unsigned long alignlength
;
20719 unsigned long length
;
20722 length
= strlen (name
) + 1;
20723 alignlength
= ROUND_UP_WORD (length
);
20725 ASM_OUTPUT_ASCII (stream
, name
, length
);
20726 ASM_OUTPUT_ALIGN (stream
, 2);
20727 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
20728 assemble_aligned_integer (UNITS_PER_WORD
, x
);
20731 /* Place some comments into the assembler stream
20732 describing the current function. */
20734 arm_output_function_prologue (FILE *f
)
20736 unsigned long func_type
;
20738 /* Sanity check. */
20739 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
20741 func_type
= arm_current_func_type ();
20743 switch ((int) ARM_FUNC_TYPE (func_type
))
20746 case ARM_FT_NORMAL
:
20748 case ARM_FT_INTERWORKED
:
20749 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
20752 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
20755 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
20757 case ARM_FT_EXCEPTION
:
20758 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
20762 if (IS_NAKED (func_type
))
20763 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
20765 if (IS_VOLATILE (func_type
))
20766 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
20768 if (IS_NESTED (func_type
))
20769 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
20770 if (IS_STACKALIGN (func_type
))
20771 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
20772 if (IS_CMSE_ENTRY (func_type
))
20773 asm_fprintf (f
, "\t%@ Non-secure entry function: called from non-secure code.\n");
20775 asm_fprintf (f
, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
20776 (HOST_WIDE_INT
) crtl
->args
.size
,
20777 crtl
->args
.pretend_args_size
,
20778 (HOST_WIDE_INT
) get_frame_size ());
20780 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20781 frame_pointer_needed
,
20782 cfun
->machine
->uses_anonymous_args
);
20784 if (cfun
->machine
->lr_save_eliminated
)
20785 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
20787 if (crtl
->calls_eh_return
)
20788 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
20793 arm_output_function_epilogue (FILE *)
20795 arm_stack_offsets
*offsets
;
20801 /* Emit any call-via-reg trampolines that are needed for v4t support
20802 of call_reg and call_value_reg type insns. */
20803 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
20805 rtx label
= cfun
->machine
->call_via
[regno
];
20809 switch_to_section (function_section (current_function_decl
));
20810 targetm
.asm_out
.internal_label (asm_out_file
, "L",
20811 CODE_LABEL_NUMBER (label
));
20812 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
20816 /* ??? Probably not safe to set this here, since it assumes that a
20817 function will be emitted as assembly immediately after we generate
20818 RTL for it. This does not happen for inline functions. */
20819 cfun
->machine
->return_used_this_function
= 0;
20821 else /* TARGET_32BIT */
20823 /* We need to take into account any stack-frame rounding. */
20824 offsets
= arm_get_frame_offsets ();
20826 gcc_assert (!use_return_insn (FALSE
, NULL
)
20827 || (cfun
->machine
->return_used_this_function
!= 0)
20828 || offsets
->saved_regs
== offsets
->outgoing_args
20829 || frame_pointer_needed
);
20833 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20834 STR and STRD. If an even number of registers are being pushed, one
20835 or more STRD patterns are created for each register pair. If an
20836 odd number of registers are pushed, emit an initial STR followed by
20837 as many STRD instructions as are needed. This works best when the
20838 stack is initially 64-bit aligned (the normal case), since it
20839 ensures that each STRD is also 64-bit aligned. */
20841 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
20846 rtx par
= NULL_RTX
;
20847 rtx dwarf
= NULL_RTX
;
20851 num_regs
= bit_count (saved_regs_mask
);
20853 /* Must be at least one register to save, and can't save SP or PC. */
20854 gcc_assert (num_regs
> 0 && num_regs
<= 14);
20855 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20856 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
20858 /* Create sequence for DWARF info. All the frame-related data for
20859 debugging is held in this wrapper. */
20860 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20862 /* Describe the stack adjustment. */
20863 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20864 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20865 RTX_FRAME_RELATED_P (tmp
) = 1;
20866 XVECEXP (dwarf
, 0, 0) = tmp
;
20868 /* Find the first register. */
20869 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
20874 /* If there's an odd number of registers to push. Start off by
20875 pushing a single register. This ensures that subsequent strd
20876 operations are dword aligned (assuming that SP was originally
20877 64-bit aligned). */
20878 if ((num_regs
& 1) != 0)
20880 rtx reg
, mem
, insn
;
20882 reg
= gen_rtx_REG (SImode
, regno
);
20884 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
20885 stack_pointer_rtx
));
20887 mem
= gen_frame_mem (Pmode
,
20889 (Pmode
, stack_pointer_rtx
,
20890 plus_constant (Pmode
, stack_pointer_rtx
,
20893 tmp
= gen_rtx_SET (mem
, reg
);
20894 RTX_FRAME_RELATED_P (tmp
) = 1;
20895 insn
= emit_insn (tmp
);
20896 RTX_FRAME_RELATED_P (insn
) = 1;
20897 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20898 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
20899 RTX_FRAME_RELATED_P (tmp
) = 1;
20902 XVECEXP (dwarf
, 0, i
) = tmp
;
20906 while (i
< num_regs
)
20907 if (saved_regs_mask
& (1 << regno
))
20909 rtx reg1
, reg2
, mem1
, mem2
;
20910 rtx tmp0
, tmp1
, tmp2
;
20913 /* Find the register to pair with this one. */
20914 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
20918 reg1
= gen_rtx_REG (SImode
, regno
);
20919 reg2
= gen_rtx_REG (SImode
, regno2
);
20926 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20929 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20931 -4 * (num_regs
- 1)));
20932 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
20933 plus_constant (Pmode
, stack_pointer_rtx
,
20935 tmp1
= gen_rtx_SET (mem1
, reg1
);
20936 tmp2
= gen_rtx_SET (mem2
, reg2
);
20937 RTX_FRAME_RELATED_P (tmp0
) = 1;
20938 RTX_FRAME_RELATED_P (tmp1
) = 1;
20939 RTX_FRAME_RELATED_P (tmp2
) = 1;
20940 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
20941 XVECEXP (par
, 0, 0) = tmp0
;
20942 XVECEXP (par
, 0, 1) = tmp1
;
20943 XVECEXP (par
, 0, 2) = tmp2
;
20944 insn
= emit_insn (par
);
20945 RTX_FRAME_RELATED_P (insn
) = 1;
20946 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20950 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20953 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20956 tmp1
= gen_rtx_SET (mem1
, reg1
);
20957 tmp2
= gen_rtx_SET (mem2
, reg2
);
20958 RTX_FRAME_RELATED_P (tmp1
) = 1;
20959 RTX_FRAME_RELATED_P (tmp2
) = 1;
20960 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20961 XVECEXP (par
, 0, 0) = tmp1
;
20962 XVECEXP (par
, 0, 1) = tmp2
;
20966 /* Create unwind information. This is an approximation. */
20967 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
20968 plus_constant (Pmode
,
20972 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
20973 plus_constant (Pmode
,
20978 RTX_FRAME_RELATED_P (tmp1
) = 1;
20979 RTX_FRAME_RELATED_P (tmp2
) = 1;
20980 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
20981 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
20983 regno
= regno2
+ 1;
20991 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20992 whenever possible, otherwise it emits single-word stores. The first store
20993 also allocates stack space for all saved registers, using writeback with
20994 post-addressing mode. All other stores use offset addressing. If no STRD
20995 can be emitted, this function emits a sequence of single-word stores,
20996 and not an STM as before, because single-word stores provide more freedom
20997 scheduling and can be turned into an STM by peephole optimizations. */
20999 arm_emit_strd_push (unsigned long saved_regs_mask
)
21002 int i
, j
, dwarf_index
= 0;
21004 rtx dwarf
= NULL_RTX
;
21005 rtx insn
= NULL_RTX
;
21008 /* TODO: A more efficient code can be emitted by changing the
21009 layout, e.g., first push all pairs that can use STRD to keep the
21010 stack aligned, and then push all other registers. */
21011 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
21012 if (saved_regs_mask
& (1 << i
))
21015 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
21016 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
21017 gcc_assert (num_regs
> 0);
21019 /* Create sequence for DWARF info. */
21020 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
21022 /* For dwarf info, we generate explicit stack update. */
21023 tmp
= gen_rtx_SET (stack_pointer_rtx
,
21024 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
21025 RTX_FRAME_RELATED_P (tmp
) = 1;
21026 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
21028 /* Save registers. */
21029 offset
= - 4 * num_regs
;
21031 while (j
<= LAST_ARM_REGNUM
)
21032 if (saved_regs_mask
& (1 << j
))
21035 && (saved_regs_mask
& (1 << (j
+ 1))))
21037 /* Current register and previous register form register pair for
21038 which STRD can be generated. */
21041 /* Allocate stack space for all saved registers. */
21042 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
21043 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
21044 mem
= gen_frame_mem (DImode
, tmp
);
21047 else if (offset
> 0)
21048 mem
= gen_frame_mem (DImode
,
21049 plus_constant (Pmode
,
21053 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
21055 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
21056 RTX_FRAME_RELATED_P (tmp
) = 1;
21057 tmp
= emit_insn (tmp
);
21059 /* Record the first store insn. */
21060 if (dwarf_index
== 1)
21063 /* Generate dwarf info. */
21064 mem
= gen_frame_mem (SImode
,
21065 plus_constant (Pmode
,
21068 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
21069 RTX_FRAME_RELATED_P (tmp
) = 1;
21070 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
21072 mem
= gen_frame_mem (SImode
,
21073 plus_constant (Pmode
,
21076 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
21077 RTX_FRAME_RELATED_P (tmp
) = 1;
21078 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
21085 /* Emit a single word store. */
21088 /* Allocate stack space for all saved registers. */
21089 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
21090 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
21091 mem
= gen_frame_mem (SImode
, tmp
);
21094 else if (offset
> 0)
21095 mem
= gen_frame_mem (SImode
,
21096 plus_constant (Pmode
,
21100 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
21102 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
21103 RTX_FRAME_RELATED_P (tmp
) = 1;
21104 tmp
= emit_insn (tmp
);
21106 /* Record the first store insn. */
21107 if (dwarf_index
== 1)
21110 /* Generate dwarf info. */
21111 mem
= gen_frame_mem (SImode
,
21112 plus_constant(Pmode
,
21115 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
21116 RTX_FRAME_RELATED_P (tmp
) = 1;
21117 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
21126 /* Attach dwarf info to the first insn we generate. */
21127 gcc_assert (insn
!= NULL_RTX
);
21128 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21129 RTX_FRAME_RELATED_P (insn
) = 1;
21132 /* Generate and emit an insn that we will recognize as a push_multi.
21133 Unfortunately, since this insn does not reflect very well the actual
21134 semantics of the operation, we need to annotate the insn for the benefit
21135 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
21136 MASK for registers that should be annotated for DWARF2 frame unwind
21139 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
21142 int num_dwarf_regs
= 0;
21146 int dwarf_par_index
;
21149 /* We don't record the PC in the dwarf frame information. */
21150 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
21152 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
21154 if (mask
& (1 << i
))
21156 if (dwarf_regs_mask
& (1 << i
))
21160 gcc_assert (num_regs
&& num_regs
<= 16);
21161 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
21163 /* For the body of the insn we are going to generate an UNSPEC in
21164 parallel with several USEs. This allows the insn to be recognized
21165 by the push_multi pattern in the arm.md file.
21167 The body of the insn looks something like this:
21170 (set (mem:BLK (pre_modify:SI (reg:SI sp)
21171 (const_int:SI <num>)))
21172 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
21178 For the frame note however, we try to be more explicit and actually
21179 show each register being stored into the stack frame, plus a (single)
21180 decrement of the stack pointer. We do it this way in order to be
21181 friendly to the stack unwinding code, which only wants to see a single
21182 stack decrement per instruction. The RTL we generate for the note looks
21183 something like this:
21186 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
21187 (set (mem:SI (reg:SI sp)) (reg:SI r4))
21188 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
21189 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
21193 FIXME:: In an ideal world the PRE_MODIFY would not exist and
21194 instead we'd have a parallel expression detailing all
21195 the stores to the various memory addresses so that debug
21196 information is more up-to-date. Remember however while writing
21197 this to take care of the constraints with the push instruction.
21199 Note also that this has to be taken care of for the VFP registers.
21201 For more see PR43399. */
21203 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
21204 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
21205 dwarf_par_index
= 1;
21207 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
21209 if (mask
& (1 << i
))
21211 reg
= gen_rtx_REG (SImode
, i
);
21213 XVECEXP (par
, 0, 0)
21214 = gen_rtx_SET (gen_frame_mem
21216 gen_rtx_PRE_MODIFY (Pmode
,
21219 (Pmode
, stack_pointer_rtx
,
21222 gen_rtx_UNSPEC (BLKmode
,
21223 gen_rtvec (1, reg
),
21224 UNSPEC_PUSH_MULT
));
21226 if (dwarf_regs_mask
& (1 << i
))
21228 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
21230 RTX_FRAME_RELATED_P (tmp
) = 1;
21231 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
21238 for (j
= 1, i
++; j
< num_regs
; i
++)
21240 if (mask
& (1 << i
))
21242 reg
= gen_rtx_REG (SImode
, i
);
21244 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
21246 if (dwarf_regs_mask
& (1 << i
))
21249 = gen_rtx_SET (gen_frame_mem
21251 plus_constant (Pmode
, stack_pointer_rtx
,
21254 RTX_FRAME_RELATED_P (tmp
) = 1;
21255 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
21262 par
= emit_insn (par
);
21264 tmp
= gen_rtx_SET (stack_pointer_rtx
,
21265 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
21266 RTX_FRAME_RELATED_P (tmp
) = 1;
21267 XVECEXP (dwarf
, 0, 0) = tmp
;
21269 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
21274 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
21275 SIZE is the offset to be adjusted.
21276 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
21278 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
21282 RTX_FRAME_RELATED_P (insn
) = 1;
21283 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
21284 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
21287 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
21288 SAVED_REGS_MASK shows which registers need to be restored.
21290 Unfortunately, since this insn does not reflect very well the actual
21291 semantics of the operation, we need to annotate the insn for the benefit
21292 of DWARF2 frame unwind information. */
21294 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
21299 rtx dwarf
= NULL_RTX
;
21301 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
21305 offset_adj
= return_in_pc
? 1 : 0;
21306 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
21307 if (saved_regs_mask
& (1 << i
))
21310 gcc_assert (num_regs
&& num_regs
<= 16);
21312 /* If SP is in reglist, then we don't emit SP update insn. */
21313 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
21315 /* The parallel needs to hold num_regs SETs
21316 and one SET for the stack update. */
21317 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
21320 XVECEXP (par
, 0, 0) = ret_rtx
;
21324 /* Increment the stack pointer, based on there being
21325 num_regs 4-byte registers to restore. */
21326 tmp
= gen_rtx_SET (stack_pointer_rtx
,
21327 plus_constant (Pmode
,
21330 RTX_FRAME_RELATED_P (tmp
) = 1;
21331 XVECEXP (par
, 0, offset_adj
) = tmp
;
21334 /* Now restore every reg, which may include PC. */
21335 for (j
= 0, i
= 0; j
< num_regs
; i
++)
21336 if (saved_regs_mask
& (1 << i
))
21338 reg
= gen_rtx_REG (SImode
, i
);
21339 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
21341 /* Emit single load with writeback. */
21342 tmp
= gen_frame_mem (SImode
,
21343 gen_rtx_POST_INC (Pmode
,
21344 stack_pointer_rtx
));
21345 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
21346 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
21350 tmp
= gen_rtx_SET (reg
,
21353 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
21354 RTX_FRAME_RELATED_P (tmp
) = 1;
21355 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
21357 /* We need to maintain a sequence for DWARF info too. As dwarf info
21358 should not have PC, skip PC. */
21359 if (i
!= PC_REGNUM
)
21360 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
21366 par
= emit_jump_insn (par
);
21368 par
= emit_insn (par
);
21370 REG_NOTES (par
) = dwarf
;
21372 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
21373 stack_pointer_rtx
, stack_pointer_rtx
);
21376 /* Generate and emit an insn pattern that we will recognize as a pop_multi
21377 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
21379 Unfortunately, since this insn does not reflect very well the actual
21380 semantics of the operation, we need to annotate the insn for the benefit
21381 of DWARF2 frame unwind information. */
21383 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
21387 rtx dwarf
= NULL_RTX
;
21390 gcc_assert (num_regs
&& num_regs
<= 32);
21392 /* Workaround ARM10 VFPr1 bug. */
21393 if (num_regs
== 2 && !arm_arch6
)
21395 if (first_reg
== 15)
21401 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
21402 there could be up to 32 D-registers to restore.
21403 If there are more than 16 D-registers, make two recursive calls,
21404 each of which emits one pop_multi instruction. */
21407 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
21408 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
21412 /* The parallel needs to hold num_regs SETs
21413 and one SET for the stack update. */
21414 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
21416 /* Increment the stack pointer, based on there being
21417 num_regs 8-byte registers to restore. */
21418 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
21419 RTX_FRAME_RELATED_P (tmp
) = 1;
21420 XVECEXP (par
, 0, 0) = tmp
;
21422 /* Now show every reg that will be restored, using a SET for each. */
21423 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
21425 reg
= gen_rtx_REG (DFmode
, i
);
21427 tmp
= gen_rtx_SET (reg
,
21430 plus_constant (Pmode
, base_reg
, 8 * j
)));
21431 RTX_FRAME_RELATED_P (tmp
) = 1;
21432 XVECEXP (par
, 0, j
+ 1) = tmp
;
21434 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
21439 par
= emit_insn (par
);
21440 REG_NOTES (par
) = dwarf
;
21442 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
21443 if (REGNO (base_reg
) == IP_REGNUM
)
21445 RTX_FRAME_RELATED_P (par
) = 1;
21446 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
21449 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
21450 base_reg
, base_reg
);
21453 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
21454 number of registers are being popped, multiple LDRD patterns are created for
21455 all register pairs. If odd number of registers are popped, last register is
21456 loaded by using LDR pattern. */
21458 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
21462 rtx par
= NULL_RTX
;
21463 rtx dwarf
= NULL_RTX
;
21464 rtx tmp
, reg
, tmp1
;
21465 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
21467 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
21468 if (saved_regs_mask
& (1 << i
))
21471 gcc_assert (num_regs
&& num_regs
<= 16);
21473 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
21474 to be popped. So, if num_regs is even, now it will become odd,
21475 and we can generate pop with PC. If num_regs is odd, it will be
21476 even now, and ldr with return can be generated for PC. */
21480 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
21482 /* Var j iterates over all the registers to gather all the registers in
21483 saved_regs_mask. Var i gives index of saved registers in stack frame.
21484 A PARALLEL RTX of register-pair is created here, so that pattern for
21485 LDRD can be matched. As PC is always last register to be popped, and
21486 we have already decremented num_regs if PC, we don't have to worry
21487 about PC in this loop. */
21488 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
21489 if (saved_regs_mask
& (1 << j
))
21491 /* Create RTX for memory load. */
21492 reg
= gen_rtx_REG (SImode
, j
);
21493 tmp
= gen_rtx_SET (reg
,
21494 gen_frame_mem (SImode
,
21495 plus_constant (Pmode
,
21496 stack_pointer_rtx
, 4 * i
)));
21497 RTX_FRAME_RELATED_P (tmp
) = 1;
21501 /* When saved-register index (i) is even, the RTX to be emitted is
21502 yet to be created. Hence create it first. The LDRD pattern we
21503 are generating is :
21504 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
21505 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
21506 where target registers need not be consecutive. */
21507 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
21511 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
21512 added as 0th element and if i is odd, reg_i is added as 1st element
21513 of LDRD pattern shown above. */
21514 XVECEXP (par
, 0, (i
% 2)) = tmp
;
21515 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
21519 /* When saved-register index (i) is odd, RTXs for both the registers
21520 to be loaded are generated in above given LDRD pattern, and the
21521 pattern can be emitted now. */
21522 par
= emit_insn (par
);
21523 REG_NOTES (par
) = dwarf
;
21524 RTX_FRAME_RELATED_P (par
) = 1;
21530 /* If the number of registers pushed is odd AND return_in_pc is false OR
21531 number of registers are even AND return_in_pc is true, last register is
21532 popped using LDR. It can be PC as well. Hence, adjust the stack first and
21533 then LDR with post increment. */
21535 /* Increment the stack pointer, based on there being
21536 num_regs 4-byte registers to restore. */
21537 tmp
= gen_rtx_SET (stack_pointer_rtx
,
21538 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
21539 RTX_FRAME_RELATED_P (tmp
) = 1;
21540 tmp
= emit_insn (tmp
);
21543 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
21544 stack_pointer_rtx
, stack_pointer_rtx
);
21549 if (((num_regs
% 2) == 1 && !return_in_pc
)
21550 || ((num_regs
% 2) == 0 && return_in_pc
))
21552 /* Scan for the single register to be popped. Skip until the saved
21553 register is found. */
21554 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
21556 /* Gen LDR with post increment here. */
21557 tmp1
= gen_rtx_MEM (SImode
,
21558 gen_rtx_POST_INC (SImode
,
21559 stack_pointer_rtx
));
21560 set_mem_alias_set (tmp1
, get_frame_alias_set ());
21562 reg
= gen_rtx_REG (SImode
, j
);
21563 tmp
= gen_rtx_SET (reg
, tmp1
);
21564 RTX_FRAME_RELATED_P (tmp
) = 1;
21565 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
21569 /* If return_in_pc, j must be PC_REGNUM. */
21570 gcc_assert (j
== PC_REGNUM
);
21571 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
21572 XVECEXP (par
, 0, 0) = ret_rtx
;
21573 XVECEXP (par
, 0, 1) = tmp
;
21574 par
= emit_jump_insn (par
);
21578 par
= emit_insn (tmp
);
21579 REG_NOTES (par
) = dwarf
;
21580 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
21581 stack_pointer_rtx
, stack_pointer_rtx
);
21585 else if ((num_regs
% 2) == 1 && return_in_pc
)
21587 /* There are 2 registers to be popped. So, generate the pattern
21588 pop_multiple_with_stack_update_and_return to pop in PC. */
21589 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
21595 /* LDRD in ARM mode needs consecutive registers as operands. This function
21596 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
21597 offset addressing and then generates one separate stack udpate. This provides
21598 more scheduling freedom, compared to writeback on every load. However,
21599 if the function returns using load into PC directly
21600 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
21601 before the last load. TODO: Add a peephole optimization to recognize
21602 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
21603 peephole optimization to merge the load at stack-offset zero
21604 with the stack update instruction using load with writeback
21605 in post-index addressing mode. */
21607 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
21611 rtx par
= NULL_RTX
;
21612 rtx dwarf
= NULL_RTX
;
21615 /* Restore saved registers. */
21616 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
21618 while (j
<= LAST_ARM_REGNUM
)
21619 if (saved_regs_mask
& (1 << j
))
21622 && (saved_regs_mask
& (1 << (j
+ 1)))
21623 && (j
+ 1) != PC_REGNUM
)
21625 /* Current register and next register form register pair for which
21626 LDRD can be generated. PC is always the last register popped, and
21627 we handle it separately. */
21629 mem
= gen_frame_mem (DImode
,
21630 plus_constant (Pmode
,
21634 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
21636 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
21637 tmp
= emit_insn (tmp
);
21638 RTX_FRAME_RELATED_P (tmp
) = 1;
21640 /* Generate dwarf info. */
21642 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
21643 gen_rtx_REG (SImode
, j
),
21645 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
21646 gen_rtx_REG (SImode
, j
+ 1),
21649 REG_NOTES (tmp
) = dwarf
;
21654 else if (j
!= PC_REGNUM
)
21656 /* Emit a single word load. */
21658 mem
= gen_frame_mem (SImode
,
21659 plus_constant (Pmode
,
21663 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
21665 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
21666 tmp
= emit_insn (tmp
);
21667 RTX_FRAME_RELATED_P (tmp
) = 1;
21669 /* Generate dwarf info. */
21670 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
21671 gen_rtx_REG (SImode
, j
),
21677 else /* j == PC_REGNUM */
21683 /* Update the stack. */
21686 tmp
= gen_rtx_SET (stack_pointer_rtx
,
21687 plus_constant (Pmode
,
21690 tmp
= emit_insn (tmp
);
21691 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
21692 stack_pointer_rtx
, stack_pointer_rtx
);
21696 if (saved_regs_mask
& (1 << PC_REGNUM
))
21698 /* Only PC is to be popped. */
21699 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
21700 XVECEXP (par
, 0, 0) = ret_rtx
;
21701 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
21702 gen_frame_mem (SImode
,
21703 gen_rtx_POST_INC (SImode
,
21704 stack_pointer_rtx
)));
21705 RTX_FRAME_RELATED_P (tmp
) = 1;
21706 XVECEXP (par
, 0, 1) = tmp
;
21707 par
= emit_jump_insn (par
);
21709 /* Generate dwarf info. */
21710 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
21711 gen_rtx_REG (SImode
, PC_REGNUM
),
21713 REG_NOTES (par
) = dwarf
;
21714 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
21715 stack_pointer_rtx
, stack_pointer_rtx
);
21719 /* Calculate the size of the return value that is passed in registers. */
21721 arm_size_return_regs (void)
21725 if (crtl
->return_rtx
!= 0)
21726 mode
= GET_MODE (crtl
->return_rtx
);
21728 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
21730 return GET_MODE_SIZE (mode
);
21733 /* Return true if the current function needs to save/restore LR. */
21735 thumb_force_lr_save (void)
21737 return !cfun
->machine
->lr_save_eliminated
21739 || thumb_far_jump_used_p ()
21740 || df_regs_ever_live_p (LR_REGNUM
));
21743 /* We do not know if r3 will be available because
21744 we do have an indirect tailcall happening in this
21745 particular case. */
21747 is_indirect_tailcall_p (rtx call
)
21749 rtx pat
= PATTERN (call
);
21751 /* Indirect tail call. */
21752 pat
= XVECEXP (pat
, 0, 0);
21753 if (GET_CODE (pat
) == SET
)
21754 pat
= SET_SRC (pat
);
21756 pat
= XEXP (XEXP (pat
, 0), 0);
21757 return REG_P (pat
);
21760 /* Return true if r3 is used by any of the tail call insns in the
21761 current function. */
21763 any_sibcall_could_use_r3 (void)
21768 if (!crtl
->tail_call_emit
)
21770 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
21771 if (e
->flags
& EDGE_SIBCALL
)
21773 rtx_insn
*call
= BB_END (e
->src
);
21774 if (!CALL_P (call
))
21775 call
= prev_nonnote_nondebug_insn (call
);
21776 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
21777 if (find_regno_fusage (call
, USE
, 3)
21778 || is_indirect_tailcall_p (call
))
21785 /* Compute the distance from register FROM to register TO.
21786 These can be the arg pointer (26), the soft frame pointer (25),
21787 the stack pointer (13) or the hard frame pointer (11).
21788 In thumb mode r7 is used as the soft frame pointer, if needed.
21789 Typical stack layout looks like this:
21791 old stack pointer -> | |
21794 | | saved arguments for
21795 | | vararg functions
21798 hard FP & arg pointer -> | | \
21806 soft frame pointer -> | | /
21811 locals base pointer -> | | /
21816 current stack pointer -> | | /
21819 For a given function some or all of these stack components
21820 may not be needed, giving rise to the possibility of
21821 eliminating some of the registers.
21823 The values returned by this function must reflect the behavior
21824 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21826 The sign of the number returned reflects the direction of stack
21827 growth, so the values are positive for all eliminations except
21828 from the soft frame pointer to the hard frame pointer.
21830 SFP may point just inside the local variables block to ensure correct
21834 /* Return cached stack offsets. */
21836 static arm_stack_offsets
*
21837 arm_get_frame_offsets (void)
21839 struct arm_stack_offsets
*offsets
;
21841 offsets
= &cfun
->machine
->stack_offsets
;
21847 /* Calculate stack offsets. These are used to calculate register elimination
21848 offsets and in prologue/epilogue code. Also calculates which registers
21849 should be saved. */
21852 arm_compute_frame_layout (void)
21854 struct arm_stack_offsets
*offsets
;
21855 unsigned long func_type
;
21858 HOST_WIDE_INT frame_size
;
21861 offsets
= &cfun
->machine
->stack_offsets
;
21863 /* Initially this is the size of the local variables. It will translated
21864 into an offset once we have determined the size of preceding data. */
21865 frame_size
= ROUND_UP_WORD (get_frame_size ());
21867 /* Space for variadic functions. */
21868 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
21870 /* In Thumb mode this is incorrect, but never used. */
21872 = (offsets
->saved_args
21873 + arm_compute_static_chain_stack_bytes ()
21874 + (frame_pointer_needed
? 4 : 0));
21878 unsigned int regno
;
21880 offsets
->saved_regs_mask
= arm_compute_save_core_reg_mask ();
21881 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
21882 saved
= core_saved
;
21884 /* We know that SP will be doubleword aligned on entry, and we must
21885 preserve that condition at any subroutine call. We also require the
21886 soft frame pointer to be doubleword aligned. */
21888 if (TARGET_REALLY_IWMMXT
)
21890 /* Check for the call-saved iWMMXt registers. */
21891 for (regno
= FIRST_IWMMXT_REGNUM
;
21892 regno
<= LAST_IWMMXT_REGNUM
;
21894 if (df_regs_ever_live_p (regno
)
21895 && !call_used_or_fixed_reg_p (regno
))
21899 func_type
= arm_current_func_type ();
21900 /* Space for saved VFP registers. */
21901 if (! IS_VOLATILE (func_type
)
21902 && TARGET_HARD_FLOAT
)
21903 saved
+= arm_get_vfp_saved_size ();
21905 else /* TARGET_THUMB1 */
21907 offsets
->saved_regs_mask
= thumb1_compute_save_core_reg_mask ();
21908 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
21909 saved
= core_saved
;
21910 if (TARGET_BACKTRACE
)
21914 /* Saved registers include the stack frame. */
21915 offsets
->saved_regs
21916 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
21917 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
21919 /* A leaf function does not need any stack alignment if it has nothing
21921 if (crtl
->is_leaf
&& frame_size
== 0
21922 /* However if it calls alloca(), we have a dynamically allocated
21923 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21924 && ! cfun
->calls_alloca
)
21926 offsets
->outgoing_args
= offsets
->soft_frame
;
21927 offsets
->locals_base
= offsets
->soft_frame
;
21931 /* Ensure SFP has the correct alignment. */
21932 if (ARM_DOUBLEWORD_ALIGN
21933 && (offsets
->soft_frame
& 7))
21935 offsets
->soft_frame
+= 4;
21936 /* Try to align stack by pushing an extra reg. Don't bother doing this
21937 when there is a stack frame as the alignment will be rolled into
21938 the normal stack adjustment. */
21939 if (frame_size
+ crtl
->outgoing_args_size
== 0)
21943 /* Register r3 is caller-saved. Normally it does not need to be
21944 saved on entry by the prologue. However if we choose to save
21945 it for padding then we may confuse the compiler into thinking
21946 a prologue sequence is required when in fact it is not. This
21947 will occur when shrink-wrapping if r3 is used as a scratch
21948 register and there are no other callee-saved writes.
21950 This situation can be avoided when other callee-saved registers
21951 are available and r3 is not mandatory if we choose a callee-saved
21952 register for padding. */
21953 bool prefer_callee_reg_p
= false;
21955 /* If it is safe to use r3, then do so. This sometimes
21956 generates better code on Thumb-2 by avoiding the need to
21957 use 32-bit push/pop instructions. */
21958 if (! any_sibcall_could_use_r3 ()
21959 && arm_size_return_regs () <= 12
21960 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
21962 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
21965 if (!TARGET_THUMB2
)
21966 prefer_callee_reg_p
= true;
21969 || prefer_callee_reg_p
)
21971 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
21973 /* Avoid fixed registers; they may be changed at
21974 arbitrary times so it's unsafe to restore them
21975 during the epilogue. */
21977 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
21987 offsets
->saved_regs
+= 4;
21988 offsets
->saved_regs_mask
|= (1 << reg
);
21993 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
21994 offsets
->outgoing_args
= (offsets
->locals_base
21995 + crtl
->outgoing_args_size
);
21997 if (ARM_DOUBLEWORD_ALIGN
)
21999 /* Ensure SP remains doubleword aligned. */
22000 if (offsets
->outgoing_args
& 7)
22001 offsets
->outgoing_args
+= 4;
22002 gcc_assert (!(offsets
->outgoing_args
& 7));
22007 /* Calculate the relative offsets for the different stack pointers. Positive
22008 offsets are in the direction of stack growth. */
22011 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
22013 arm_stack_offsets
*offsets
;
22015 offsets
= arm_get_frame_offsets ();
22017 /* OK, now we have enough information to compute the distances.
22018 There must be an entry in these switch tables for each pair
22019 of registers in ELIMINABLE_REGS, even if some of the entries
22020 seem to be redundant or useless. */
22023 case ARG_POINTER_REGNUM
:
22026 case THUMB_HARD_FRAME_POINTER_REGNUM
:
22029 case FRAME_POINTER_REGNUM
:
22030 /* This is the reverse of the soft frame pointer
22031 to hard frame pointer elimination below. */
22032 return offsets
->soft_frame
- offsets
->saved_args
;
22034 case ARM_HARD_FRAME_POINTER_REGNUM
:
22035 /* This is only non-zero in the case where the static chain register
22036 is stored above the frame. */
22037 return offsets
->frame
- offsets
->saved_args
- 4;
22039 case STACK_POINTER_REGNUM
:
22040 /* If nothing has been pushed on the stack at all
22041 then this will return -4. This *is* correct! */
22042 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
22045 gcc_unreachable ();
22047 gcc_unreachable ();
22049 case FRAME_POINTER_REGNUM
:
22052 case THUMB_HARD_FRAME_POINTER_REGNUM
:
22055 case ARM_HARD_FRAME_POINTER_REGNUM
:
22056 /* The hard frame pointer points to the top entry in the
22057 stack frame. The soft frame pointer to the bottom entry
22058 in the stack frame. If there is no stack frame at all,
22059 then they are identical. */
22061 return offsets
->frame
- offsets
->soft_frame
;
22063 case STACK_POINTER_REGNUM
:
22064 return offsets
->outgoing_args
- offsets
->soft_frame
;
22067 gcc_unreachable ();
22069 gcc_unreachable ();
22072 /* You cannot eliminate from the stack pointer.
22073 In theory you could eliminate from the hard frame
22074 pointer to the stack pointer, but this will never
22075 happen, since if a stack frame is not needed the
22076 hard frame pointer will never be used. */
22077 gcc_unreachable ();
22081 /* Given FROM and TO register numbers, say whether this elimination is
22082 allowed. Frame pointer elimination is automatically handled.
22084 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
22085 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
22086 pointer, we must eliminate FRAME_POINTER_REGNUM into
22087 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
22088 ARG_POINTER_REGNUM. */
22091 arm_can_eliminate (const int from
, const int to
)
22093 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
22094 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
22095 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
22096 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
22100 /* Emit RTL to save coprocessor registers on function entry. Returns the
22101 number of bytes pushed. */
22104 arm_save_coproc_regs(void)
22106 int saved_size
= 0;
22108 unsigned start_reg
;
22111 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
22112 if (df_regs_ever_live_p (reg
) && !call_used_or_fixed_reg_p (reg
))
22114 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
22115 insn
= gen_rtx_MEM (V2SImode
, insn
);
22116 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
22117 RTX_FRAME_RELATED_P (insn
) = 1;
22121 if (TARGET_HARD_FLOAT
)
22123 start_reg
= FIRST_VFP_REGNUM
;
22125 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
22127 if ((!df_regs_ever_live_p (reg
) || call_used_or_fixed_reg_p (reg
))
22128 && (!df_regs_ever_live_p (reg
+ 1)
22129 || call_used_or_fixed_reg_p (reg
+ 1)))
22131 if (start_reg
!= reg
)
22132 saved_size
+= vfp_emit_fstmd (start_reg
,
22133 (reg
- start_reg
) / 2);
22134 start_reg
= reg
+ 2;
22137 if (start_reg
!= reg
)
22138 saved_size
+= vfp_emit_fstmd (start_reg
,
22139 (reg
- start_reg
) / 2);
22145 /* Set the Thumb frame pointer from the stack pointer. */
22148 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
22150 HOST_WIDE_INT amount
;
22153 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
22155 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
22156 stack_pointer_rtx
, GEN_INT (amount
)));
22159 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
22160 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
22161 expects the first two operands to be the same. */
22164 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
22166 hard_frame_pointer_rtx
));
22170 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
22171 hard_frame_pointer_rtx
,
22172 stack_pointer_rtx
));
22174 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
22175 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
22176 RTX_FRAME_RELATED_P (dwarf
) = 1;
22177 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
22180 RTX_FRAME_RELATED_P (insn
) = 1;
22183 struct scratch_reg
{
22188 /* Return a short-lived scratch register for use as a 2nd scratch register on
22189 function entry after the registers are saved in the prologue. This register
22190 must be released by means of release_scratch_register_on_entry. IP is not
22191 considered since it is always used as the 1st scratch register if available.
22193 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
22194 mask of live registers. */
22197 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
22198 unsigned long live_regs
)
22204 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
22210 for (i
= 4; i
< 11; i
++)
22211 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
22219 /* If IP is used as the 1st scratch register for a nested function,
22220 then either r3 wasn't available or is used to preserve IP. */
22221 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
22223 regno
= (regno1
== 3 ? 2 : 3);
22225 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
22230 sr
->reg
= gen_rtx_REG (SImode
, regno
);
22233 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
22234 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
22235 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
22236 plus_constant (Pmode
, stack_pointer_rtx
, -4));
22237 RTX_FRAME_RELATED_P (insn
) = 1;
22238 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
22242 /* Release a scratch register obtained from the preceding function. */
22245 release_scratch_register_on_entry (struct scratch_reg
*sr
)
22249 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
22250 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
22251 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
22252 plus_constant (Pmode
, stack_pointer_rtx
, 4));
22253 RTX_FRAME_RELATED_P (insn
) = 1;
22254 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
22258 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22260 #if PROBE_INTERVAL > 4096
22261 #error Cannot use indexed addressing mode for stack probing
22264 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22265 inclusive. These are offsets from the current stack pointer. REGNO1
22266 is the index number of the 1st scratch register and LIVE_REGS is the
22267 mask of live registers. */
22270 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
22271 unsigned int regno1
, unsigned long live_regs
)
22273 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
22275 /* See if we have a constant small number of probes to generate. If so,
22276 that's the easy case. */
22277 if (size
<= PROBE_INTERVAL
)
22279 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
22280 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
22281 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
22284 /* The run-time loop is made up of 10 insns in the generic case while the
22285 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
22286 else if (size
<= 5 * PROBE_INTERVAL
)
22288 HOST_WIDE_INT i
, rem
;
22290 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
22291 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
22292 emit_stack_probe (reg1
);
22294 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
22295 it exceeds SIZE. If only two probes are needed, this will not
22296 generate any code. Then probe at FIRST + SIZE. */
22297 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
22299 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
22300 emit_stack_probe (reg1
);
22303 rem
= size
- (i
- PROBE_INTERVAL
);
22304 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
22306 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
22307 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
22310 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
22313 /* Otherwise, do the same as above, but in a loop. Note that we must be
22314 extra careful with variables wrapping around because we might be at
22315 the very top (or the very bottom) of the address space and we have
22316 to be able to handle this case properly; in particular, we use an
22317 equality test for the loop condition. */
22320 HOST_WIDE_INT rounded_size
;
22321 struct scratch_reg sr
;
22323 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
22325 emit_move_insn (reg1
, GEN_INT (first
));
22328 /* Step 1: round SIZE to the previous multiple of the interval. */
22330 rounded_size
= size
& -PROBE_INTERVAL
;
22331 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
22334 /* Step 2: compute initial and final value of the loop counter. */
22336 /* TEST_ADDR = SP + FIRST. */
22337 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
22339 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
22340 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
22343 /* Step 3: the loop
22347 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
22350 while (TEST_ADDR != LAST_ADDR)
22352 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
22353 until it is equal to ROUNDED_SIZE. */
22355 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
22358 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
22359 that SIZE is equal to ROUNDED_SIZE. */
22361 if (size
!= rounded_size
)
22363 HOST_WIDE_INT rem
= size
- rounded_size
;
22365 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
22367 emit_set_insn (sr
.reg
,
22368 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
22369 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
22370 PROBE_INTERVAL
- rem
));
22373 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
22376 release_scratch_register_on_entry (&sr
);
22379 /* Make sure nothing is scheduled before we are done. */
22380 emit_insn (gen_blockage ());
22383 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
22384 absolute addresses. */
22387 output_probe_stack_range (rtx reg1
, rtx reg2
)
22389 static int labelno
= 0;
22393 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
22396 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
22398 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
22400 xops
[1] = GEN_INT (PROBE_INTERVAL
);
22401 output_asm_insn ("sub\t%0, %0, %1", xops
);
22403 /* Probe at TEST_ADDR. */
22404 output_asm_insn ("str\tr0, [%0, #0]", xops
);
22406 /* Test if TEST_ADDR == LAST_ADDR. */
22408 output_asm_insn ("cmp\t%0, %1", xops
);
22411 fputs ("\tbne\t", asm_out_file
);
22412 assemble_name_raw (asm_out_file
, loop_lab
);
22413 fputc ('\n', asm_out_file
);
22418 /* Generate the prologue instructions for entry into an ARM or Thumb-2
22421 arm_expand_prologue (void)
22426 unsigned long live_regs_mask
;
22427 unsigned long func_type
;
22429 int saved_pretend_args
= 0;
22430 int saved_regs
= 0;
22431 unsigned HOST_WIDE_INT args_to_push
;
22432 HOST_WIDE_INT size
;
22433 arm_stack_offsets
*offsets
;
22436 func_type
= arm_current_func_type ();
22438 /* Naked functions don't have prologues. */
22439 if (IS_NAKED (func_type
))
22441 if (flag_stack_usage_info
)
22442 current_function_static_stack_size
= 0;
22446 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
22447 args_to_push
= crtl
->args
.pretend_args_size
;
22449 /* Compute which register we will have to save onto the stack. */
22450 offsets
= arm_get_frame_offsets ();
22451 live_regs_mask
= offsets
->saved_regs_mask
;
22453 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
22455 if (IS_STACKALIGN (func_type
))
22459 /* Handle a word-aligned stack pointer. We generate the following:
22464 <save and restore r0 in normal prologue/epilogue>
22468 The unwinder doesn't need to know about the stack realignment.
22469 Just tell it we saved SP in r0. */
22470 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
22472 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
22473 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
22475 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
22476 RTX_FRAME_RELATED_P (insn
) = 1;
22477 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
22479 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
22481 /* ??? The CFA changes here, which may cause GDB to conclude that it
22482 has entered a different function. That said, the unwind info is
22483 correct, individually, before and after this instruction because
22484 we've described the save of SP, which will override the default
22485 handling of SP as restoring from the CFA. */
22486 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
22489 /* Let's compute the static_chain_stack_bytes required and store it. Right
22490 now the value must be -1 as stored by arm_init_machine_status (). */
22491 cfun
->machine
->static_chain_stack_bytes
22492 = arm_compute_static_chain_stack_bytes ();
22494 /* The static chain register is the same as the IP register. If it is
22495 clobbered when creating the frame, we need to save and restore it. */
22496 clobber_ip
= IS_NESTED (func_type
)
22497 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
22498 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
22499 || flag_stack_clash_protection
)
22500 && !df_regs_ever_live_p (LR_REGNUM
)
22501 && arm_r3_live_at_start_p ()));
22503 /* Find somewhere to store IP whilst the frame is being created.
22504 We try the following places in order:
22506 1. The last argument register r3 if it is available.
22507 2. A slot on the stack above the frame if there are no
22508 arguments to push onto the stack.
22509 3. Register r3 again, after pushing the argument registers
22510 onto the stack, if this is a varargs function.
22511 4. The last slot on the stack created for the arguments to
22512 push, if this isn't a varargs function.
22514 Note - we only need to tell the dwarf2 backend about the SP
22515 adjustment in the second variant; the static chain register
22516 doesn't need to be unwound, as it doesn't contain a value
22517 inherited from the caller. */
22520 if (!arm_r3_live_at_start_p ())
22521 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
22522 else if (args_to_push
== 0)
22526 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
22529 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
22530 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
22533 /* Just tell the dwarf backend that we adjusted SP. */
22534 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
22535 plus_constant (Pmode
, stack_pointer_rtx
,
22537 RTX_FRAME_RELATED_P (insn
) = 1;
22538 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
22542 /* Store the args on the stack. */
22543 if (cfun
->machine
->uses_anonymous_args
)
22545 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
22546 (0xf0 >> (args_to_push
/ 4)) & 0xf);
22547 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
22548 saved_pretend_args
= 1;
22554 if (args_to_push
== 4)
22555 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
22557 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
22558 plus_constant (Pmode
,
22562 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
22564 /* Just tell the dwarf backend that we adjusted SP. */
22565 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
22566 plus_constant (Pmode
, stack_pointer_rtx
,
22568 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
22571 RTX_FRAME_RELATED_P (insn
) = 1;
22572 fp_offset
= args_to_push
;
22577 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
22579 if (IS_INTERRUPT (func_type
))
22581 /* Interrupt functions must not corrupt any registers.
22582 Creating a frame pointer however, corrupts the IP
22583 register, so we must push it first. */
22584 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
22586 /* Do not set RTX_FRAME_RELATED_P on this insn.
22587 The dwarf stack unwinding code only wants to see one
22588 stack decrement per function, and this is not it. If
22589 this instruction is labeled as being part of the frame
22590 creation sequence then dwarf2out_frame_debug_expr will
22591 die when it encounters the assignment of IP to FP
22592 later on, since the use of SP here establishes SP as
22593 the CFA register and not IP.
22595 Anyway this instruction is not really part of the stack
22596 frame creation although it is part of the prologue. */
22599 insn
= emit_set_insn (ip_rtx
,
22600 plus_constant (Pmode
, stack_pointer_rtx
,
22602 RTX_FRAME_RELATED_P (insn
) = 1;
22607 /* Push the argument registers, or reserve space for them. */
22608 if (cfun
->machine
->uses_anonymous_args
)
22609 insn
= emit_multi_reg_push
22610 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
22611 (0xf0 >> (args_to_push
/ 4)) & 0xf);
22614 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
22615 GEN_INT (- args_to_push
)));
22616 RTX_FRAME_RELATED_P (insn
) = 1;
22619 /* If this is an interrupt service routine, and the link register
22620 is going to be pushed, and we're not generating extra
22621 push of IP (needed when frame is needed and frame layout if apcs),
22622 subtracting four from LR now will mean that the function return
22623 can be done with a single instruction. */
22624 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
22625 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
22626 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
22629 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
22631 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
22634 if (live_regs_mask
)
22636 unsigned long dwarf_regs_mask
= live_regs_mask
;
22638 saved_regs
+= bit_count (live_regs_mask
) * 4;
22639 if (optimize_size
&& !frame_pointer_needed
22640 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
22642 /* If no coprocessor registers are being pushed and we don't have
22643 to worry about a frame pointer then push extra registers to
22644 create the stack frame. This is done in a way that does not
22645 alter the frame layout, so is independent of the epilogue. */
22649 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
22651 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
22652 if (frame
&& n
* 4 >= frame
)
22655 live_regs_mask
|= (1 << n
) - 1;
22656 saved_regs
+= frame
;
22661 && current_tune
->prefer_ldrd_strd
22662 && !optimize_function_for_size_p (cfun
))
22664 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
22666 thumb2_emit_strd_push (live_regs_mask
);
22667 else if (TARGET_ARM
22668 && !TARGET_APCS_FRAME
22669 && !IS_INTERRUPT (func_type
))
22670 arm_emit_strd_push (live_regs_mask
);
22673 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
22674 RTX_FRAME_RELATED_P (insn
) = 1;
22679 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
22680 RTX_FRAME_RELATED_P (insn
) = 1;
22684 if (! IS_VOLATILE (func_type
))
22685 saved_regs
+= arm_save_coproc_regs ();
22687 if (frame_pointer_needed
&& TARGET_ARM
)
22689 /* Create the new frame pointer. */
22690 if (TARGET_APCS_FRAME
)
22692 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
22693 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
22694 RTX_FRAME_RELATED_P (insn
) = 1;
22698 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
22699 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
22700 stack_pointer_rtx
, insn
));
22701 RTX_FRAME_RELATED_P (insn
) = 1;
22705 size
= offsets
->outgoing_args
- offsets
->saved_args
;
22706 if (flag_stack_usage_info
)
22707 current_function_static_stack_size
= size
;
22709 /* If this isn't an interrupt service routine and we have a frame, then do
22710 stack checking. We use IP as the first scratch register, except for the
22711 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
22712 if (!IS_INTERRUPT (func_type
)
22713 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
22714 || flag_stack_clash_protection
))
22716 unsigned int regno
;
22718 if (!IS_NESTED (func_type
) || clobber_ip
)
22720 else if (df_regs_ever_live_p (LR_REGNUM
))
22725 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
22727 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
22728 arm_emit_probe_stack_range (get_stack_check_protect (),
22729 size
- get_stack_check_protect (),
22730 regno
, live_regs_mask
);
22733 arm_emit_probe_stack_range (get_stack_check_protect (), size
,
22734 regno
, live_regs_mask
);
22737 /* Recover the static chain register. */
22740 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
22741 insn
= gen_rtx_REG (SImode
, 3);
22744 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
22745 insn
= gen_frame_mem (SImode
, insn
);
22747 emit_set_insn (ip_rtx
, insn
);
22748 emit_insn (gen_force_register_use (ip_rtx
));
22751 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
22753 /* This add can produce multiple insns for a large constant, so we
22754 need to get tricky. */
22755 rtx_insn
*last
= get_last_insn ();
22757 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
22758 - offsets
->outgoing_args
);
22760 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
22764 last
= last
? NEXT_INSN (last
) : get_insns ();
22765 RTX_FRAME_RELATED_P (last
) = 1;
22767 while (last
!= insn
);
22769 /* If the frame pointer is needed, emit a special barrier that
22770 will prevent the scheduler from moving stores to the frame
22771 before the stack adjustment. */
22772 if (frame_pointer_needed
)
22773 emit_insn (gen_stack_tie (stack_pointer_rtx
,
22774 hard_frame_pointer_rtx
));
22778 if (frame_pointer_needed
&& TARGET_THUMB2
)
22779 thumb_set_frame_pointer (offsets
);
22781 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
22783 unsigned long mask
;
22785 mask
= live_regs_mask
;
22786 mask
&= THUMB2_WORK_REGS
;
22787 if (!IS_NESTED (func_type
))
22788 mask
|= (1 << IP_REGNUM
);
22789 arm_load_pic_register (mask
, NULL_RTX
);
22792 /* If we are profiling, make sure no instructions are scheduled before
22793 the call to mcount. Similarly if the user has requested no
22794 scheduling in the prolog. Similarly if we want non-call exceptions
22795 using the EABI unwinder, to prevent faulting instructions from being
22796 swapped with a stack adjustment. */
22797 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
22798 || (arm_except_unwind_info (&global_options
) == UI_TARGET
22799 && cfun
->can_throw_non_call_exceptions
))
22800 emit_insn (gen_blockage ());
22802 /* If the link register is being kept alive, with the return address in it,
22803 then make sure that it does not get reused by the ce2 pass. */
22804 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
22805 cfun
->machine
->lr_save_eliminated
= 1;
22808 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22810 arm_print_condition (FILE *stream
)
22812 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
22814 /* Branch conversion is not implemented for Thumb-2. */
22817 output_operand_lossage ("predicated Thumb instruction");
22820 if (current_insn_predicate
!= NULL
)
22822 output_operand_lossage
22823 ("predicated instruction in conditional sequence");
22827 fputs (arm_condition_codes
[arm_current_cc
], stream
);
22829 else if (current_insn_predicate
)
22831 enum arm_cond_code code
;
22835 output_operand_lossage ("predicated Thumb instruction");
22839 code
= get_arm_condition_code (current_insn_predicate
);
22840 fputs (arm_condition_codes
[code
], stream
);
22845 /* Globally reserved letters: acln
22846 Puncutation letters currently used: @_|?().!#
22847 Lower case letters currently used: bcdefhimpqtvwxyz
22848 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22849 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22851 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22853 If CODE is 'd', then the X is a condition operand and the instruction
22854 should only be executed if the condition is true.
22855 if CODE is 'D', then the X is a condition operand and the instruction
22856 should only be executed if the condition is false: however, if the mode
22857 of the comparison is CCFPEmode, then always execute the instruction -- we
22858 do this because in these circumstances !GE does not necessarily imply LT;
22859 in these cases the instruction pattern will take care to make sure that
22860 an instruction containing %d will follow, thereby undoing the effects of
22861 doing this instruction unconditionally.
22862 If CODE is 'N' then X is a floating point operand that must be negated
22864 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22865 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
22867 arm_print_operand (FILE *stream
, rtx x
, int code
)
22872 fputs (ASM_COMMENT_START
, stream
);
22876 fputs (user_label_prefix
, stream
);
22880 fputs (REGISTER_PREFIX
, stream
);
22884 arm_print_condition (stream
);
22888 /* The current condition code for a condition code setting instruction.
22889 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22890 fputc('s', stream
);
22891 arm_print_condition (stream
);
22895 /* If the instruction is conditionally executed then print
22896 the current condition code, otherwise print 's'. */
22897 gcc_assert (TARGET_THUMB2
);
22898 if (current_insn_predicate
)
22899 arm_print_condition (stream
);
22901 fputc('s', stream
);
22904 /* %# is a "break" sequence. It doesn't output anything, but is used to
22905 separate e.g. operand numbers from following text, if that text consists
22906 of further digits which we don't want to be part of the operand
22914 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
22915 fprintf (stream
, "%s", fp_const_from_val (&r
));
22919 /* An integer or symbol address without a preceding # sign. */
22921 switch (GET_CODE (x
))
22924 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
22928 output_addr_const (stream
, x
);
22932 if (GET_CODE (XEXP (x
, 0)) == PLUS
22933 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
22935 output_addr_const (stream
, x
);
22938 /* Fall through. */
22941 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22945 /* An integer that we want to print in HEX. */
22947 switch (GET_CODE (x
))
22950 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
22954 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22959 if (CONST_INT_P (x
))
22962 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
22963 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
22967 putc ('~', stream
);
22968 output_addr_const (stream
, x
);
22973 /* Print the log2 of a CONST_INT. */
22977 if (!CONST_INT_P (x
)
22978 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
22979 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22981 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
22986 /* The low 16 bits of an immediate constant. */
22987 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
22991 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
22995 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
23003 shift
= shift_op (x
, &val
);
23007 fprintf (stream
, ", %s ", shift
);
23009 arm_print_operand (stream
, XEXP (x
, 1), 0);
23011 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
23016 /* An explanation of the 'Q', 'R' and 'H' register operands:
23018 In a pair of registers containing a DI or DF value the 'Q'
23019 operand returns the register number of the register containing
23020 the least significant part of the value. The 'R' operand returns
23021 the register number of the register containing the most
23022 significant part of the value.
23024 The 'H' operand returns the higher of the two register numbers.
23025 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
23026 same as the 'Q' operand, since the most significant part of the
23027 value is held in the lower number register. The reverse is true
23028 on systems where WORDS_BIG_ENDIAN is false.
23030 The purpose of these operands is to distinguish between cases
23031 where the endian-ness of the values is important (for example
23032 when they are added together), and cases where the endian-ness
23033 is irrelevant, but the order of register operations is important.
23034 For example when loading a value from memory into a register
23035 pair, the endian-ness does not matter. Provided that the value
23036 from the lower memory address is put into the lower numbered
23037 register, and the value from the higher address is put into the
23038 higher numbered register, the load will work regardless of whether
23039 the value being loaded is big-wordian or little-wordian. The
23040 order of the two register loads can matter however, if the address
23041 of the memory location is actually held in one of the registers
23042 being overwritten by the load.
23044 The 'Q' and 'R' constraints are also available for 64-bit
23047 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
23049 rtx part
= gen_lowpart (SImode
, x
);
23050 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
23054 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
23056 output_operand_lossage ("invalid operand for code '%c'", code
);
23060 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
23064 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
23066 machine_mode mode
= GET_MODE (x
);
23069 if (mode
== VOIDmode
)
23071 part
= gen_highpart_mode (SImode
, mode
, x
);
23072 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
23076 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
23078 output_operand_lossage ("invalid operand for code '%c'", code
);
23082 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
23086 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
23088 output_operand_lossage ("invalid operand for code '%c'", code
);
23092 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
23096 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
23098 output_operand_lossage ("invalid operand for code '%c'", code
);
23102 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
23106 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
23108 output_operand_lossage ("invalid operand for code '%c'", code
);
23112 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
23116 asm_fprintf (stream
, "%r",
23117 REG_P (XEXP (x
, 0))
23118 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
23122 asm_fprintf (stream
, "{%r-%r}",
23124 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
23127 /* Like 'M', but writing doubleword vector registers, for use by Neon
23131 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
23132 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
23134 asm_fprintf (stream
, "{d%d}", regno
);
23136 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
23141 /* CONST_TRUE_RTX means always -- that's the default. */
23142 if (x
== const_true_rtx
)
23145 if (!COMPARISON_P (x
))
23147 output_operand_lossage ("invalid operand for code '%c'", code
);
23151 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
23156 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
23157 want to do that. */
23158 if (x
== const_true_rtx
)
23160 output_operand_lossage ("instruction never executed");
23163 if (!COMPARISON_P (x
))
23165 output_operand_lossage ("invalid operand for code '%c'", code
);
23169 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
23170 (get_arm_condition_code (x
))],
23180 /* Former Maverick support, removed after GCC-4.7. */
23181 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
23186 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
23187 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
23188 /* Bad value for wCG register number. */
23190 output_operand_lossage ("invalid operand for code '%c'", code
);
23195 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
23198 /* Print an iWMMXt control register name. */
23200 if (!CONST_INT_P (x
)
23202 || INTVAL (x
) >= 16)
23203 /* Bad value for wC register number. */
23205 output_operand_lossage ("invalid operand for code '%c'", code
);
23211 static const char * wc_reg_names
[16] =
23213 "wCID", "wCon", "wCSSF", "wCASF",
23214 "wC4", "wC5", "wC6", "wC7",
23215 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
23216 "wC12", "wC13", "wC14", "wC15"
23219 fputs (wc_reg_names
[INTVAL (x
)], stream
);
23223 /* Print the high single-precision register of a VFP double-precision
23227 machine_mode mode
= GET_MODE (x
);
23230 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
23232 output_operand_lossage ("invalid operand for code '%c'", code
);
23237 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
23239 output_operand_lossage ("invalid operand for code '%c'", code
);
23243 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
23247 /* Print a VFP/Neon double precision or quad precision register name. */
23251 machine_mode mode
= GET_MODE (x
);
23252 int is_quad
= (code
== 'q');
23255 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
23257 output_operand_lossage ("invalid operand for code '%c'", code
);
23262 || !IS_VFP_REGNUM (REGNO (x
)))
23264 output_operand_lossage ("invalid operand for code '%c'", code
);
23269 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
23270 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
23272 output_operand_lossage ("invalid operand for code '%c'", code
);
23276 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
23277 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
23281 /* These two codes print the low/high doubleword register of a Neon quad
23282 register, respectively. For pair-structure types, can also print
23283 low/high quadword registers. */
23287 machine_mode mode
= GET_MODE (x
);
23290 if ((GET_MODE_SIZE (mode
) != 16
23291 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
23293 output_operand_lossage ("invalid operand for code '%c'", code
);
23298 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
23300 output_operand_lossage ("invalid operand for code '%c'", code
);
23304 if (GET_MODE_SIZE (mode
) == 16)
23305 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
23306 + (code
== 'f' ? 1 : 0));
23308 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
23309 + (code
== 'f' ? 1 : 0));
23313 /* Print a VFPv3 floating-point constant, represented as an integer
23317 int index
= vfp3_const_double_index (x
);
23318 gcc_assert (index
!= -1);
23319 fprintf (stream
, "%d", index
);
23323 /* Print bits representing opcode features for Neon.
23325 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
23326 and polynomials as unsigned.
23328 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
23330 Bit 2 is 1 for rounding functions, 0 otherwise. */
23332 /* Identify the type as 's', 'u', 'p' or 'f'. */
23335 HOST_WIDE_INT bits
= INTVAL (x
);
23336 fputc ("uspf"[bits
& 3], stream
);
23340 /* Likewise, but signed and unsigned integers are both 'i'. */
23343 HOST_WIDE_INT bits
= INTVAL (x
);
23344 fputc ("iipf"[bits
& 3], stream
);
23348 /* As for 'T', but emit 'u' instead of 'p'. */
23351 HOST_WIDE_INT bits
= INTVAL (x
);
23352 fputc ("usuf"[bits
& 3], stream
);
23356 /* Bit 2: rounding (vs none). */
23359 HOST_WIDE_INT bits
= INTVAL (x
);
23360 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
23364 /* Memory operand for vld1/vst1 instruction. */
23368 bool postinc
= FALSE
;
23369 rtx postinc_reg
= NULL
;
23370 unsigned align
, memsize
, align_bits
;
23372 gcc_assert (MEM_P (x
));
23373 addr
= XEXP (x
, 0);
23374 if (GET_CODE (addr
) == POST_INC
)
23377 addr
= XEXP (addr
, 0);
23379 if (GET_CODE (addr
) == POST_MODIFY
)
23381 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
23382 addr
= XEXP (addr
, 0);
23384 asm_fprintf (stream
, "[%r", REGNO (addr
));
23386 /* We know the alignment of this access, so we can emit a hint in the
23387 instruction (for some alignments) as an aid to the memory subsystem
23389 align
= MEM_ALIGN (x
) >> 3;
23390 memsize
= MEM_SIZE (x
);
23392 /* Only certain alignment specifiers are supported by the hardware. */
23393 if (memsize
== 32 && (align
% 32) == 0)
23395 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
23397 else if (memsize
>= 8 && (align
% 8) == 0)
23402 if (align_bits
!= 0)
23403 asm_fprintf (stream
, ":%d", align_bits
);
23405 asm_fprintf (stream
, "]");
23408 fputs("!", stream
);
23410 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
23418 gcc_assert (MEM_P (x
));
23419 addr
= XEXP (x
, 0);
23420 gcc_assert (REG_P (addr
));
23421 asm_fprintf (stream
, "[%r]", REGNO (addr
));
23425 /* Translate an S register number into a D register number and element index. */
23428 machine_mode mode
= GET_MODE (x
);
23431 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
23433 output_operand_lossage ("invalid operand for code '%c'", code
);
23438 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
23440 output_operand_lossage ("invalid operand for code '%c'", code
);
23444 regno
= regno
- FIRST_VFP_REGNUM
;
23445 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
23450 gcc_assert (CONST_DOUBLE_P (x
));
23452 result
= vfp3_const_double_for_fract_bits (x
);
23454 result
= vfp3_const_double_for_bits (x
);
23455 fprintf (stream
, "#%d", result
);
23458 /* Register specifier for vld1.16/vst1.16. Translate the S register
23459 number into a D register number and element index. */
23462 machine_mode mode
= GET_MODE (x
);
23465 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
23467 output_operand_lossage ("invalid operand for code '%c'", code
);
23472 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
23474 output_operand_lossage ("invalid operand for code '%c'", code
);
23478 regno
= regno
- FIRST_VFP_REGNUM
;
23479 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
23486 output_operand_lossage ("missing operand");
23490 switch (GET_CODE (x
))
23493 asm_fprintf (stream
, "%r", REGNO (x
));
23497 output_address (GET_MODE (x
), XEXP (x
, 0));
23503 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
23504 sizeof (fpstr
), 0, 1);
23505 fprintf (stream
, "#%s", fpstr
);
23510 gcc_assert (GET_CODE (x
) != NEG
);
23511 fputc ('#', stream
);
23512 if (GET_CODE (x
) == HIGH
)
23514 fputs (":lower16:", stream
);
23518 output_addr_const (stream
, x
);
23524 /* Target hook for printing a memory address. */
23526 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
23530 int is_minus
= GET_CODE (x
) == MINUS
;
23533 asm_fprintf (stream
, "[%r]", REGNO (x
));
23534 else if (GET_CODE (x
) == PLUS
|| is_minus
)
23536 rtx base
= XEXP (x
, 0);
23537 rtx index
= XEXP (x
, 1);
23538 HOST_WIDE_INT offset
= 0;
23540 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
23542 /* Ensure that BASE is a register. */
23543 /* (one of them must be). */
23544 /* Also ensure the SP is not used as in index register. */
23545 std::swap (base
, index
);
23547 switch (GET_CODE (index
))
23550 offset
= INTVAL (index
);
23553 asm_fprintf (stream
, "[%r, #%wd]",
23554 REGNO (base
), offset
);
23558 asm_fprintf (stream
, "[%r, %s%r]",
23559 REGNO (base
), is_minus
? "-" : "",
23569 asm_fprintf (stream
, "[%r, %s%r",
23570 REGNO (base
), is_minus
? "-" : "",
23571 REGNO (XEXP (index
, 0)));
23572 arm_print_operand (stream
, index
, 'S');
23573 fputs ("]", stream
);
23578 gcc_unreachable ();
23581 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
23582 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
23584 gcc_assert (REG_P (XEXP (x
, 0)));
23586 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
23587 asm_fprintf (stream
, "[%r, #%s%d]!",
23588 REGNO (XEXP (x
, 0)),
23589 GET_CODE (x
) == PRE_DEC
? "-" : "",
23590 GET_MODE_SIZE (mode
));
23592 asm_fprintf (stream
, "[%r], #%s%d",
23593 REGNO (XEXP (x
, 0)),
23594 GET_CODE (x
) == POST_DEC
? "-" : "",
23595 GET_MODE_SIZE (mode
));
23597 else if (GET_CODE (x
) == PRE_MODIFY
)
23599 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
23600 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
23601 asm_fprintf (stream
, "#%wd]!",
23602 INTVAL (XEXP (XEXP (x
, 1), 1)));
23604 asm_fprintf (stream
, "%r]!",
23605 REGNO (XEXP (XEXP (x
, 1), 1)));
23607 else if (GET_CODE (x
) == POST_MODIFY
)
23609 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
23610 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
23611 asm_fprintf (stream
, "#%wd",
23612 INTVAL (XEXP (XEXP (x
, 1), 1)));
23614 asm_fprintf (stream
, "%r",
23615 REGNO (XEXP (XEXP (x
, 1), 1)));
23617 else output_addr_const (stream
, x
);
23622 asm_fprintf (stream
, "[%r]", REGNO (x
));
23623 else if (GET_CODE (x
) == POST_INC
)
23624 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
23625 else if (GET_CODE (x
) == PLUS
)
23627 gcc_assert (REG_P (XEXP (x
, 0)));
23628 if (CONST_INT_P (XEXP (x
, 1)))
23629 asm_fprintf (stream
, "[%r, #%wd]",
23630 REGNO (XEXP (x
, 0)),
23631 INTVAL (XEXP (x
, 1)));
23633 asm_fprintf (stream
, "[%r, %r]",
23634 REGNO (XEXP (x
, 0)),
23635 REGNO (XEXP (x
, 1)));
23638 output_addr_const (stream
, x
);
23642 /* Target hook for indicating whether a punctuation character for
23643 TARGET_PRINT_OPERAND is valid. */
23645 arm_print_operand_punct_valid_p (unsigned char code
)
23647 return (code
== '@' || code
== '|' || code
== '.'
23648 || code
== '(' || code
== ')' || code
== '#'
23649 || (TARGET_32BIT
&& (code
== '?'))
23650 || (TARGET_THUMB2
&& (code
== '!'))
23651 || (TARGET_THUMB
&& (code
== '_')));
23654 /* Target hook for assembling integer objects. The ARM version needs to
23655 handle word-sized values specially. */
23657 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
23661 if (size
== UNITS_PER_WORD
&& aligned_p
)
23663 fputs ("\t.word\t", asm_out_file
);
23664 output_addr_const (asm_out_file
, x
);
23666 /* Mark symbols as position independent. We only do this in the
23667 .text segment, not in the .data segment. */
23668 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
23669 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
23671 /* See legitimize_pic_address for an explanation of the
23672 TARGET_VXWORKS_RTP check. */
23673 /* References to weak symbols cannot be resolved locally:
23674 they may be overridden by a non-weak definition at link
23676 if (!arm_pic_data_is_text_relative
23677 || (GET_CODE (x
) == SYMBOL_REF
23678 && (!SYMBOL_REF_LOCAL_P (x
)
23679 || (SYMBOL_REF_DECL (x
)
23680 ? DECL_WEAK (SYMBOL_REF_DECL (x
)) : 0)
23681 || (SYMBOL_REF_FUNCTION_P (x
)
23682 && !arm_fdpic_local_funcdesc_p (x
)))))
23684 if (TARGET_FDPIC
&& SYMBOL_REF_FUNCTION_P (x
))
23685 fputs ("(GOTFUNCDESC)", asm_out_file
);
23687 fputs ("(GOT)", asm_out_file
);
23691 if (TARGET_FDPIC
&& SYMBOL_REF_FUNCTION_P (x
))
23692 fputs ("(GOTOFFFUNCDESC)", asm_out_file
);
23698 || arm_is_segment_info_known (x
, &is_readonly
))
23699 fputs ("(GOTOFF)", asm_out_file
);
23701 fputs ("(GOT)", asm_out_file
);
23706 /* For FDPIC we also have to mark symbol for .data section. */
23708 && !making_const_table
23709 && SYMBOL_REF_P (x
)
23710 && SYMBOL_REF_FUNCTION_P (x
))
23711 fputs ("(FUNCDESC)", asm_out_file
);
23713 fputc ('\n', asm_out_file
);
23717 mode
= GET_MODE (x
);
23719 if (arm_vector_mode_supported_p (mode
))
23723 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
23725 units
= CONST_VECTOR_NUNITS (x
);
23726 size
= GET_MODE_UNIT_SIZE (mode
);
23728 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
23729 for (i
= 0; i
< units
; i
++)
23731 rtx elt
= CONST_VECTOR_ELT (x
, i
);
23733 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
23736 for (i
= 0; i
< units
; i
++)
23738 rtx elt
= CONST_VECTOR_ELT (x
, i
);
23740 (*CONST_DOUBLE_REAL_VALUE (elt
),
23741 as_a
<scalar_float_mode
> (GET_MODE_INNER (mode
)),
23742 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
23748 return default_assemble_integer (x
, size
, aligned_p
);
23752 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
23756 if (!TARGET_AAPCS_BASED
)
23759 default_named_section_asm_out_constructor
23760 : default_named_section_asm_out_destructor
) (symbol
, priority
);
23764 /* Put these in the .init_array section, using a special relocation. */
23765 if (priority
!= DEFAULT_INIT_PRIORITY
)
23768 sprintf (buf
, "%s.%.5u",
23769 is_ctor
? ".init_array" : ".fini_array",
23771 s
= get_section (buf
, SECTION_WRITE
| SECTION_NOTYPE
, NULL_TREE
);
23778 switch_to_section (s
);
23779 assemble_align (POINTER_SIZE
);
23780 fputs ("\t.word\t", asm_out_file
);
23781 output_addr_const (asm_out_file
, symbol
);
23782 fputs ("(target1)\n", asm_out_file
);
23785 /* Add a function to the list of static constructors. */
23788 arm_elf_asm_constructor (rtx symbol
, int priority
)
23790 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
23793 /* Add a function to the list of static destructors. */
23796 arm_elf_asm_destructor (rtx symbol
, int priority
)
23798 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
23801 /* A finite state machine takes care of noticing whether or not instructions
23802 can be conditionally executed, and thus decrease execution time and code
23803 size by deleting branch instructions. The fsm is controlled by
23804 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
23806 /* The state of the fsm controlling condition codes are:
23807 0: normal, do nothing special
23808 1: make ASM_OUTPUT_OPCODE not output this instruction
23809 2: make ASM_OUTPUT_OPCODE not output this instruction
23810 3: make instructions conditional
23811 4: make instructions conditional
23813 State transitions (state->state by whom under condition):
23814 0 -> 1 final_prescan_insn if the `target' is a label
23815 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23816 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23817 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23818 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23819 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23820 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23821 (the target insn is arm_target_insn).
23823 If the jump clobbers the conditions then we use states 2 and 4.
23825 A similar thing can be done with conditional return insns.
23827 XXX In case the `target' is an unconditional branch, this conditionalising
23828 of the instructions always reduces code size, but not always execution
23829 time. But then, I want to reduce the code size to somewhere near what
23830 /bin/cc produces. */
23832 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23833 instructions. When a COND_EXEC instruction is seen the subsequent
23834 instructions are scanned so that multiple conditional instructions can be
23835 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23836 specify the length and true/false mask for the IT block. These will be
23837 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
23839 /* Returns the index of the ARM condition code string in
23840 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23841 COMPARISON should be an rtx like `(eq (...) (...))'. */
23844 maybe_get_arm_condition_code (rtx comparison
)
23846 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
23847 enum arm_cond_code code
;
23848 enum rtx_code comp_code
= GET_CODE (comparison
);
23850 if (GET_MODE_CLASS (mode
) != MODE_CC
)
23851 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
23852 XEXP (comparison
, 1));
23856 case E_CC_DNEmode
: code
= ARM_NE
; goto dominance
;
23857 case E_CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
23858 case E_CC_DGEmode
: code
= ARM_GE
; goto dominance
;
23859 case E_CC_DGTmode
: code
= ARM_GT
; goto dominance
;
23860 case E_CC_DLEmode
: code
= ARM_LE
; goto dominance
;
23861 case E_CC_DLTmode
: code
= ARM_LT
; goto dominance
;
23862 case E_CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
23863 case E_CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
23864 case E_CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
23865 case E_CC_DLTUmode
: code
= ARM_CC
;
23868 if (comp_code
== EQ
)
23869 return ARM_INVERSE_CONDITION_CODE (code
);
23870 if (comp_code
== NE
)
23874 case E_CC_NOOVmode
:
23877 case NE
: return ARM_NE
;
23878 case EQ
: return ARM_EQ
;
23879 case GE
: return ARM_PL
;
23880 case LT
: return ARM_MI
;
23881 default: return ARM_NV
;
23887 case NE
: return ARM_NE
;
23888 case EQ
: return ARM_EQ
;
23889 default: return ARM_NV
;
23895 case NE
: return ARM_MI
;
23896 case EQ
: return ARM_PL
;
23897 default: return ARM_NV
;
23902 /* We can handle all cases except UNEQ and LTGT. */
23905 case GE
: return ARM_GE
;
23906 case GT
: return ARM_GT
;
23907 case LE
: return ARM_LS
;
23908 case LT
: return ARM_MI
;
23909 case NE
: return ARM_NE
;
23910 case EQ
: return ARM_EQ
;
23911 case ORDERED
: return ARM_VC
;
23912 case UNORDERED
: return ARM_VS
;
23913 case UNLT
: return ARM_LT
;
23914 case UNLE
: return ARM_LE
;
23915 case UNGT
: return ARM_HI
;
23916 case UNGE
: return ARM_PL
;
23917 /* UNEQ and LTGT do not have a representation. */
23918 case UNEQ
: /* Fall through. */
23919 case LTGT
: /* Fall through. */
23920 default: return ARM_NV
;
23926 case NE
: return ARM_NE
;
23927 case EQ
: return ARM_EQ
;
23928 case GE
: return ARM_LE
;
23929 case GT
: return ARM_LT
;
23930 case LE
: return ARM_GE
;
23931 case LT
: return ARM_GT
;
23932 case GEU
: return ARM_LS
;
23933 case GTU
: return ARM_CC
;
23934 case LEU
: return ARM_CS
;
23935 case LTU
: return ARM_HI
;
23936 default: return ARM_NV
;
23942 case LTU
: return ARM_CS
;
23943 case GEU
: return ARM_CC
;
23944 default: return ARM_NV
;
23950 case GE
: return ARM_GE
;
23951 case LT
: return ARM_LT
;
23952 default: return ARM_NV
;
23958 case GEU
: return ARM_CS
;
23959 case LTU
: return ARM_CC
;
23960 default: return ARM_NV
;
23966 case NE
: return ARM_VS
;
23967 case EQ
: return ARM_VC
;
23968 default: return ARM_NV
;
23974 case GEU
: return ARM_CS
;
23975 case LTU
: return ARM_CC
;
23976 default: return ARM_NV
;
23983 case NE
: return ARM_NE
;
23984 case EQ
: return ARM_EQ
;
23985 case GE
: return ARM_GE
;
23986 case GT
: return ARM_GT
;
23987 case LE
: return ARM_LE
;
23988 case LT
: return ARM_LT
;
23989 case GEU
: return ARM_CS
;
23990 case GTU
: return ARM_HI
;
23991 case LEU
: return ARM_LS
;
23992 case LTU
: return ARM_CC
;
23993 default: return ARM_NV
;
23996 default: gcc_unreachable ();
24000 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
24001 static enum arm_cond_code
24002 get_arm_condition_code (rtx comparison
)
24004 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
24005 gcc_assert (code
!= ARM_NV
);
24009 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
24010 code registers when not targetting Thumb1. The VFP condition register
24011 only exists when generating hard-float code. */
24013 arm_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
24019 *p2
= TARGET_HARD_FLOAT
? VFPCC_REGNUM
: INVALID_REGNUM
;
24023 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
24026 thumb2_final_prescan_insn (rtx_insn
*insn
)
24028 rtx_insn
*first_insn
= insn
;
24029 rtx body
= PATTERN (insn
);
24031 enum arm_cond_code code
;
24036 /* max_insns_skipped in the tune was already taken into account in the
24037 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
24038 just emit the IT blocks as we can. It does not make sense to split
24040 max
= MAX_INSN_PER_IT_BLOCK
;
24042 /* Remove the previous insn from the count of insns to be output. */
24043 if (arm_condexec_count
)
24044 arm_condexec_count
--;
24046 /* Nothing to do if we are already inside a conditional block. */
24047 if (arm_condexec_count
)
24050 if (GET_CODE (body
) != COND_EXEC
)
24053 /* Conditional jumps are implemented directly. */
24057 predicate
= COND_EXEC_TEST (body
);
24058 arm_current_cc
= get_arm_condition_code (predicate
);
24060 n
= get_attr_ce_count (insn
);
24061 arm_condexec_count
= 1;
24062 arm_condexec_mask
= (1 << n
) - 1;
24063 arm_condexec_masklen
= n
;
24064 /* See if subsequent instructions can be combined into the same block. */
24067 insn
= next_nonnote_insn (insn
);
24069 /* Jumping into the middle of an IT block is illegal, so a label or
24070 barrier terminates the block. */
24071 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
24074 body
= PATTERN (insn
);
24075 /* USE and CLOBBER aren't really insns, so just skip them. */
24076 if (GET_CODE (body
) == USE
24077 || GET_CODE (body
) == CLOBBER
)
24080 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
24081 if (GET_CODE (body
) != COND_EXEC
)
24083 /* Maximum number of conditionally executed instructions in a block. */
24084 n
= get_attr_ce_count (insn
);
24085 if (arm_condexec_masklen
+ n
> max
)
24088 predicate
= COND_EXEC_TEST (body
);
24089 code
= get_arm_condition_code (predicate
);
24090 mask
= (1 << n
) - 1;
24091 if (arm_current_cc
== code
)
24092 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
24093 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
24096 arm_condexec_count
++;
24097 arm_condexec_masklen
+= n
;
24099 /* A jump must be the last instruction in a conditional block. */
24103 /* Restore recog_data (getting the attributes of other insns can
24104 destroy this array, but final.c assumes that it remains intact
24105 across this call). */
24106 extract_constrain_insn_cached (first_insn
);
24110 arm_final_prescan_insn (rtx_insn
*insn
)
24112 /* BODY will hold the body of INSN. */
24113 rtx body
= PATTERN (insn
);
24115 /* This will be 1 if trying to repeat the trick, and things need to be
24116 reversed if it appears to fail. */
24119 /* If we start with a return insn, we only succeed if we find another one. */
24120 int seeking_return
= 0;
24121 enum rtx_code return_code
= UNKNOWN
;
24123 /* START_INSN will hold the insn from where we start looking. This is the
24124 first insn after the following code_label if REVERSE is true. */
24125 rtx_insn
*start_insn
= insn
;
24127 /* If in state 4, check if the target branch is reached, in order to
24128 change back to state 0. */
24129 if (arm_ccfsm_state
== 4)
24131 if (insn
== arm_target_insn
)
24133 arm_target_insn
= NULL
;
24134 arm_ccfsm_state
= 0;
24139 /* If in state 3, it is possible to repeat the trick, if this insn is an
24140 unconditional branch to a label, and immediately following this branch
24141 is the previous target label which is only used once, and the label this
24142 branch jumps to is not too far off. */
24143 if (arm_ccfsm_state
== 3)
24145 if (simplejump_p (insn
))
24147 start_insn
= next_nonnote_insn (start_insn
);
24148 if (BARRIER_P (start_insn
))
24150 /* XXX Isn't this always a barrier? */
24151 start_insn
= next_nonnote_insn (start_insn
);
24153 if (LABEL_P (start_insn
)
24154 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
24155 && LABEL_NUSES (start_insn
) == 1)
24160 else if (ANY_RETURN_P (body
))
24162 start_insn
= next_nonnote_insn (start_insn
);
24163 if (BARRIER_P (start_insn
))
24164 start_insn
= next_nonnote_insn (start_insn
);
24165 if (LABEL_P (start_insn
)
24166 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
24167 && LABEL_NUSES (start_insn
) == 1)
24170 seeking_return
= 1;
24171 return_code
= GET_CODE (body
);
24180 gcc_assert (!arm_ccfsm_state
|| reverse
);
24181 if (!JUMP_P (insn
))
24184 /* This jump might be paralleled with a clobber of the condition codes
24185 the jump should always come first */
24186 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
24187 body
= XVECEXP (body
, 0, 0);
24190 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
24191 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
24194 int fail
= FALSE
, succeed
= FALSE
;
24195 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
24196 int then_not_else
= TRUE
;
24197 rtx_insn
*this_insn
= start_insn
;
24200 /* Register the insn jumped to. */
24203 if (!seeking_return
)
24204 label
= XEXP (SET_SRC (body
), 0);
24206 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
24207 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
24208 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
24210 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
24211 then_not_else
= FALSE
;
24213 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
24215 seeking_return
= 1;
24216 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
24218 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
24220 seeking_return
= 1;
24221 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
24222 then_not_else
= FALSE
;
24225 gcc_unreachable ();
24227 /* See how many insns this branch skips, and what kind of insns. If all
24228 insns are okay, and the label or unconditional branch to the same
24229 label is not too far away, succeed. */
24230 for (insns_skipped
= 0;
24231 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
24235 this_insn
= next_nonnote_insn (this_insn
);
24239 switch (GET_CODE (this_insn
))
24242 /* Succeed if it is the target label, otherwise fail since
24243 control falls in from somewhere else. */
24244 if (this_insn
== label
)
24246 arm_ccfsm_state
= 1;
24254 /* Succeed if the following insn is the target label.
24256 If return insns are used then the last insn in a function
24257 will be a barrier. */
24258 this_insn
= next_nonnote_insn (this_insn
);
24259 if (this_insn
&& this_insn
== label
)
24261 arm_ccfsm_state
= 1;
24269 /* The AAPCS says that conditional calls should not be
24270 used since they make interworking inefficient (the
24271 linker can't transform BL<cond> into BLX). That's
24272 only a problem if the machine has BLX. */
24279 /* Succeed if the following insn is the target label, or
24280 if the following two insns are a barrier and the
24282 this_insn
= next_nonnote_insn (this_insn
);
24283 if (this_insn
&& BARRIER_P (this_insn
))
24284 this_insn
= next_nonnote_insn (this_insn
);
24286 if (this_insn
&& this_insn
== label
24287 && insns_skipped
< max_insns_skipped
)
24289 arm_ccfsm_state
= 1;
24297 /* If this is an unconditional branch to the same label, succeed.
24298 If it is to another label, do nothing. If it is conditional,
24300 /* XXX Probably, the tests for SET and the PC are
24303 scanbody
= PATTERN (this_insn
);
24304 if (GET_CODE (scanbody
) == SET
24305 && GET_CODE (SET_DEST (scanbody
)) == PC
)
24307 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
24308 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
24310 arm_ccfsm_state
= 2;
24313 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
24316 /* Fail if a conditional return is undesirable (e.g. on a
24317 StrongARM), but still allow this if optimizing for size. */
24318 else if (GET_CODE (scanbody
) == return_code
24319 && !use_return_insn (TRUE
, NULL
)
24322 else if (GET_CODE (scanbody
) == return_code
)
24324 arm_ccfsm_state
= 2;
24327 else if (GET_CODE (scanbody
) == PARALLEL
)
24329 switch (get_attr_conds (this_insn
))
24339 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
24344 /* Instructions using or affecting the condition codes make it
24346 scanbody
= PATTERN (this_insn
);
24347 if (!(GET_CODE (scanbody
) == SET
24348 || GET_CODE (scanbody
) == PARALLEL
)
24349 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
24359 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
24360 arm_target_label
= CODE_LABEL_NUMBER (label
);
24363 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
24365 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
24367 this_insn
= next_nonnote_insn (this_insn
);
24368 gcc_assert (!this_insn
24369 || (!BARRIER_P (this_insn
)
24370 && !LABEL_P (this_insn
)));
24374 /* Oh, dear! we ran off the end.. give up. */
24375 extract_constrain_insn_cached (insn
);
24376 arm_ccfsm_state
= 0;
24377 arm_target_insn
= NULL
;
24380 arm_target_insn
= this_insn
;
24383 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
24386 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
24388 if (reverse
|| then_not_else
)
24389 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
24392 /* Restore recog_data (getting the attributes of other insns can
24393 destroy this array, but final.c assumes that it remains intact
24394 across this call. */
24395 extract_constrain_insn_cached (insn
);
24399 /* Output IT instructions. */
24401 thumb2_asm_output_opcode (FILE * stream
)
24406 if (arm_condexec_mask
)
24408 for (n
= 0; n
< arm_condexec_masklen
; n
++)
24409 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
24411 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
24412 arm_condition_codes
[arm_current_cc
]);
24413 arm_condexec_mask
= 0;
24417 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
24418 UNITS_PER_WORD bytes wide. */
24419 static unsigned int
24420 arm_hard_regno_nregs (unsigned int regno
, machine_mode mode
)
24423 && regno
> PC_REGNUM
24424 && regno
!= FRAME_POINTER_REGNUM
24425 && regno
!= ARG_POINTER_REGNUM
24426 && !IS_VFP_REGNUM (regno
))
24429 return ARM_NUM_REGS (mode
);
24432 /* Implement TARGET_HARD_REGNO_MODE_OK. */
24434 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
24436 if (GET_MODE_CLASS (mode
) == MODE_CC
)
24437 return (regno
== CC_REGNUM
24438 || (TARGET_HARD_FLOAT
24439 && regno
== VFPCC_REGNUM
));
24441 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
24445 /* For the Thumb we only allow values bigger than SImode in
24446 registers 0 - 6, so that there is always a second low
24447 register available to hold the upper part of the value.
24448 We probably we ought to ensure that the register is the
24449 start of an even numbered register pair. */
24450 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
24452 if (TARGET_HARD_FLOAT
&& IS_VFP_REGNUM (regno
))
24454 if (mode
== SFmode
|| mode
== SImode
)
24455 return VFP_REGNO_OK_FOR_SINGLE (regno
);
24457 if (mode
== DFmode
)
24458 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
24460 if (mode
== HFmode
)
24461 return VFP_REGNO_OK_FOR_SINGLE (regno
);
24463 /* VFP registers can hold HImode values. */
24464 if (mode
== HImode
)
24465 return VFP_REGNO_OK_FOR_SINGLE (regno
);
24468 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
24469 || (VALID_NEON_QREG_MODE (mode
)
24470 && NEON_REGNO_OK_FOR_QUAD (regno
))
24471 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
24472 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
24473 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
24474 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
24475 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
24480 if (TARGET_REALLY_IWMMXT
)
24482 if (IS_IWMMXT_GR_REGNUM (regno
))
24483 return mode
== SImode
;
24485 if (IS_IWMMXT_REGNUM (regno
))
24486 return VALID_IWMMXT_REG_MODE (mode
);
24489 /* We allow almost any value to be stored in the general registers.
24490 Restrict doubleword quantities to even register pairs in ARM state
24491 so that we can use ldrd. Do not allow very large Neon structure
24492 opaque modes in general registers; they would use too many. */
24493 if (regno
<= LAST_ARM_REGNUM
)
24495 if (ARM_NUM_REGS (mode
) > 4)
24501 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
24504 if (regno
== FRAME_POINTER_REGNUM
24505 || regno
== ARG_POINTER_REGNUM
)
24506 /* We only allow integers in the fake hard registers. */
24507 return GET_MODE_CLASS (mode
) == MODE_INT
;
24512 /* Implement TARGET_MODES_TIEABLE_P. */
24515 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
24517 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
24520 /* We specifically want to allow elements of "structure" modes to
24521 be tieable to the structure. This more general condition allows
24522 other rarer situations too. */
24524 && (VALID_NEON_DREG_MODE (mode1
)
24525 || VALID_NEON_QREG_MODE (mode1
)
24526 || VALID_NEON_STRUCT_MODE (mode1
))
24527 && (VALID_NEON_DREG_MODE (mode2
)
24528 || VALID_NEON_QREG_MODE (mode2
)
24529 || VALID_NEON_STRUCT_MODE (mode2
)))
24535 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
24536 not used in arm mode. */
24539 arm_regno_class (int regno
)
24541 if (regno
== PC_REGNUM
)
24546 if (regno
== STACK_POINTER_REGNUM
)
24548 if (regno
== CC_REGNUM
)
24555 if (TARGET_THUMB2
&& regno
< 8)
24558 if ( regno
<= LAST_ARM_REGNUM
24559 || regno
== FRAME_POINTER_REGNUM
24560 || regno
== ARG_POINTER_REGNUM
)
24561 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
24563 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
24564 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
24566 if (IS_VFP_REGNUM (regno
))
24568 if (regno
<= D7_VFP_REGNUM
)
24569 return VFP_D0_D7_REGS
;
24570 else if (regno
<= LAST_LO_VFP_REGNUM
)
24571 return VFP_LO_REGS
;
24573 return VFP_HI_REGS
;
24576 if (IS_IWMMXT_REGNUM (regno
))
24577 return IWMMXT_REGS
;
24579 if (IS_IWMMXT_GR_REGNUM (regno
))
24580 return IWMMXT_GR_REGS
;
24585 /* Handle a special case when computing the offset
24586 of an argument from the frame pointer. */
24588 arm_debugger_arg_offset (int value
, rtx addr
)
24592 /* We are only interested if dbxout_parms() failed to compute the offset. */
24596 /* We can only cope with the case where the address is held in a register. */
24600 /* If we are using the frame pointer to point at the argument, then
24601 an offset of 0 is correct. */
24602 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
24605 /* If we are using the stack pointer to point at the
24606 argument, then an offset of 0 is correct. */
24607 /* ??? Check this is consistent with thumb2 frame layout. */
24608 if ((TARGET_THUMB
|| !frame_pointer_needed
)
24609 && REGNO (addr
) == SP_REGNUM
)
24612 /* Oh dear. The argument is pointed to by a register rather
24613 than being held in a register, or being stored at a known
24614 offset from the frame pointer. Since GDB only understands
24615 those two kinds of argument we must translate the address
24616 held in the register into an offset from the frame pointer.
24617 We do this by searching through the insns for the function
24618 looking to see where this register gets its value. If the
24619 register is initialized from the frame pointer plus an offset
24620 then we are in luck and we can continue, otherwise we give up.
24622 This code is exercised by producing debugging information
24623 for a function with arguments like this:
24625 double func (double a, double b, int c, double d) {return d;}
24627 Without this code the stab for parameter 'd' will be set to
24628 an offset of 0 from the frame pointer, rather than 8. */
24630 /* The if() statement says:
24632 If the insn is a normal instruction
24633 and if the insn is setting the value in a register
24634 and if the register being set is the register holding the address of the argument
24635 and if the address is computing by an addition
24636 that involves adding to a register
24637 which is the frame pointer
24642 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
24644 if ( NONJUMP_INSN_P (insn
)
24645 && GET_CODE (PATTERN (insn
)) == SET
24646 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
24647 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
24648 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
24649 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
24650 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
24653 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
24662 warning (0, "unable to compute real location of stacked parameter");
24663 value
= 8; /* XXX magic hack */
24669 /* Implement TARGET_PROMOTED_TYPE. */
24672 arm_promoted_type (const_tree t
)
24674 if (SCALAR_FLOAT_TYPE_P (t
)
24675 && TYPE_PRECISION (t
) == 16
24676 && TYPE_MAIN_VARIANT (t
) == arm_fp16_type_node
)
24677 return float_type_node
;
24681 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24682 This simply adds HFmode as a supported mode; even though we don't
24683 implement arithmetic on this type directly, it's supported by
24684 optabs conversions, much the way the double-word arithmetic is
24685 special-cased in the default hook. */
24688 arm_scalar_mode_supported_p (scalar_mode mode
)
24690 if (mode
== HFmode
)
24691 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
24692 else if (ALL_FIXED_POINT_MODE_P (mode
))
24695 return default_scalar_mode_supported_p (mode
);
24698 /* Set the value of FLT_EVAL_METHOD.
24699 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
24701 0: evaluate all operations and constants, whose semantic type has at
24702 most the range and precision of type float, to the range and
24703 precision of float; evaluate all other operations and constants to
24704 the range and precision of the semantic type;
24706 N, where _FloatN is a supported interchange floating type
24707 evaluate all operations and constants, whose semantic type has at
24708 most the range and precision of _FloatN type, to the range and
24709 precision of the _FloatN type; evaluate all other operations and
24710 constants to the range and precision of the semantic type;
24712 If we have the ARMv8.2-A extensions then we support _Float16 in native
24713 precision, so we should set this to 16. Otherwise, we support the type,
24714 but want to evaluate expressions in float precision, so set this to
24717 static enum flt_eval_method
24718 arm_excess_precision (enum excess_precision_type type
)
24722 case EXCESS_PRECISION_TYPE_FAST
:
24723 case EXCESS_PRECISION_TYPE_STANDARD
:
24724 /* We can calculate either in 16-bit range and precision or
24725 32-bit range and precision. Make that decision based on whether
24726 we have native support for the ARMv8.2-A 16-bit floating-point
24727 instructions or not. */
24728 return (TARGET_VFP_FP16INST
24729 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
24730 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
);
24731 case EXCESS_PRECISION_TYPE_IMPLICIT
:
24732 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
;
24734 gcc_unreachable ();
24736 return FLT_EVAL_METHOD_UNPREDICTABLE
;
24740 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
24741 _Float16 if we are using anything other than ieee format for 16-bit
24742 floating point. Otherwise, punt to the default implementation. */
24743 static opt_scalar_float_mode
24744 arm_floatn_mode (int n
, bool extended
)
24746 if (!extended
&& n
== 16)
24748 if (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
)
24750 return opt_scalar_float_mode ();
24753 return default_floatn_mode (n
, extended
);
24757 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24758 not to early-clobber SRC registers in the process.
24760 We assume that the operands described by SRC and DEST represent a
24761 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
24762 number of components into which the copy has been decomposed. */
24764 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
24768 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
24769 || REGNO (operands
[0]) < REGNO (operands
[1]))
24771 for (i
= 0; i
< count
; i
++)
24773 operands
[2 * i
] = dest
[i
];
24774 operands
[2 * i
+ 1] = src
[i
];
24779 for (i
= 0; i
< count
; i
++)
24781 operands
[2 * i
] = dest
[count
- i
- 1];
24782 operands
[2 * i
+ 1] = src
[count
- i
- 1];
24787 /* Split operands into moves from op[1] + op[2] into op[0]. */
24790 neon_split_vcombine (rtx operands
[3])
24792 unsigned int dest
= REGNO (operands
[0]);
24793 unsigned int src1
= REGNO (operands
[1]);
24794 unsigned int src2
= REGNO (operands
[2]);
24795 machine_mode halfmode
= GET_MODE (operands
[1]);
24796 unsigned int halfregs
= REG_NREGS (operands
[1]);
24797 rtx destlo
, desthi
;
24799 if (src1
== dest
&& src2
== dest
+ halfregs
)
24801 /* No-op move. Can't split to nothing; emit something. */
24802 emit_note (NOTE_INSN_DELETED
);
24806 /* Preserve register attributes for variable tracking. */
24807 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
24808 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
24809 GET_MODE_SIZE (halfmode
));
24811 /* Special case of reversed high/low parts. Use VSWP. */
24812 if (src2
== dest
&& src1
== dest
+ halfregs
)
24814 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
24815 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
24816 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
24820 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
24822 /* Try to avoid unnecessary moves if part of the result
24823 is in the right place already. */
24825 emit_move_insn (destlo
, operands
[1]);
24826 if (src2
!= dest
+ halfregs
)
24827 emit_move_insn (desthi
, operands
[2]);
24831 if (src2
!= dest
+ halfregs
)
24832 emit_move_insn (desthi
, operands
[2]);
24834 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
inline static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
24847 /* Like emit_multi_reg_push, but allowing for a different set of
24848 registers to be described as saved. MASK is the set of registers
24849 to be saved; REAL_REGS is the set of registers to be described as
24850 saved. If REAL_REGS is 0, only describe the stack adjustment. */
24853 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
24855 unsigned long regno
;
24856 rtx par
[10], tmp
, reg
;
24860 /* Build the parallel of the registers actually being stored. */
24861 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
24863 regno
= ctz_hwi (mask
);
24864 reg
= gen_rtx_REG (SImode
, regno
);
24867 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
24869 tmp
= gen_rtx_USE (VOIDmode
, reg
);
24874 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
24875 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
24876 tmp
= gen_frame_mem (BLKmode
, tmp
);
24877 tmp
= gen_rtx_SET (tmp
, par
[0]);
24880 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
24881 insn
= emit_insn (tmp
);
24883 /* Always build the stack adjustment note for unwind info. */
24884 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
24885 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
24888 /* Build the parallel of the registers recorded as saved for unwind. */
24889 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
24891 regno
= ctz_hwi (real_regs
);
24892 reg
= gen_rtx_REG (SImode
, regno
);
24894 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
24895 tmp
= gen_frame_mem (SImode
, tmp
);
24896 tmp
= gen_rtx_SET (tmp
, reg
);
24897 RTX_FRAME_RELATED_P (tmp
) = 1;
24905 RTX_FRAME_RELATED_P (par
[0]) = 1;
24906 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
24909 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
24914 /* Emit code to push or pop registers to or from the stack. F is the
24915 assembly file. MASK is the registers to pop. */
24917 thumb_pop (FILE *f
, unsigned long mask
)
24920 int lo_mask
= mask
& 0xFF;
24924 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
24926 /* Special case. Do not generate a POP PC statement here, do it in
24928 thumb_exit (f
, -1);
24932 fprintf (f
, "\tpop\t{");
24934 /* Look at the low registers first. */
24935 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
24939 asm_fprintf (f
, "%r", regno
);
24941 if ((lo_mask
& ~1) != 0)
24946 if (mask
& (1 << PC_REGNUM
))
24948 /* Catch popping the PC. */
24949 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
|| crtl
->calls_eh_return
24950 || IS_CMSE_ENTRY (arm_current_func_type ()))
24952 /* The PC is never poped directly, instead
24953 it is popped into r3 and then BX is used. */
24954 fprintf (f
, "}\n");
24956 thumb_exit (f
, -1);
24965 asm_fprintf (f
, "%r", PC_REGNUM
);
24969 fprintf (f
, "}\n");
24972 /* Generate code to return from a thumb function.
24973 If 'reg_containing_return_addr' is -1, then the return address is
24974 actually on the stack, at the stack pointer.
24976 Note: do not forget to update length attribute of corresponding insn pattern
24977 when changing assembly output (eg. length attribute of epilogue_insns when
24978 updating Armv8-M Baseline Security Extensions register clearing
24981 thumb_exit (FILE *f
, int reg_containing_return_addr
)
24983 unsigned regs_available_for_popping
;
24984 unsigned regs_to_pop
;
24986 unsigned available
;
24990 int restore_a4
= FALSE
;
24992 /* Compute the registers we need to pop. */
24996 if (reg_containing_return_addr
== -1)
24998 regs_to_pop
|= 1 << LR_REGNUM
;
25002 if (TARGET_BACKTRACE
)
25004 /* Restore the (ARM) frame pointer and stack pointer. */
25005 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
25009 /* If there is nothing to pop then just emit the BX instruction and
25011 if (pops_needed
== 0)
25013 if (crtl
->calls_eh_return
)
25014 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
25016 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25018 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n",
25019 reg_containing_return_addr
);
25020 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
25023 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
25026 /* Otherwise if we are not supporting interworking and we have not created
25027 a backtrace structure and the function was not entered in ARM mode then
25028 just pop the return address straight into the PC. */
25029 else if (!TARGET_INTERWORK
25030 && !TARGET_BACKTRACE
25031 && !is_called_in_ARM_mode (current_function_decl
)
25032 && !crtl
->calls_eh_return
25033 && !IS_CMSE_ENTRY (arm_current_func_type ()))
25035 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
25039 /* Find out how many of the (return) argument registers we can corrupt. */
25040 regs_available_for_popping
= 0;
25042 /* If returning via __builtin_eh_return, the bottom three registers
25043 all contain information needed for the return. */
25044 if (crtl
->calls_eh_return
)
25048 /* If we can deduce the registers used from the function's
25049 return value. This is more reliable that examining
25050 df_regs_ever_live_p () because that will be set if the register is
25051 ever used in the function, not just if the register is used
25052 to hold a return value. */
25054 if (crtl
->return_rtx
!= 0)
25055 mode
= GET_MODE (crtl
->return_rtx
);
25057 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
25059 size
= GET_MODE_SIZE (mode
);
25063 /* In a void function we can use any argument register.
25064 In a function that returns a structure on the stack
25065 we can use the second and third argument registers. */
25066 if (mode
== VOIDmode
)
25067 regs_available_for_popping
=
25068 (1 << ARG_REGISTER (1))
25069 | (1 << ARG_REGISTER (2))
25070 | (1 << ARG_REGISTER (3));
25072 regs_available_for_popping
=
25073 (1 << ARG_REGISTER (2))
25074 | (1 << ARG_REGISTER (3));
25076 else if (size
<= 4)
25077 regs_available_for_popping
=
25078 (1 << ARG_REGISTER (2))
25079 | (1 << ARG_REGISTER (3));
25080 else if (size
<= 8)
25081 regs_available_for_popping
=
25082 (1 << ARG_REGISTER (3));
25085 /* Match registers to be popped with registers into which we pop them. */
25086 for (available
= regs_available_for_popping
,
25087 required
= regs_to_pop
;
25088 required
!= 0 && available
!= 0;
25089 available
&= ~(available
& - available
),
25090 required
&= ~(required
& - required
))
25093 /* If we have any popping registers left over, remove them. */
25095 regs_available_for_popping
&= ~available
;
25097 /* Otherwise if we need another popping register we can use
25098 the fourth argument register. */
25099 else if (pops_needed
)
25101 /* If we have not found any free argument registers and
25102 reg a4 contains the return address, we must move it. */
25103 if (regs_available_for_popping
== 0
25104 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
25106 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
25107 reg_containing_return_addr
= LR_REGNUM
;
25109 else if (size
> 12)
25111 /* Register a4 is being used to hold part of the return value,
25112 but we have dire need of a free, low register. */
25115 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
25118 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
25120 /* The fourth argument register is available. */
25121 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
25127 /* Pop as many registers as we can. */
25128 thumb_pop (f
, regs_available_for_popping
);
25130 /* Process the registers we popped. */
25131 if (reg_containing_return_addr
== -1)
25133 /* The return address was popped into the lowest numbered register. */
25134 regs_to_pop
&= ~(1 << LR_REGNUM
);
25136 reg_containing_return_addr
=
25137 number_of_first_bit_set (regs_available_for_popping
);
25139 /* Remove this register for the mask of available registers, so that
25140 the return address will not be corrupted by further pops. */
25141 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
25144 /* If we popped other registers then handle them here. */
25145 if (regs_available_for_popping
)
25149 /* Work out which register currently contains the frame pointer. */
25150 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
25152 /* Move it into the correct place. */
25153 asm_fprintf (f
, "\tmov\t%r, %r\n",
25154 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
25156 /* (Temporarily) remove it from the mask of popped registers. */
25157 regs_available_for_popping
&= ~(1 << frame_pointer
);
25158 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
25160 if (regs_available_for_popping
)
25164 /* We popped the stack pointer as well,
25165 find the register that contains it. */
25166 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
25168 /* Move it into the stack register. */
25169 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
25171 /* At this point we have popped all necessary registers, so
25172 do not worry about restoring regs_available_for_popping
25173 to its correct value:
25175 assert (pops_needed == 0)
25176 assert (regs_available_for_popping == (1 << frame_pointer))
25177 assert (regs_to_pop == (1 << STACK_POINTER)) */
25181 /* Since we have just move the popped value into the frame
25182 pointer, the popping register is available for reuse, and
25183 we know that we still have the stack pointer left to pop. */
25184 regs_available_for_popping
|= (1 << frame_pointer
);
25188 /* If we still have registers left on the stack, but we no longer have
25189 any registers into which we can pop them, then we must move the return
25190 address into the link register and make available the register that
25192 if (regs_available_for_popping
== 0 && pops_needed
> 0)
25194 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
25196 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
25197 reg_containing_return_addr
);
25199 reg_containing_return_addr
= LR_REGNUM
;
25202 /* If we have registers left on the stack then pop some more.
25203 We know that at most we will want to pop FP and SP. */
25204 if (pops_needed
> 0)
25209 thumb_pop (f
, regs_available_for_popping
);
25211 /* We have popped either FP or SP.
25212 Move whichever one it is into the correct register. */
25213 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
25214 move_to
= number_of_first_bit_set (regs_to_pop
);
25216 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
25220 /* If we still have not popped everything then we must have only
25221 had one register available to us and we are now popping the SP. */
25222 if (pops_needed
> 0)
25226 thumb_pop (f
, regs_available_for_popping
);
25228 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
25230 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
25232 assert (regs_to_pop == (1 << STACK_POINTER))
25233 assert (pops_needed == 1)
25237 /* If necessary restore the a4 register. */
25240 if (reg_containing_return_addr
!= LR_REGNUM
)
25242 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
25243 reg_containing_return_addr
= LR_REGNUM
;
25246 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
25249 if (crtl
->calls_eh_return
)
25250 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
25252 /* Return to caller. */
25253 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25255 /* This is for the cases where LR is not being used to contain the return
25256 address. It may therefore contain information that we might not want
25257 to leak, hence it must be cleared. The value in R0 will never be a
25258 secret at this point, so it is safe to use it, see the clearing code
25259 in 'cmse_nonsecure_entry_clear_before_return'. */
25260 if (reg_containing_return_addr
!= LR_REGNUM
)
25261 asm_fprintf (f
, "\tmov\tlr, r0\n");
25263 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr
);
25264 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
25267 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
25270 /* Scan INSN just before assembler is output for it.
25271 For Thumb-1, we track the status of the condition codes; this
25272 information is used in the cbranchsi4_insn pattern. */
25274 thumb1_final_prescan_insn (rtx_insn
*insn
)
25276 if (flag_print_asm_name
)
25277 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
25278 INSN_ADDRESSES (INSN_UID (insn
)));
25279 /* Don't overwrite the previous setter when we get to a cbranch. */
25280 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
25282 enum attr_conds conds
;
25284 if (cfun
->machine
->thumb1_cc_insn
)
25286 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
25287 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
25290 conds
= get_attr_conds (insn
);
25291 if (conds
== CONDS_SET
)
25293 rtx set
= single_set (insn
);
25294 cfun
->machine
->thumb1_cc_insn
= insn
;
25295 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
25296 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
25297 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
25298 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
25300 rtx src1
= XEXP (SET_SRC (set
), 1);
25301 if (src1
== const0_rtx
)
25302 cfun
->machine
->thumb1_cc_mode
= CCmode
;
25304 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
25306 /* Record the src register operand instead of dest because
25307 cprop_hardreg pass propagates src. */
25308 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
25311 else if (conds
!= CONDS_NOCOND
)
25312 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
25315 /* Check if unexpected far jump is used. */
25316 if (cfun
->machine
->lr_save_eliminated
25317 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
25318 internal_error("Unexpected thumb1 far jump");
25322 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
25324 unsigned HOST_WIDE_INT mask
= 0xff;
25327 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
25328 if (val
== 0) /* XXX */
25331 for (i
= 0; i
< 25; i
++)
25332 if ((val
& (mask
<< i
)) == val
)
25338 /* Returns nonzero if the current function contains,
25339 or might contain a far jump. */
25341 thumb_far_jump_used_p (void)
25344 bool far_jump
= false;
25345 unsigned int func_size
= 0;
25347 /* If we have already decided that far jumps may be used,
25348 do not bother checking again, and always return true even if
25349 it turns out that they are not being used. Once we have made
25350 the decision that far jumps are present (and that hence the link
25351 register will be pushed onto the stack) we cannot go back on it. */
25352 if (cfun
->machine
->far_jump_used
)
25355 /* If this function is not being called from the prologue/epilogue
25356 generation code then it must be being called from the
25357 INITIAL_ELIMINATION_OFFSET macro. */
25358 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
25360 /* In this case we know that we are being asked about the elimination
25361 of the arg pointer register. If that register is not being used,
25362 then there are no arguments on the stack, and we do not have to
25363 worry that a far jump might force the prologue to push the link
25364 register, changing the stack offsets. In this case we can just
25365 return false, since the presence of far jumps in the function will
25366 not affect stack offsets.
25368 If the arg pointer is live (or if it was live, but has now been
25369 eliminated and so set to dead) then we do have to test to see if
25370 the function might contain a far jump. This test can lead to some
25371 false negatives, since before reload is completed, then length of
25372 branch instructions is not known, so gcc defaults to returning their
25373 longest length, which in turn sets the far jump attribute to true.
25375 A false negative will not result in bad code being generated, but it
25376 will result in a needless push and pop of the link register. We
25377 hope that this does not occur too often.
25379 If we need doubleword stack alignment this could affect the other
25380 elimination offsets so we can't risk getting it wrong. */
25381 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
25382 cfun
->machine
->arg_pointer_live
= 1;
25383 else if (!cfun
->machine
->arg_pointer_live
)
25387 /* We should not change far_jump_used during or after reload, as there is
25388 no chance to change stack frame layout. */
25389 if (reload_in_progress
|| reload_completed
)
25392 /* Check to see if the function contains a branch
25393 insn with the far jump attribute set. */
25394 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
25396 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
25400 func_size
+= get_attr_length (insn
);
25403 /* Attribute far_jump will always be true for thumb1 before
25404 shorten_branch pass. So checking far_jump attribute before
25405 shorten_branch isn't much useful.
25407 Following heuristic tries to estimate more accurately if a far jump
25408 may finally be used. The heuristic is very conservative as there is
25409 no chance to roll-back the decision of not to use far jump.
25411 Thumb1 long branch offset is -2048 to 2046. The worst case is each
25412 2-byte insn is associated with a 4 byte constant pool. Using
25413 function size 2048/3 as the threshold is conservative enough. */
25416 if ((func_size
* 3) >= 2048)
25418 /* Record the fact that we have decided that
25419 the function does use far jumps. */
25420 cfun
->machine
->far_jump_used
= 1;
25428 /* Return nonzero if FUNC must be entered in ARM mode. */
25430 is_called_in_ARM_mode (tree func
)
25432 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
25434 /* Ignore the problem about functions whose address is taken. */
25435 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
25439 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
25445 /* Given the stack offsets and register mask in OFFSETS, decide how
25446 many additional registers to push instead of subtracting a constant
25447 from SP. For epilogues the principle is the same except we use pop.
25448 FOR_PROLOGUE indicates which we're generating. */
25450 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
25452 HOST_WIDE_INT amount
;
25453 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
25454 /* Extract a mask of the ones we can give to the Thumb's push/pop
25456 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
25457 /* Then count how many other high registers will need to be pushed. */
25458 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
25459 int n_free
, reg_base
, size
;
25461 if (!for_prologue
&& frame_pointer_needed
)
25462 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25464 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25466 /* If the stack frame size is 512 exactly, we can save one load
25467 instruction, which should make this a win even when optimizing
25469 if (!optimize_size
&& amount
!= 512)
25472 /* Can't do this if there are high registers to push. */
25473 if (high_regs_pushed
!= 0)
25476 /* Shouldn't do it in the prologue if no registers would normally
25477 be pushed at all. In the epilogue, also allow it if we'll have
25478 a pop insn for the PC. */
25481 || TARGET_BACKTRACE
25482 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
25483 || TARGET_INTERWORK
25484 || crtl
->args
.pretend_args_size
!= 0))
25487 /* Don't do this if thumb_expand_prologue wants to emit instructions
25488 between the push and the stack frame allocation. */
25490 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
25491 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
25498 size
= arm_size_return_regs ();
25499 reg_base
= ARM_NUM_INTS (size
);
25500 live_regs_mask
>>= reg_base
;
25503 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
25504 && (for_prologue
|| call_used_or_fixed_reg_p (reg_base
+ n_free
)))
25506 live_regs_mask
>>= 1;
25512 gcc_assert (amount
/ 4 * 4 == amount
);
25514 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
25515 return (amount
- 508) / 4;
25516 if (amount
<= n_free
* 4)
25521 /* The bits which aren't usefully expanded as rtl. */
25523 thumb1_unexpanded_epilogue (void)
25525 arm_stack_offsets
*offsets
;
25527 unsigned long live_regs_mask
= 0;
25528 int high_regs_pushed
= 0;
25530 int had_to_push_lr
;
25533 if (cfun
->machine
->return_used_this_function
!= 0)
25536 if (IS_NAKED (arm_current_func_type ()))
25539 offsets
= arm_get_frame_offsets ();
25540 live_regs_mask
= offsets
->saved_regs_mask
;
25541 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
25543 /* If we can deduce the registers used from the function's return value.
25544 This is more reliable that examining df_regs_ever_live_p () because that
25545 will be set if the register is ever used in the function, not just if
25546 the register is used to hold a return value. */
25547 size
= arm_size_return_regs ();
25549 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
25552 unsigned long extra_mask
= (1 << extra_pop
) - 1;
25553 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
25556 /* The prolog may have pushed some high registers to use as
25557 work registers. e.g. the testsuite file:
25558 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
25559 compiles to produce:
25560 push {r4, r5, r6, r7, lr}
25564 as part of the prolog. We have to undo that pushing here. */
25566 if (high_regs_pushed
)
25568 unsigned long mask
= live_regs_mask
& 0xff;
25571 mask
|= thumb1_epilogue_unused_call_clobbered_lo_regs ();
25574 /* Oh dear! We have no low registers into which we can pop
25577 ("no low registers available for popping high registers");
25579 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
25580 if (live_regs_mask
& (1 << next_hi_reg
))
25583 while (high_regs_pushed
)
25585 /* Find lo register(s) into which the high register(s) can
25587 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
25589 if (mask
& (1 << regno
))
25590 high_regs_pushed
--;
25591 if (high_regs_pushed
== 0)
25595 if (high_regs_pushed
== 0 && regno
>= 0)
25596 mask
&= ~((1 << regno
) - 1);
25598 /* Pop the values into the low register(s). */
25599 thumb_pop (asm_out_file
, mask
);
25601 /* Move the value(s) into the high registers. */
25602 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
25604 if (mask
& (1 << regno
))
25606 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
25609 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
25611 if (live_regs_mask
& (1 << next_hi_reg
))
25616 live_regs_mask
&= ~0x0f00;
25619 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
25620 live_regs_mask
&= 0xff;
25622 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
25624 /* Pop the return address into the PC. */
25625 if (had_to_push_lr
)
25626 live_regs_mask
|= 1 << PC_REGNUM
;
25628 /* Either no argument registers were pushed or a backtrace
25629 structure was created which includes an adjusted stack
25630 pointer, so just pop everything. */
25631 if (live_regs_mask
)
25632 thumb_pop (asm_out_file
, live_regs_mask
);
25634 /* We have either just popped the return address into the
25635 PC or it is was kept in LR for the entire function.
25636 Note that thumb_pop has already called thumb_exit if the
25637 PC was in the list. */
25638 if (!had_to_push_lr
)
25639 thumb_exit (asm_out_file
, LR_REGNUM
);
25643 /* Pop everything but the return address. */
25644 if (live_regs_mask
)
25645 thumb_pop (asm_out_file
, live_regs_mask
);
25647 if (had_to_push_lr
)
25651 /* We have no free low regs, so save one. */
25652 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
25656 /* Get the return address into a temporary register. */
25657 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
25661 /* Move the return address to lr. */
25662 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
25664 /* Restore the low register. */
25665 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
25670 regno
= LAST_ARG_REGNUM
;
25675 /* Remove the argument registers that were pushed onto the stack. */
25676 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
25677 SP_REGNUM
, SP_REGNUM
,
25678 crtl
->args
.pretend_args_size
);
25680 thumb_exit (asm_out_file
, regno
);
25686 /* Functions to save and restore machine-specific function data. */
25687 static struct machine_function
*
25688 arm_init_machine_status (void)
25690 struct machine_function
*machine
;
25691 machine
= ggc_cleared_alloc
<machine_function
> ();
25693 #if ARM_FT_UNKNOWN != 0
25694 machine
->func_type
= ARM_FT_UNKNOWN
;
25696 machine
->static_chain_stack_bytes
= -1;
25700 /* Return an RTX indicating where the return address to the
25701 calling function can be found. */
25703 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
25708 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
25711 /* Do anything needed before RTL is emitted for each function. */
25713 arm_init_expanders (void)
25715 /* Arrange to initialize and mark the machine per-function status. */
25716 init_machine_status
= arm_init_machine_status
;
25718 /* This is to stop the combine pass optimizing away the alignment
25719 adjustment of va_arg. */
25720 /* ??? It is claimed that this should not be necessary. */
25722 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
25725 /* Check that FUNC is called with a different mode. */
25728 arm_change_mode_p (tree func
)
25730 if (TREE_CODE (func
) != FUNCTION_DECL
)
25733 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
25736 callee_tree
= target_option_default_node
;
25738 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
25739 int flags
= callee_opts
->x_target_flags
;
25741 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
25744 /* Like arm_compute_initial_elimination offset. Simpler because there
25745 isn't an ABI specified frame pointer for Thumb. Instead, we set it
25746 to point at the base of the local variables after static stack
25747 space for a function has been allocated. */
25750 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
25752 arm_stack_offsets
*offsets
;
25754 offsets
= arm_get_frame_offsets ();
25758 case ARG_POINTER_REGNUM
:
25761 case STACK_POINTER_REGNUM
:
25762 return offsets
->outgoing_args
- offsets
->saved_args
;
25764 case FRAME_POINTER_REGNUM
:
25765 return offsets
->soft_frame
- offsets
->saved_args
;
25767 case ARM_HARD_FRAME_POINTER_REGNUM
:
25768 return offsets
->saved_regs
- offsets
->saved_args
;
25770 case THUMB_HARD_FRAME_POINTER_REGNUM
:
25771 return offsets
->locals_base
- offsets
->saved_args
;
25774 gcc_unreachable ();
25778 case FRAME_POINTER_REGNUM
:
25781 case STACK_POINTER_REGNUM
:
25782 return offsets
->outgoing_args
- offsets
->soft_frame
;
25784 case ARM_HARD_FRAME_POINTER_REGNUM
:
25785 return offsets
->saved_regs
- offsets
->soft_frame
;
25787 case THUMB_HARD_FRAME_POINTER_REGNUM
:
25788 return offsets
->locals_base
- offsets
->soft_frame
;
25791 gcc_unreachable ();
25796 gcc_unreachable ();
25800 /* Generate the function's prologue. */
25803 thumb1_expand_prologue (void)
25807 HOST_WIDE_INT amount
;
25808 HOST_WIDE_INT size
;
25809 arm_stack_offsets
*offsets
;
25810 unsigned long func_type
;
25812 unsigned long live_regs_mask
;
25813 unsigned long l_mask
;
25814 unsigned high_regs_pushed
= 0;
25815 bool lr_needs_saving
;
25817 func_type
= arm_current_func_type ();
25819 /* Naked functions don't have prologues. */
25820 if (IS_NAKED (func_type
))
25822 if (flag_stack_usage_info
)
25823 current_function_static_stack_size
= 0;
25827 if (IS_INTERRUPT (func_type
))
25829 error ("interrupt Service Routines cannot be coded in Thumb mode");
25833 if (is_called_in_ARM_mode (current_function_decl
))
25834 emit_insn (gen_prologue_thumb1_interwork ());
25836 offsets
= arm_get_frame_offsets ();
25837 live_regs_mask
= offsets
->saved_regs_mask
;
25838 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
25840 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25841 l_mask
= live_regs_mask
& 0x40ff;
25842 /* Then count how many other high registers will need to be pushed. */
25843 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
25845 if (crtl
->args
.pretend_args_size
)
25847 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
25849 if (cfun
->machine
->uses_anonymous_args
)
25851 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
25852 unsigned long mask
;
25854 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
25855 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
25857 insn
= thumb1_emit_multi_reg_push (mask
, 0);
25861 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25862 stack_pointer_rtx
, x
));
25864 RTX_FRAME_RELATED_P (insn
) = 1;
25867 if (TARGET_BACKTRACE
)
25869 HOST_WIDE_INT offset
= 0;
25870 unsigned work_register
;
25871 rtx work_reg
, x
, arm_hfp_rtx
;
25873 /* We have been asked to create a stack backtrace structure.
25874 The code looks like this:
25878 0 sub SP, #16 Reserve space for 4 registers.
25879 2 push {R7} Push low registers.
25880 4 add R7, SP, #20 Get the stack pointer before the push.
25881 6 str R7, [SP, #8] Store the stack pointer
25882 (before reserving the space).
25883 8 mov R7, PC Get hold of the start of this code + 12.
25884 10 str R7, [SP, #16] Store it.
25885 12 mov R7, FP Get hold of the current frame pointer.
25886 14 str R7, [SP, #4] Store it.
25887 16 mov R7, LR Get hold of the current return address.
25888 18 str R7, [SP, #12] Store it.
25889 20 add R7, SP, #16 Point at the start of the
25890 backtrace structure.
25891 22 mov FP, R7 Put this value into the frame pointer. */
25893 work_register
= thumb_find_work_register (live_regs_mask
);
25894 work_reg
= gen_rtx_REG (SImode
, work_register
);
25895 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
25897 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25898 stack_pointer_rtx
, GEN_INT (-16)));
25899 RTX_FRAME_RELATED_P (insn
) = 1;
25903 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
25904 RTX_FRAME_RELATED_P (insn
) = 1;
25905 lr_needs_saving
= false;
25907 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
25910 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
25911 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
25913 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
25914 x
= gen_frame_mem (SImode
, x
);
25915 emit_move_insn (x
, work_reg
);
25917 /* Make sure that the instruction fetching the PC is in the right place
25918 to calculate "start of backtrace creation code + 12". */
25919 /* ??? The stores using the common WORK_REG ought to be enough to
25920 prevent the scheduler from doing anything weird. Failing that
25921 we could always move all of the following into an UNSPEC_VOLATILE. */
25924 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
25925 emit_move_insn (work_reg
, x
);
25927 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
25928 x
= gen_frame_mem (SImode
, x
);
25929 emit_move_insn (x
, work_reg
);
25931 emit_move_insn (work_reg
, arm_hfp_rtx
);
25933 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
25934 x
= gen_frame_mem (SImode
, x
);
25935 emit_move_insn (x
, work_reg
);
25939 emit_move_insn (work_reg
, arm_hfp_rtx
);
25941 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
25942 x
= gen_frame_mem (SImode
, x
);
25943 emit_move_insn (x
, work_reg
);
25945 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
25946 emit_move_insn (work_reg
, x
);
25948 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
25949 x
= gen_frame_mem (SImode
, x
);
25950 emit_move_insn (x
, work_reg
);
25953 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
25954 emit_move_insn (work_reg
, x
);
25956 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
25957 x
= gen_frame_mem (SImode
, x
);
25958 emit_move_insn (x
, work_reg
);
25960 x
= GEN_INT (offset
+ 12);
25961 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
25963 emit_move_insn (arm_hfp_rtx
, work_reg
);
25965 /* Optimization: If we are not pushing any low registers but we are going
25966 to push some high registers then delay our first push. This will just
25967 be a push of LR and we can combine it with the push of the first high
25969 else if ((l_mask
& 0xff) != 0
25970 || (high_regs_pushed
== 0 && lr_needs_saving
))
25972 unsigned long mask
= l_mask
;
25973 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
25974 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
25975 RTX_FRAME_RELATED_P (insn
) = 1;
25976 lr_needs_saving
= false;
25979 if (high_regs_pushed
)
25981 unsigned pushable_regs
;
25982 unsigned next_hi_reg
;
25983 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
25984 : crtl
->args
.info
.nregs
;
25985 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
25987 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
25988 if (live_regs_mask
& (1 << next_hi_reg
))
25991 /* Here we need to mask out registers used for passing arguments
25992 even if they can be pushed. This is to avoid using them to
25993 stash the high registers. Such kind of stash may clobber the
25994 use of arguments. */
25995 pushable_regs
= l_mask
& (~arg_regs_mask
);
25996 pushable_regs
|= thumb1_prologue_unused_call_clobbered_lo_regs ();
25998 /* Normally, LR can be used as a scratch register once it has been
25999 saved; but if the function examines its own return address then
26000 the value is still live and we need to avoid using it. */
26001 bool return_addr_live
26002 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
26005 if (lr_needs_saving
|| return_addr_live
)
26006 pushable_regs
&= ~(1 << LR_REGNUM
);
26008 if (pushable_regs
== 0)
26009 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
26011 while (high_regs_pushed
> 0)
26013 unsigned long real_regs_mask
= 0;
26014 unsigned long push_mask
= 0;
26016 for (regno
= LR_REGNUM
; regno
>= 0; regno
--)
26018 if (pushable_regs
& (1 << regno
))
26020 emit_move_insn (gen_rtx_REG (SImode
, regno
),
26021 gen_rtx_REG (SImode
, next_hi_reg
));
26023 high_regs_pushed
--;
26024 real_regs_mask
|= (1 << next_hi_reg
);
26025 push_mask
|= (1 << regno
);
26027 if (high_regs_pushed
)
26029 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
26031 if (live_regs_mask
& (1 << next_hi_reg
))
26039 /* If we had to find a work register and we have not yet
26040 saved the LR then add it to the list of regs to push. */
26041 if (lr_needs_saving
)
26043 push_mask
|= 1 << LR_REGNUM
;
26044 real_regs_mask
|= 1 << LR_REGNUM
;
26045 lr_needs_saving
= false;
26046 /* If the return address is not live at this point, we
26047 can add LR to the list of registers that we can use
26049 if (!return_addr_live
)
26050 pushable_regs
|= 1 << LR_REGNUM
;
26053 insn
= thumb1_emit_multi_reg_push (push_mask
, real_regs_mask
);
26054 RTX_FRAME_RELATED_P (insn
) = 1;
26058 /* Load the pic register before setting the frame pointer,
26059 so we can use r7 as a temporary work register. */
26060 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26061 arm_load_pic_register (live_regs_mask
, NULL_RTX
);
26063 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
26064 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
26065 stack_pointer_rtx
);
26067 size
= offsets
->outgoing_args
- offsets
->saved_args
;
26068 if (flag_stack_usage_info
)
26069 current_function_static_stack_size
= size
;
26071 /* If we have a frame, then do stack checking. FIXME: not implemented. */
26072 if ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
26073 || flag_stack_clash_protection
)
26075 sorry ("%<-fstack-check=specific%> for Thumb-1");
26077 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26078 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
26083 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
26084 GEN_INT (- amount
)));
26085 RTX_FRAME_RELATED_P (insn
) = 1;
26091 /* The stack decrement is too big for an immediate value in a single
26092 insn. In theory we could issue multiple subtracts, but after
26093 three of them it becomes more space efficient to place the full
26094 value in the constant pool and load into a register. (Also the
26095 ARM debugger really likes to see only one stack decrement per
26096 function). So instead we look for a scratch register into which
26097 we can load the decrement, and then we subtract this from the
26098 stack pointer. Unfortunately on the thumb the only available
26099 scratch registers are the argument registers, and we cannot use
26100 these as they may hold arguments to the function. Instead we
26101 attempt to locate a call preserved register which is used by this
26102 function. If we can find one, then we know that it will have
26103 been pushed at the start of the prologue and so we can corrupt
26105 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
26106 if (live_regs_mask
& (1 << regno
))
26109 gcc_assert(regno
<= LAST_LO_REGNUM
);
26111 reg
= gen_rtx_REG (SImode
, regno
);
26113 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
26115 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26116 stack_pointer_rtx
, reg
));
26118 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
26119 plus_constant (Pmode
, stack_pointer_rtx
,
26121 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
26122 RTX_FRAME_RELATED_P (insn
) = 1;
26126 if (frame_pointer_needed
)
26127 thumb_set_frame_pointer (offsets
);
26129 /* If we are profiling, make sure no instructions are scheduled before
26130 the call to mcount. Similarly if the user has requested no
26131 scheduling in the prolog. Similarly if we want non-call exceptions
26132 using the EABI unwinder, to prevent faulting instructions from being
26133 swapped with a stack adjustment. */
26134 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
26135 || (arm_except_unwind_info (&global_options
) == UI_TARGET
26136 && cfun
->can_throw_non_call_exceptions
))
26137 emit_insn (gen_blockage ());
26139 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
26140 if (live_regs_mask
& 0xff)
26141 cfun
->machine
->lr_save_eliminated
= 0;
26144 /* Clear caller saved registers not used to pass return values and leaked
26145 condition flags before exiting a cmse_nonsecure_entry function. */
26148 cmse_nonsecure_entry_clear_before_return (void)
26150 int regno
, maxregno
= TARGET_HARD_FLOAT
? LAST_VFP_REGNUM
: IP_REGNUM
;
26151 uint32_t padding_bits_to_clear
= 0;
26152 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
26153 rtx r1_reg
, result_rtl
, clearing_reg
= NULL_RTX
;
26156 bitmap_clear (to_clear_bitmap
);
26157 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
, NUM_ARG_REGS
);
26158 bitmap_set_bit (to_clear_bitmap
, IP_REGNUM
);
26160 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
26162 if (TARGET_HARD_FLOAT
)
26164 int float_bits
= D7_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1;
26166 bitmap_set_range (to_clear_bitmap
, FIRST_VFP_REGNUM
, float_bits
);
26168 /* Make sure we don't clear the two scratch registers used to clear the
26169 relevant FPSCR bits in output_return_instruction. */
26170 emit_use (gen_rtx_REG (SImode
, IP_REGNUM
));
26171 bitmap_clear_bit (to_clear_bitmap
, IP_REGNUM
);
26172 emit_use (gen_rtx_REG (SImode
, 4));
26173 bitmap_clear_bit (to_clear_bitmap
, 4);
26176 /* If the user has defined registers to be caller saved, these are no longer
26177 restored by the function before returning and must thus be cleared for
26178 security purposes. */
26179 for (regno
= NUM_ARG_REGS
; regno
<= maxregno
; regno
++)
26181 /* We do not touch registers that can be used to pass arguments as per
26182 the AAPCS, since these should never be made callee-saved by user
26184 if (IN_RANGE (regno
, FIRST_VFP_REGNUM
, D7_VFP_REGNUM
))
26186 if (IN_RANGE (regno
, IP_REGNUM
, PC_REGNUM
))
26188 if (call_used_or_fixed_reg_p (regno
))
26189 bitmap_set_bit (to_clear_bitmap
, regno
);
26192 /* Make sure we do not clear the registers used to return the result in. */
26193 result_type
= TREE_TYPE (DECL_RESULT (current_function_decl
));
26194 if (!VOID_TYPE_P (result_type
))
26196 uint64_t to_clear_return_mask
;
26197 result_rtl
= arm_function_value (result_type
, current_function_decl
, 0);
26199 /* No need to check that we return in registers, because we don't
26200 support returning on stack yet. */
26201 gcc_assert (REG_P (result_rtl
));
26202 to_clear_return_mask
26203 = compute_not_to_clear_mask (result_type
, result_rtl
, 0,
26204 &padding_bits_to_clear
);
26205 if (to_clear_return_mask
)
26207 gcc_assert ((unsigned) maxregno
< sizeof (long long) * __CHAR_BIT__
);
26208 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
26210 if (to_clear_return_mask
& (1ULL << regno
))
26211 bitmap_clear_bit (to_clear_bitmap
, regno
);
26216 if (padding_bits_to_clear
!= 0)
26218 int to_clear_bitmap_size
= SBITMAP_SIZE ((sbitmap
) to_clear_bitmap
);
26219 auto_sbitmap
to_clear_arg_regs_bitmap (to_clear_bitmap_size
);
26221 /* Padding_bits_to_clear is not 0 so we know we are dealing with
26222 returning a composite type, which only uses r0. Let's make sure that
26223 r1-r3 is cleared too. */
26224 bitmap_clear (to_clear_arg_regs_bitmap
);
26225 bitmap_set_range (to_clear_arg_regs_bitmap
, R1_REGNUM
, NUM_ARG_REGS
- 1);
26226 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap
, to_clear_bitmap
));
26229 /* Clear full registers that leak before returning. */
26230 clearing_reg
= gen_rtx_REG (SImode
, TARGET_THUMB1
? R0_REGNUM
: LR_REGNUM
);
26231 r1_reg
= gen_rtx_REG (SImode
, R0_REGNUM
+ 1);
26232 cmse_clear_registers (to_clear_bitmap
, &padding_bits_to_clear
, 1, r1_reg
,
26236 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
26237 POP instruction can be generated. LR should be replaced by PC. All
26238 the checks required are already done by USE_RETURN_INSN (). Hence,
26239 all we really need to check here is if single register is to be
26240 returned, or multiple register return. */
26242 thumb2_expand_return (bool simple_return
)
26245 unsigned long saved_regs_mask
;
26246 arm_stack_offsets
*offsets
;
26248 offsets
= arm_get_frame_offsets ();
26249 saved_regs_mask
= offsets
->saved_regs_mask
;
26251 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
26252 if (saved_regs_mask
& (1 << i
))
26255 if (!simple_return
&& saved_regs_mask
)
26257 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
26258 functions or adapt code to handle according to ACLE. This path should
26259 not be reachable for cmse_nonsecure_entry functions though we prefer
26260 to assert it for now to ensure that future code changes do not silently
26261 change this behavior. */
26262 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
26265 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
26266 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
26267 rtx addr
= gen_rtx_MEM (SImode
,
26268 gen_rtx_POST_INC (SImode
,
26269 stack_pointer_rtx
));
26270 set_mem_alias_set (addr
, get_frame_alias_set ());
26271 XVECEXP (par
, 0, 0) = ret_rtx
;
26272 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
26273 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
26274 emit_jump_insn (par
);
26278 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
26279 saved_regs_mask
|= (1 << PC_REGNUM
);
26280 arm_emit_multi_reg_pop (saved_regs_mask
);
26285 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26286 cmse_nonsecure_entry_clear_before_return ();
26287 emit_jump_insn (simple_return_rtx
);
26292 thumb1_expand_epilogue (void)
26294 HOST_WIDE_INT amount
;
26295 arm_stack_offsets
*offsets
;
26298 /* Naked functions don't have prologues. */
26299 if (IS_NAKED (arm_current_func_type ()))
26302 offsets
= arm_get_frame_offsets ();
26303 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26305 if (frame_pointer_needed
)
26307 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
26308 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26310 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
26312 gcc_assert (amount
>= 0);
26315 emit_insn (gen_blockage ());
26318 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
26319 GEN_INT (amount
)));
26322 /* r3 is always free in the epilogue. */
26323 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
26325 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
26326 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
26330 /* Emit a USE (stack_pointer_rtx), so that
26331 the stack adjustment will not be deleted. */
26332 emit_insn (gen_force_register_use (stack_pointer_rtx
));
26334 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
26335 emit_insn (gen_blockage ());
26337 /* Emit a clobber for each insn that will be restored in the epilogue,
26338 so that flow2 will get register lifetimes correct. */
26339 for (regno
= 0; regno
< 13; regno
++)
26340 if (df_regs_ever_live_p (regno
) && !call_used_or_fixed_reg_p (regno
))
26341 emit_clobber (gen_rtx_REG (SImode
, regno
));
26343 if (! df_regs_ever_live_p (LR_REGNUM
))
26344 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
26346 /* Clear all caller-saved regs that are not used to return. */
26347 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26348 cmse_nonsecure_entry_clear_before_return ();
26351 /* Epilogue code for APCS frame. */
26353 arm_expand_epilogue_apcs_frame (bool really_return
)
26355 unsigned long func_type
;
26356 unsigned long saved_regs_mask
;
26359 int floats_from_frame
= 0;
26360 arm_stack_offsets
*offsets
;
26362 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
26363 func_type
= arm_current_func_type ();
26365 /* Get frame offsets for ARM. */
26366 offsets
= arm_get_frame_offsets ();
26367 saved_regs_mask
= offsets
->saved_regs_mask
;
26369 /* Find the offset of the floating-point save area in the frame. */
26371 = (offsets
->saved_args
26372 + arm_compute_static_chain_stack_bytes ()
26375 /* Compute how many core registers saved and how far away the floats are. */
26376 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
26377 if (saved_regs_mask
& (1 << i
))
26380 floats_from_frame
+= 4;
26383 if (TARGET_HARD_FLOAT
)
26386 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
26388 /* The offset is from IP_REGNUM. */
26389 int saved_size
= arm_get_vfp_saved_size ();
26390 if (saved_size
> 0)
26393 floats_from_frame
+= saved_size
;
26394 insn
= emit_insn (gen_addsi3 (ip_rtx
,
26395 hard_frame_pointer_rtx
,
26396 GEN_INT (-floats_from_frame
)));
26397 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
26398 ip_rtx
, hard_frame_pointer_rtx
);
26401 /* Generate VFP register multi-pop. */
26402 start_reg
= FIRST_VFP_REGNUM
;
26404 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
26405 /* Look for a case where a reg does not need restoring. */
26406 if ((!df_regs_ever_live_p (i
) || call_used_or_fixed_reg_p (i
))
26407 && (!df_regs_ever_live_p (i
+ 1)
26408 || call_used_or_fixed_reg_p (i
+ 1)))
26410 if (start_reg
!= i
)
26411 arm_emit_vfp_multi_reg_pop (start_reg
,
26412 (i
- start_reg
) / 2,
26413 gen_rtx_REG (SImode
,
26418 /* Restore the remaining regs that we have discovered (or possibly
26419 even all of them, if the conditional in the for loop never
26421 if (start_reg
!= i
)
26422 arm_emit_vfp_multi_reg_pop (start_reg
,
26423 (i
- start_reg
) / 2,
26424 gen_rtx_REG (SImode
, IP_REGNUM
));
26429 /* The frame pointer is guaranteed to be non-double-word aligned, as
26430 it is set to double-word-aligned old_stack_pointer - 4. */
26432 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
26434 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
26435 if (df_regs_ever_live_p (i
) && !call_used_or_fixed_reg_p (i
))
26437 rtx addr
= gen_frame_mem (V2SImode
,
26438 plus_constant (Pmode
, hard_frame_pointer_rtx
,
26440 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
26441 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
26442 gen_rtx_REG (V2SImode
, i
),
26448 /* saved_regs_mask should contain IP which contains old stack pointer
26449 at the time of activation creation. Since SP and IP are adjacent registers,
26450 we can restore the value directly into SP. */
26451 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
26452 saved_regs_mask
&= ~(1 << IP_REGNUM
);
26453 saved_regs_mask
|= (1 << SP_REGNUM
);
26455 /* There are two registers left in saved_regs_mask - LR and PC. We
26456 only need to restore LR (the return address), but to
26457 save time we can load it directly into PC, unless we need a
26458 special function exit sequence, or we are not really returning. */
26460 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
26461 && !crtl
->calls_eh_return
)
26462 /* Delete LR from the register mask, so that LR on
26463 the stack is loaded into the PC in the register mask. */
26464 saved_regs_mask
&= ~(1 << LR_REGNUM
);
26466 saved_regs_mask
&= ~(1 << PC_REGNUM
);
26468 num_regs
= bit_count (saved_regs_mask
);
26469 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
26472 emit_insn (gen_blockage ());
26473 /* Unwind the stack to just below the saved registers. */
26474 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26475 hard_frame_pointer_rtx
,
26476 GEN_INT (- 4 * num_regs
)));
26478 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
26479 stack_pointer_rtx
, hard_frame_pointer_rtx
);
26482 arm_emit_multi_reg_pop (saved_regs_mask
);
26484 if (IS_INTERRUPT (func_type
))
26486 /* Interrupt handlers will have pushed the
26487 IP onto the stack, so restore it now. */
26489 rtx addr
= gen_rtx_MEM (SImode
,
26490 gen_rtx_POST_INC (SImode
,
26491 stack_pointer_rtx
));
26492 set_mem_alias_set (addr
, get_frame_alias_set ());
26493 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
26494 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
26495 gen_rtx_REG (SImode
, IP_REGNUM
),
26499 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
26502 if (crtl
->calls_eh_return
)
26503 emit_insn (gen_addsi3 (stack_pointer_rtx
,
26505 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
26507 if (IS_STACKALIGN (func_type
))
26508 /* Restore the original stack pointer. Before prologue, the stack was
26509 realigned and the original stack pointer saved in r0. For details,
26510 see comment in arm_expand_prologue. */
26511 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
26513 emit_jump_insn (simple_return_rtx
);
26516 /* Generate RTL to represent ARM epilogue. Really_return is true if the
26517 function is not a sibcall. */
26519 arm_expand_epilogue (bool really_return
)
26521 unsigned long func_type
;
26522 unsigned long saved_regs_mask
;
26526 arm_stack_offsets
*offsets
;
26528 func_type
= arm_current_func_type ();
26530 /* Naked functions don't have epilogue. Hence, generate return pattern, and
26531 let output_return_instruction take care of instruction emission if any. */
26532 if (IS_NAKED (func_type
)
26533 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
26536 emit_jump_insn (simple_return_rtx
);
26540 /* If we are throwing an exception, then we really must be doing a
26541 return, so we can't tail-call. */
26542 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
26544 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
26546 arm_expand_epilogue_apcs_frame (really_return
);
26550 /* Get frame offsets for ARM. */
26551 offsets
= arm_get_frame_offsets ();
26552 saved_regs_mask
= offsets
->saved_regs_mask
;
26553 num_regs
= bit_count (saved_regs_mask
);
26555 if (frame_pointer_needed
)
26558 /* Restore stack pointer if necessary. */
26561 /* In ARM mode, frame pointer points to first saved register.
26562 Restore stack pointer to last saved register. */
26563 amount
= offsets
->frame
- offsets
->saved_regs
;
26565 /* Force out any pending memory operations that reference stacked data
26566 before stack de-allocation occurs. */
26567 emit_insn (gen_blockage ());
26568 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26569 hard_frame_pointer_rtx
,
26570 GEN_INT (amount
)));
26571 arm_add_cfa_adjust_cfa_note (insn
, amount
,
26573 hard_frame_pointer_rtx
);
26575 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
26577 emit_insn (gen_force_register_use (stack_pointer_rtx
));
26581 /* In Thumb-2 mode, the frame pointer points to the last saved
26583 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26586 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
26587 hard_frame_pointer_rtx
,
26588 GEN_INT (amount
)));
26589 arm_add_cfa_adjust_cfa_note (insn
, amount
,
26590 hard_frame_pointer_rtx
,
26591 hard_frame_pointer_rtx
);
26594 /* Force out any pending memory operations that reference stacked data
26595 before stack de-allocation occurs. */
26596 emit_insn (gen_blockage ());
26597 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
26598 hard_frame_pointer_rtx
));
26599 arm_add_cfa_adjust_cfa_note (insn
, 0,
26601 hard_frame_pointer_rtx
);
26602 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
26604 emit_insn (gen_force_register_use (stack_pointer_rtx
));
26609 /* Pop off outgoing args and local frame to adjust stack pointer to
26610 last saved register. */
26611 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26615 /* Force out any pending memory operations that reference stacked data
26616 before stack de-allocation occurs. */
26617 emit_insn (gen_blockage ());
26618 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26620 GEN_INT (amount
)));
26621 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
26622 stack_pointer_rtx
, stack_pointer_rtx
);
26623 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
26625 emit_insn (gen_force_register_use (stack_pointer_rtx
));
26629 if (TARGET_HARD_FLOAT
)
26631 /* Generate VFP register multi-pop. */
26632 int end_reg
= LAST_VFP_REGNUM
+ 1;
26634 /* Scan the registers in reverse order. We need to match
26635 any groupings made in the prologue and generate matching
26636 vldm operations. The need to match groups is because,
26637 unlike pop, vldm can only do consecutive regs. */
26638 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
26639 /* Look for a case where a reg does not need restoring. */
26640 if ((!df_regs_ever_live_p (i
) || call_used_or_fixed_reg_p (i
))
26641 && (!df_regs_ever_live_p (i
+ 1)
26642 || call_used_or_fixed_reg_p (i
+ 1)))
26644 /* Restore the regs discovered so far (from reg+2 to
26646 if (end_reg
> i
+ 2)
26647 arm_emit_vfp_multi_reg_pop (i
+ 2,
26648 (end_reg
- (i
+ 2)) / 2,
26649 stack_pointer_rtx
);
26653 /* Restore the remaining regs that we have discovered (or possibly
26654 even all of them, if the conditional in the for loop never
26656 if (end_reg
> i
+ 2)
26657 arm_emit_vfp_multi_reg_pop (i
+ 2,
26658 (end_reg
- (i
+ 2)) / 2,
26659 stack_pointer_rtx
);
26663 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
26664 if (df_regs_ever_live_p (i
) && !call_used_or_fixed_reg_p (i
))
26667 rtx addr
= gen_rtx_MEM (V2SImode
,
26668 gen_rtx_POST_INC (SImode
,
26669 stack_pointer_rtx
));
26670 set_mem_alias_set (addr
, get_frame_alias_set ());
26671 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
26672 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
26673 gen_rtx_REG (V2SImode
, i
),
26675 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
26676 stack_pointer_rtx
, stack_pointer_rtx
);
26679 if (saved_regs_mask
)
26682 bool return_in_pc
= false;
26684 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
26685 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
26686 && !IS_CMSE_ENTRY (func_type
)
26687 && !IS_STACKALIGN (func_type
)
26689 && crtl
->args
.pretend_args_size
== 0
26690 && saved_regs_mask
& (1 << LR_REGNUM
)
26691 && !crtl
->calls_eh_return
)
26693 saved_regs_mask
&= ~(1 << LR_REGNUM
);
26694 saved_regs_mask
|= (1 << PC_REGNUM
);
26695 return_in_pc
= true;
26698 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
26700 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
26701 if (saved_regs_mask
& (1 << i
))
26703 rtx addr
= gen_rtx_MEM (SImode
,
26704 gen_rtx_POST_INC (SImode
,
26705 stack_pointer_rtx
));
26706 set_mem_alias_set (addr
, get_frame_alias_set ());
26708 if (i
== PC_REGNUM
)
26710 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
26711 XVECEXP (insn
, 0, 0) = ret_rtx
;
26712 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
26714 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
26715 insn
= emit_jump_insn (insn
);
26719 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
26721 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
26722 gen_rtx_REG (SImode
, i
),
26724 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
26726 stack_pointer_rtx
);
26733 && current_tune
->prefer_ldrd_strd
26734 && !optimize_function_for_size_p (cfun
))
26737 thumb2_emit_ldrd_pop (saved_regs_mask
);
26738 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
26739 arm_emit_ldrd_pop (saved_regs_mask
);
26741 arm_emit_multi_reg_pop (saved_regs_mask
);
26744 arm_emit_multi_reg_pop (saved_regs_mask
);
26752 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
26756 rtx dwarf
= NULL_RTX
;
26758 emit_insn (gen_addsi3 (stack_pointer_rtx
,
26760 GEN_INT (amount
)));
26762 RTX_FRAME_RELATED_P (tmp
) = 1;
26764 if (cfun
->machine
->uses_anonymous_args
)
26766 /* Restore pretend args. Refer arm_expand_prologue on how to save
26767 pretend_args in stack. */
26768 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
26769 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
26770 for (j
= 0, i
= 0; j
< num_regs
; i
++)
26771 if (saved_regs_mask
& (1 << i
))
26773 rtx reg
= gen_rtx_REG (SImode
, i
);
26774 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
26777 REG_NOTES (tmp
) = dwarf
;
26779 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
26780 stack_pointer_rtx
, stack_pointer_rtx
);
26783 /* Clear all caller-saved regs that are not used to return. */
26784 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26786 /* CMSE_ENTRY always returns. */
26787 gcc_assert (really_return
);
26788 cmse_nonsecure_entry_clear_before_return ();
26791 if (!really_return
)
26794 if (crtl
->calls_eh_return
)
26795 emit_insn (gen_addsi3 (stack_pointer_rtx
,
26797 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
26799 if (IS_STACKALIGN (func_type
))
26800 /* Restore the original stack pointer. Before prologue, the stack was
26801 realigned and the original stack pointer saved in r0. For details,
26802 see comment in arm_expand_prologue. */
26803 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
26805 emit_jump_insn (simple_return_rtx
);
26808 /* Implementation of insn prologue_thumb1_interwork. This is the first
26809 "instruction" of a function called in ARM mode. Swap to thumb mode. */
26812 thumb1_output_interwork (void)
26815 FILE *f
= asm_out_file
;
26817 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
26818 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
26820 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
26822 /* Generate code sequence to switch us into Thumb mode. */
26823 /* The .code 32 directive has already been emitted by
26824 ASM_DECLARE_FUNCTION_NAME. */
26825 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
26826 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
26828 /* Generate a label, so that the debugger will notice the
26829 change in instruction sets. This label is also used by
26830 the assembler to bypass the ARM code when this function
26831 is called from a Thumb encoded function elsewhere in the
26832 same file. Hence the definition of STUB_NAME here must
26833 agree with the definition in gas/config/tc-arm.c. */
26835 #define STUB_NAME ".real_start_of"
26837 fprintf (f
, "\t.code\t16\n");
26839 if (arm_dllexport_name_p (name
))
26840 name
= arm_strip_name_encoding (name
);
26842 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
26843 fprintf (f
, "\t.thumb_func\n");
26844 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
26849 /* Handle the case of a double word load into a low register from
26850 a computed memory address. The computed address may involve a
26851 register which is overwritten by the load. */
26853 thumb_load_double_from_address (rtx
*operands
)
26861 gcc_assert (REG_P (operands
[0]));
26862 gcc_assert (MEM_P (operands
[1]));
26864 /* Get the memory address. */
26865 addr
= XEXP (operands
[1], 0);
26867 /* Work out how the memory address is computed. */
26868 switch (GET_CODE (addr
))
26871 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26873 if (REGNO (operands
[0]) == REGNO (addr
))
26875 output_asm_insn ("ldr\t%H0, %2", operands
);
26876 output_asm_insn ("ldr\t%0, %1", operands
);
26880 output_asm_insn ("ldr\t%0, %1", operands
);
26881 output_asm_insn ("ldr\t%H0, %2", operands
);
26886 /* Compute <address> + 4 for the high order load. */
26887 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26889 output_asm_insn ("ldr\t%0, %1", operands
);
26890 output_asm_insn ("ldr\t%H0, %2", operands
);
26894 arg1
= XEXP (addr
, 0);
26895 arg2
= XEXP (addr
, 1);
26897 if (CONSTANT_P (arg1
))
26898 base
= arg2
, offset
= arg1
;
26900 base
= arg1
, offset
= arg2
;
26902 gcc_assert (REG_P (base
));
26904 /* Catch the case of <address> = <reg> + <reg> */
26905 if (REG_P (offset
))
26907 int reg_offset
= REGNO (offset
);
26908 int reg_base
= REGNO (base
);
26909 int reg_dest
= REGNO (operands
[0]);
26911 /* Add the base and offset registers together into the
26912 higher destination register. */
26913 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
26914 reg_dest
+ 1, reg_base
, reg_offset
);
26916 /* Load the lower destination register from the address in
26917 the higher destination register. */
26918 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
26919 reg_dest
, reg_dest
+ 1);
26921 /* Load the higher destination register from its own address
26923 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
26924 reg_dest
+ 1, reg_dest
+ 1);
26928 /* Compute <address> + 4 for the high order load. */
26929 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26931 /* If the computed address is held in the low order register
26932 then load the high order register first, otherwise always
26933 load the low order register first. */
26934 if (REGNO (operands
[0]) == REGNO (base
))
26936 output_asm_insn ("ldr\t%H0, %2", operands
);
26937 output_asm_insn ("ldr\t%0, %1", operands
);
26941 output_asm_insn ("ldr\t%0, %1", operands
);
26942 output_asm_insn ("ldr\t%H0, %2", operands
);
26948 /* With no registers to worry about we can just load the value
26950 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26952 output_asm_insn ("ldr\t%H0, %2", operands
);
26953 output_asm_insn ("ldr\t%0, %1", operands
);
26957 gcc_unreachable ();
26964 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
26969 if (REGNO (operands
[4]) > REGNO (operands
[5]))
26970 std::swap (operands
[4], operands
[5]);
26972 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
26973 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
26977 if (REGNO (operands
[4]) > REGNO (operands
[5]))
26978 std::swap (operands
[4], operands
[5]);
26979 if (REGNO (operands
[5]) > REGNO (operands
[6]))
26980 std::swap (operands
[5], operands
[6]);
26981 if (REGNO (operands
[4]) > REGNO (operands
[5]))
26982 std::swap (operands
[4], operands
[5]);
26984 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
26985 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
26989 gcc_unreachable ();
26995 /* Output a call-via instruction for thumb state. */
26997 thumb_call_via_reg (rtx reg
)
26999 int regno
= REGNO (reg
);
27002 gcc_assert (regno
< LR_REGNUM
);
27004 /* If we are in the normal text section we can use a single instance
27005 per compilation unit. If we are doing function sections, then we need
27006 an entry per section, since we can't rely on reachability. */
27007 if (in_section
== text_section
)
27009 thumb_call_reg_needed
= 1;
27011 if (thumb_call_via_label
[regno
] == NULL
)
27012 thumb_call_via_label
[regno
] = gen_label_rtx ();
27013 labelp
= thumb_call_via_label
+ regno
;
27017 if (cfun
->machine
->call_via
[regno
] == NULL
)
27018 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
27019 labelp
= cfun
->machine
->call_via
+ regno
;
27022 output_asm_insn ("bl\t%a0", labelp
);
27026 /* Routines for generating rtl. */
27028 thumb_expand_cpymemqi (rtx
*operands
)
27030 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
27031 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
27032 HOST_WIDE_INT len
= INTVAL (operands
[2]);
27033 HOST_WIDE_INT offset
= 0;
27037 emit_insn (gen_cpymem12b (out
, in
, out
, in
));
27043 emit_insn (gen_cpymem8b (out
, in
, out
, in
));
27049 rtx reg
= gen_reg_rtx (SImode
);
27050 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
27051 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
27058 rtx reg
= gen_reg_rtx (HImode
);
27059 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
27060 plus_constant (Pmode
, in
,
27062 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
27071 rtx reg
= gen_reg_rtx (QImode
);
27072 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
27073 plus_constant (Pmode
, in
,
27075 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
27082 thumb_reload_out_hi (rtx
*operands
)
27084 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
27087 /* Return the length of a function name prefix
27088 that starts with the character 'c'. */
27090 arm_get_strip_length (int c
)
27094 ARM_NAME_ENCODING_LENGTHS
27099 /* Return a pointer to a function's name with any
27100 and all prefix encodings stripped from it. */
27102 arm_strip_name_encoding (const char *name
)
27106 while ((skip
= arm_get_strip_length (* name
)))
27112 /* If there is a '*' anywhere in the name's prefix, then
27113 emit the stripped name verbatim, otherwise prepend an
27114 underscore if leading underscores are being used. */
27116 arm_asm_output_labelref (FILE *stream
, const char *name
)
27121 while ((skip
= arm_get_strip_length (* name
)))
27123 verbatim
|= (*name
== '*');
27128 fputs (name
, stream
);
27130 asm_fprintf (stream
, "%U%s", name
);
27133 /* This function is used to emit an EABI tag and its associated value.
27134 We emit the numerical value of the tag in case the assembler does not
27135 support textual tags. (Eg gas prior to 2.20). If requested we include
27136 the tag name in a comment so that anyone reading the assembler output
27137 will know which tag is being set.
27139 This function is not static because arm-c.c needs it too. */
27142 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
27144 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
27145 if (flag_verbose_asm
|| flag_debug_asm
)
27146 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
27147 asm_fprintf (asm_out_file
, "\n");
27150 /* This function is used to print CPU tuning information as comment
27151 in assembler file. Pointers are not printed for now. */
27154 arm_print_tune_info (void)
27156 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune parameters\n");
27157 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"constant_limit:\t%d\n",
27158 current_tune
->constant_limit
);
27159 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
27160 "max_insns_skipped:\t%d\n", current_tune
->max_insns_skipped
);
27161 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
27162 "prefetch.num_slots:\t%d\n", current_tune
->prefetch
.num_slots
);
27163 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
27164 "prefetch.l1_cache_size:\t%d\n",
27165 current_tune
->prefetch
.l1_cache_size
);
27166 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
27167 "prefetch.l1_cache_line_size:\t%d\n",
27168 current_tune
->prefetch
.l1_cache_line_size
);
27169 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
27170 "prefer_constant_pool:\t%d\n",
27171 (int) current_tune
->prefer_constant_pool
);
27172 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
27173 "branch_cost:\t(s:speed, p:predictable)\n");
27174 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\ts&p\tcost\n");
27175 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t00\t%d\n",
27176 current_tune
->branch_cost (false, false));
27177 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t01\t%d\n",
27178 current_tune
->branch_cost (false, true));
27179 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t10\t%d\n",
27180 current_tune
->branch_cost (true, false));
27181 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t11\t%d\n",
27182 current_tune
->branch_cost (true, true));
27183 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
27184 "prefer_ldrd_strd:\t%d\n",
27185 (int) current_tune
->prefer_ldrd_strd
);
27186 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
27187 "logical_op_non_short_circuit:\t[%d,%d]\n",
27188 (int) current_tune
->logical_op_non_short_circuit_thumb
,
27189 (int) current_tune
->logical_op_non_short_circuit_arm
);
27190 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
27191 "disparage_flag_setting_t16_encodings:\t%d\n",
27192 (int) current_tune
->disparage_flag_setting_t16_encodings
);
27193 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
27194 "string_ops_prefer_neon:\t%d\n",
27195 (int) current_tune
->string_ops_prefer_neon
);
27196 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
27197 "max_insns_inline_memset:\t%d\n",
27198 current_tune
->max_insns_inline_memset
);
27199 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"fusible_ops:\t%u\n",
27200 current_tune
->fusible_ops
);
27201 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"sched_autopref:\t%d\n",
27202 (int) current_tune
->sched_autopref
);
27205 /* Print .arch and .arch_extension directives corresponding to the
27206 current architecture configuration. */
27208 arm_print_asm_arch_directives ()
27210 const arch_option
*arch
27211 = arm_parse_arch_option_name (all_architectures
, "-march",
27212 arm_active_target
.arch_name
);
27213 auto_sbitmap
opt_bits (isa_num_bits
);
27217 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_active_target
.arch_name
);
27218 arm_last_printed_arch_string
= arm_active_target
.arch_name
;
27219 if (!arch
->common
.extensions
)
27222 for (const struct cpu_arch_extension
*opt
= arch
->common
.extensions
;
27228 arm_initialize_isa (opt_bits
, opt
->isa_bits
);
27230 /* If every feature bit of this option is set in the target
27231 ISA specification, print out the option name. However,
27232 don't print anything if all the bits are part of the
27233 FPU specification. */
27234 if (bitmap_subset_p (opt_bits
, arm_active_target
.isa
)
27235 && !bitmap_subset_p (opt_bits
, isa_all_fpubits
))
27236 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", opt
->name
);
27242 arm_file_start (void)
27248 /* We don't have a specified CPU. Use the architecture to
27251 Note: it might be better to do this unconditionally, then the
27252 assembler would not need to know about all new CPU names as
27254 if (!arm_active_target
.core_name
)
27256 /* armv7ve doesn't support any extensions. */
27257 if (strcmp (arm_active_target
.arch_name
, "armv7ve") == 0)
27259 /* Keep backward compatability for assemblers
27260 which don't support armv7ve. */
27261 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
27262 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
27263 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
27264 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
27265 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
27266 arm_last_printed_arch_string
= "armv7ve";
27269 arm_print_asm_arch_directives ();
27271 else if (strncmp (arm_active_target
.core_name
, "generic", 7) == 0)
27273 asm_fprintf (asm_out_file
, "\t.arch %s\n",
27274 arm_active_target
.core_name
+ 8);
27275 arm_last_printed_arch_string
= arm_active_target
.core_name
+ 8;
27279 const char* truncated_name
27280 = arm_rewrite_selected_cpu (arm_active_target
.core_name
);
27281 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
27284 if (print_tune_info
)
27285 arm_print_tune_info ();
27287 if (! TARGET_SOFT_FLOAT
)
27289 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
27290 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
27292 if (TARGET_HARD_FLOAT_ABI
)
27293 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
27296 /* Some of these attributes only apply when the corresponding features
27297 are used. However we don't have any easy way of figuring this out.
27298 Conservatively record the setting that would have been used. */
27300 if (flag_rounding_math
)
27301 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
27303 if (!flag_unsafe_math_optimizations
)
27305 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
27306 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
27308 if (flag_signaling_nans
)
27309 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
27311 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
27312 flag_finite_math_only
? 1 : 3);
27314 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
27315 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
27316 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
27317 flag_short_enums
? 1 : 2);
27319 /* Tag_ABI_optimization_goals. */
27322 else if (optimize
>= 2)
27328 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
27330 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
27333 if (arm_fp16_format
)
27334 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
27335 (int) arm_fp16_format
);
27337 if (arm_lang_output_object_attributes_hook
)
27338 arm_lang_output_object_attributes_hook();
27341 default_file_start ();
27345 arm_file_end (void)
27349 if (NEED_INDICATE_EXEC_STACK
)
27350 /* Add .note.GNU-stack. */
27351 file_end_indicate_exec_stack ();
27353 if (! thumb_call_reg_needed
)
27356 switch_to_section (text_section
);
27357 asm_fprintf (asm_out_file
, "\t.code 16\n");
27358 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
27360 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
27362 rtx label
= thumb_call_via_label
[regno
];
27366 targetm
.asm_out
.internal_label (asm_out_file
, "L",
27367 CODE_LABEL_NUMBER (label
));
27368 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
27374 /* Symbols in the text segment can be accessed without indirecting via the
27375 constant pool; it may take an extra binary operation, but this is still
27376 faster than indirecting via memory. Don't do this when not optimizing,
27377 since we won't be calculating al of the offsets necessary to do this
27381 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
27383 if (optimize
> 0 && TREE_CONSTANT (decl
))
27384 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
27386 default_encode_section_info (decl
, rtl
, first
);
27388 #endif /* !ARM_PE */
27391 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
27393 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
27394 && !strcmp (prefix
, "L"))
27396 arm_ccfsm_state
= 0;
27397 arm_target_insn
= NULL
;
27399 default_internal_label (stream
, prefix
, labelno
);
27402 /* Output code to add DELTA to the first argument, and then jump
27403 to FUNCTION. Used for C++ multiple inheritance. */
27406 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
27407 HOST_WIDE_INT
, tree function
)
27409 static int thunk_label
= 0;
27412 int mi_delta
= delta
;
27413 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
27415 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
27418 mi_delta
= - mi_delta
;
27420 final_start_function (emit_barrier (), file
, 1);
27424 int labelno
= thunk_label
++;
27425 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
27426 /* Thunks are entered in arm mode when available. */
27427 if (TARGET_THUMB1_ONLY
)
27429 /* push r3 so we can use it as a temporary. */
27430 /* TODO: Omit this save if r3 is not used. */
27431 fputs ("\tpush {r3}\n", file
);
27432 fputs ("\tldr\tr3, ", file
);
27436 fputs ("\tldr\tr12, ", file
);
27438 assemble_name (file
, label
);
27439 fputc ('\n', file
);
27442 /* If we are generating PIC, the ldr instruction below loads
27443 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
27444 the address of the add + 8, so we have:
27446 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
27449 Note that we have "+ 1" because some versions of GNU ld
27450 don't set the low bit of the result for R_ARM_REL32
27451 relocations against thumb function symbols.
27452 On ARMv6M this is +4, not +8. */
27453 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
27454 assemble_name (file
, labelpc
);
27455 fputs (":\n", file
);
27456 if (TARGET_THUMB1_ONLY
)
27458 /* This is 2 insns after the start of the thunk, so we know it
27459 is 4-byte aligned. */
27460 fputs ("\tadd\tr3, pc, r3\n", file
);
27461 fputs ("\tmov r12, r3\n", file
);
27464 fputs ("\tadd\tr12, pc, r12\n", file
);
27466 else if (TARGET_THUMB1_ONLY
)
27467 fputs ("\tmov r12, r3\n", file
);
27469 if (TARGET_THUMB1_ONLY
)
27471 if (mi_delta
> 255)
27473 fputs ("\tldr\tr3, ", file
);
27474 assemble_name (file
, label
);
27475 fputs ("+4\n", file
);
27476 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
27477 mi_op
, this_regno
, this_regno
);
27479 else if (mi_delta
!= 0)
27481 /* Thumb1 unified syntax requires s suffix in instruction name when
27482 one of the operands is immediate. */
27483 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
27484 mi_op
, this_regno
, this_regno
,
27490 /* TODO: Use movw/movt for large constants when available. */
27491 while (mi_delta
!= 0)
27493 if ((mi_delta
& (3 << shift
)) == 0)
27497 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
27498 mi_op
, this_regno
, this_regno
,
27499 mi_delta
& (0xff << shift
));
27500 mi_delta
&= ~(0xff << shift
);
27507 if (TARGET_THUMB1_ONLY
)
27508 fputs ("\tpop\t{r3}\n", file
);
27510 fprintf (file
, "\tbx\tr12\n");
27511 ASM_OUTPUT_ALIGN (file
, 2);
27512 assemble_name (file
, label
);
27513 fputs (":\n", file
);
27516 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
27517 rtx tem
= XEXP (DECL_RTL (function
), 0);
27518 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
27519 pipeline offset is four rather than eight. Adjust the offset
27521 tem
= plus_constant (GET_MODE (tem
), tem
,
27522 TARGET_THUMB1_ONLY
? -3 : -7);
27523 tem
= gen_rtx_MINUS (GET_MODE (tem
),
27525 gen_rtx_SYMBOL_REF (Pmode
,
27526 ggc_strdup (labelpc
)));
27527 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
27530 /* Output ".word .LTHUNKn". */
27531 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
27533 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
27534 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
27538 fputs ("\tb\t", file
);
27539 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
27540 if (NEED_PLT_RELOC
)
27541 fputs ("(PLT)", file
);
27542 fputc ('\n', file
);
27545 final_end_function ();
27548 /* MI thunk handling for TARGET_32BIT. */
27551 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
27552 HOST_WIDE_INT vcall_offset
, tree function
)
27554 const bool long_call_p
= arm_is_long_call_p (function
);
27556 /* On ARM, this_regno is R0 or R1 depending on
27557 whether the function returns an aggregate or not.
27559 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
27561 ? R1_REGNUM
: R0_REGNUM
);
27563 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
27564 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
27565 reload_completed
= 1;
27566 emit_note (NOTE_INSN_PROLOGUE_END
);
27568 /* Add DELTA to THIS_RTX. */
27570 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
27571 delta
, this_rtx
, this_rtx
, false);
27573 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
27574 if (vcall_offset
!= 0)
27576 /* Load *THIS_RTX. */
27577 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
27578 /* Compute *THIS_RTX + VCALL_OFFSET. */
27579 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
27581 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
27582 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
27583 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
27586 /* Generate a tail call to the target function. */
27587 if (!TREE_USED (function
))
27589 assemble_external (function
);
27590 TREE_USED (function
) = 1;
27592 rtx funexp
= XEXP (DECL_RTL (function
), 0);
27595 emit_move_insn (temp
, funexp
);
27598 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
27599 rtx_insn
*insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
27600 SIBLING_CALL_P (insn
) = 1;
27603 /* Indirect calls require a bit of fixup in PIC mode. */
27606 split_all_insns_noflow ();
27610 insn
= get_insns ();
27611 shorten_branches (insn
);
27612 final_start_function (insn
, file
, 1);
27613 final (insn
, file
, 1);
27614 final_end_function ();
27616 /* Stop pretending this is a post-reload pass. */
27617 reload_completed
= 0;
27620 /* Output code to add DELTA to the first argument, and then jump
27621 to FUNCTION. Used for C++ multiple inheritance. */
27624 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
27625 HOST_WIDE_INT vcall_offset
, tree function
)
27627 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk
));
27629 assemble_start_function (thunk
, fnname
);
27631 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
27633 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
27634 assemble_end_function (thunk
, fnname
);
27638 arm_emit_vector_const (FILE *file
, rtx x
)
27641 const char * pattern
;
27643 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
27645 switch (GET_MODE (x
))
27647 case E_V2SImode
: pattern
= "%08x"; break;
27648 case E_V4HImode
: pattern
= "%04x"; break;
27649 case E_V8QImode
: pattern
= "%02x"; break;
27650 default: gcc_unreachable ();
27653 fprintf (file
, "0x");
27654 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
27658 element
= CONST_VECTOR_ELT (x
, i
);
27659 fprintf (file
, pattern
, INTVAL (element
));
27665 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
27666 HFmode constant pool entries are actually loaded with ldr. */
27668 arm_emit_fp16_const (rtx c
)
27672 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
27673 if (WORDS_BIG_ENDIAN
)
27674 assemble_zeros (2);
27675 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
27676 if (!WORDS_BIG_ENDIAN
)
27677 assemble_zeros (2);
27681 arm_output_load_gr (rtx
*operands
)
27688 if (!MEM_P (operands
[1])
27689 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
27690 || !REG_P (reg
= XEXP (sum
, 0))
27691 || !CONST_INT_P (offset
= XEXP (sum
, 1))
27692 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
27693 return "wldrw%?\t%0, %1";
27695 /* Fix up an out-of-range load of a GR register. */
27696 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
27697 wcgr
= operands
[0];
27699 output_asm_insn ("ldr%?\t%0, %1", operands
);
27701 operands
[0] = wcgr
;
27703 output_asm_insn ("tmcr%?\t%0, %1", operands
);
27704 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
27709 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
27711 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
27712 named arg and all anonymous args onto the stack.
27713 XXX I know the prologue shouldn't be pushing registers, but it is faster
27717 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
27718 const function_arg_info
&arg
,
27720 int second_time ATTRIBUTE_UNUSED
)
27722 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
27725 cfun
->machine
->uses_anonymous_args
= 1;
27726 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
27728 nregs
= pcum
->aapcs_ncrn
;
27731 int res
= arm_needs_doubleword_align (arg
.mode
, arg
.type
);
27732 if (res
< 0 && warn_psabi
)
27733 inform (input_location
, "parameter passing for argument of "
27734 "type %qT changed in GCC 7.1", arg
.type
);
27738 if (res
> 1 && warn_psabi
)
27739 inform (input_location
,
27740 "parameter passing for argument of type "
27741 "%qT changed in GCC 9.1", arg
.type
);
27746 nregs
= pcum
->nregs
;
27748 if (nregs
< NUM_ARG_REGS
)
27749 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
27752 /* We can't rely on the caller doing the proper promotion when
27753 using APCS or ATPCS. */
27756 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
27758 return !TARGET_AAPCS_BASED
;
27761 static machine_mode
27762 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
27764 int *punsignedp ATTRIBUTE_UNUSED
,
27765 const_tree fntype ATTRIBUTE_UNUSED
,
27766 int for_return ATTRIBUTE_UNUSED
)
27768 if (GET_MODE_CLASS (mode
) == MODE_INT
27769 && GET_MODE_SIZE (mode
) < 4)
27777 arm_default_short_enums (void)
27779 return ARM_DEFAULT_SHORT_ENUMS
;
27783 /* AAPCS requires that anonymous bitfields affect structure alignment. */
27786 arm_align_anon_bitfield (void)
27788 return TARGET_AAPCS_BASED
;
27792 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
27795 arm_cxx_guard_type (void)
27797 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
27801 /* The EABI says test the least significant bit of a guard variable. */
27804 arm_cxx_guard_mask_bit (void)
27806 return TARGET_AAPCS_BASED
;
27810 /* The EABI specifies that all array cookies are 8 bytes long. */
27813 arm_get_cookie_size (tree type
)
27817 if (!TARGET_AAPCS_BASED
)
27818 return default_cxx_get_cookie_size (type
);
27820 size
= build_int_cst (sizetype
, 8);
27825 /* The EABI says that array cookies should also contain the element size. */
27828 arm_cookie_has_size (void)
27830 return TARGET_AAPCS_BASED
;
27834 /* The EABI says constructors and destructors should return a pointer to
27835 the object constructed/destroyed. */
27838 arm_cxx_cdtor_returns_this (void)
27840 return TARGET_AAPCS_BASED
;
27843 /* The EABI says that an inline function may never be the key
27847 arm_cxx_key_method_may_be_inline (void)
27849 return !TARGET_AAPCS_BASED
;
27853 arm_cxx_determine_class_data_visibility (tree decl
)
27855 if (!TARGET_AAPCS_BASED
27856 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
27859 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27860 is exported. However, on systems without dynamic vague linkage,
27861 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27862 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
27863 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
27865 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
27866 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
27870 arm_cxx_class_data_always_comdat (void)
27872 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27873 vague linkage if the class has no key function. */
27874 return !TARGET_AAPCS_BASED
;
27878 /* The EABI says __aeabi_atexit should be used to register static
27882 arm_cxx_use_aeabi_atexit (void)
27884 return TARGET_AAPCS_BASED
;
27889 arm_set_return_address (rtx source
, rtx scratch
)
27891 arm_stack_offsets
*offsets
;
27892 HOST_WIDE_INT delta
;
27894 unsigned long saved_regs
;
27896 offsets
= arm_get_frame_offsets ();
27897 saved_regs
= offsets
->saved_regs_mask
;
27899 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
27900 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
27903 if (frame_pointer_needed
)
27904 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
27907 /* LR will be the first saved register. */
27908 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
27913 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
27914 GEN_INT (delta
& ~4095)));
27919 addr
= stack_pointer_rtx
;
27921 addr
= plus_constant (Pmode
, addr
, delta
);
27924 /* The store needs to be marked to prevent DSE from deleting
27925 it as dead if it is based on fp. */
27926 mem
= gen_frame_mem (Pmode
, addr
);
27927 MEM_VOLATILE_P (mem
) = true;
27928 emit_move_insn (mem
, source
);
27934 thumb_set_return_address (rtx source
, rtx scratch
)
27936 arm_stack_offsets
*offsets
;
27937 HOST_WIDE_INT delta
;
27938 HOST_WIDE_INT limit
;
27941 unsigned long mask
;
27945 offsets
= arm_get_frame_offsets ();
27946 mask
= offsets
->saved_regs_mask
;
27947 if (mask
& (1 << LR_REGNUM
))
27950 /* Find the saved regs. */
27951 if (frame_pointer_needed
)
27953 delta
= offsets
->soft_frame
- offsets
->saved_args
;
27954 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
27960 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
27963 /* Allow for the stack frame. */
27964 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
27966 /* The link register is always the first saved register. */
27969 /* Construct the address. */
27970 addr
= gen_rtx_REG (SImode
, reg
);
27973 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
27974 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
27978 addr
= plus_constant (Pmode
, addr
, delta
);
27980 /* The store needs to be marked to prevent DSE from deleting
27981 it as dead if it is based on fp. */
27982 mem
= gen_frame_mem (Pmode
, addr
);
27983 MEM_VOLATILE_P (mem
) = true;
27984 emit_move_insn (mem
, source
);
27987 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
27990 /* Implements target hook vector_mode_supported_p. */
27992 arm_vector_mode_supported_p (machine_mode mode
)
27994 /* Neon also supports V2SImode, etc. listed in the clause below. */
27995 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
27996 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
27997 || mode
== V2DImode
|| mode
== V8HFmode
))
28000 if ((TARGET_NEON
|| TARGET_IWMMXT
)
28001 && ((mode
== V2SImode
)
28002 || (mode
== V4HImode
)
28003 || (mode
== V8QImode
)))
28006 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
28007 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
28008 || mode
== V2HAmode
))
28014 /* Implements target hook array_mode_supported_p. */
28017 arm_array_mode_supported_p (machine_mode mode
,
28018 unsigned HOST_WIDE_INT nelems
)
28020 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
28021 for now, as the lane-swapping logic needs to be extended in the expanders.
28022 See PR target/82518. */
28023 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
28024 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
28025 && (nelems
>= 2 && nelems
<= 4))
28031 /* Use the option -mvectorize-with-neon-double to override the use of quardword
28032 registers when autovectorizing for Neon, at least until multiple vector
28033 widths are supported properly by the middle-end. */
28035 static machine_mode
28036 arm_preferred_simd_mode (scalar_mode mode
)
28042 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
28044 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
28046 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
28048 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
28050 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
28057 if (TARGET_REALLY_IWMMXT
)
28073 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28075 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28076 using r0-r4 for function arguments, r7 for the stack frame and don't have
28077 enough left over to do doubleword arithmetic. For Thumb-2 all the
28078 potentially problematic instructions accept high registers so this is not
28079 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28080 that require many low registers. */
28082 arm_class_likely_spilled_p (reg_class_t rclass
)
28084 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
28085 || rclass
== CC_REG
)
28091 /* Implements target hook small_register_classes_for_mode_p. */
28093 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
28095 return TARGET_THUMB1
;
28098 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28099 ARM insns and therefore guarantee that the shift count is modulo 256.
28100 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28101 guarantee no particular behavior for out-of-range counts. */
28103 static unsigned HOST_WIDE_INT
28104 arm_shift_truncation_mask (machine_mode mode
)
28106 return mode
== SImode
? 255 : 0;
28110 /* Map internal gcc register numbers to DWARF2 register numbers. */
28113 arm_dbx_register_number (unsigned int regno
)
28118 if (IS_VFP_REGNUM (regno
))
28120 /* See comment in arm_dwarf_register_span. */
28121 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
28122 return 64 + regno
- FIRST_VFP_REGNUM
;
28124 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
28127 if (IS_IWMMXT_GR_REGNUM (regno
))
28128 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
28130 if (IS_IWMMXT_REGNUM (regno
))
28131 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
28133 return DWARF_FRAME_REGISTERS
;
28136 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28137 GCC models tham as 64 32-bit registers, so we need to describe this to
28138 the DWARF generation code. Other registers can use the default. */
28140 arm_dwarf_register_span (rtx rtl
)
28148 regno
= REGNO (rtl
);
28149 if (!IS_VFP_REGNUM (regno
))
28152 /* XXX FIXME: The EABI defines two VFP register ranges:
28153 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28155 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28156 corresponding D register. Until GDB supports this, we shall use the
28157 legacy encodings. We also use these encodings for D0-D15 for
28158 compatibility with older debuggers. */
28159 mode
= GET_MODE (rtl
);
28160 if (GET_MODE_SIZE (mode
) < 8)
28163 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
28165 nregs
= GET_MODE_SIZE (mode
) / 4;
28166 for (i
= 0; i
< nregs
; i
+= 2)
28167 if (TARGET_BIG_END
)
28169 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
28170 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
28174 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
28175 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
28180 nregs
= GET_MODE_SIZE (mode
) / 8;
28181 for (i
= 0; i
< nregs
; i
++)
28182 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
28185 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
28188 #if ARM_UNWIND_INFO
28189 /* Emit unwind directives for a store-multiple instruction or stack pointer
28190 push during alignment.
28191 These should only ever be generated by the function prologue code, so
28192 expect them to have a particular form.
28193 The store-multiple instruction sometimes pushes pc as the last register,
28194 although it should not be tracked into unwind information, or for -Os
28195 sometimes pushes some dummy registers before first register that needs
28196 to be tracked in unwind information; such dummy registers are there just
28197 to avoid separate stack adjustment, and will not be restored in the
28201 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
28204 HOST_WIDE_INT offset
;
28205 HOST_WIDE_INT nregs
;
28209 unsigned padfirst
= 0, padlast
= 0;
28212 e
= XVECEXP (p
, 0, 0);
28213 gcc_assert (GET_CODE (e
) == SET
);
28215 /* First insn will adjust the stack pointer. */
28216 gcc_assert (GET_CODE (e
) == SET
28217 && REG_P (SET_DEST (e
))
28218 && REGNO (SET_DEST (e
)) == SP_REGNUM
28219 && GET_CODE (SET_SRC (e
)) == PLUS
);
28221 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
28222 nregs
= XVECLEN (p
, 0) - 1;
28223 gcc_assert (nregs
);
28225 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
28228 /* For -Os dummy registers can be pushed at the beginning to
28229 avoid separate stack pointer adjustment. */
28230 e
= XVECEXP (p
, 0, 1);
28231 e
= XEXP (SET_DEST (e
), 0);
28232 if (GET_CODE (e
) == PLUS
)
28233 padfirst
= INTVAL (XEXP (e
, 1));
28234 gcc_assert (padfirst
== 0 || optimize_size
);
28235 /* The function prologue may also push pc, but not annotate it as it is
28236 never restored. We turn this into a stack pointer adjustment. */
28237 e
= XVECEXP (p
, 0, nregs
);
28238 e
= XEXP (SET_DEST (e
), 0);
28239 if (GET_CODE (e
) == PLUS
)
28240 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
28242 padlast
= offset
- 4;
28243 gcc_assert (padlast
== 0 || padlast
== 4);
28245 fprintf (asm_out_file
, "\t.pad #4\n");
28247 fprintf (asm_out_file
, "\t.save {");
28249 else if (IS_VFP_REGNUM (reg
))
28252 fprintf (asm_out_file
, "\t.vsave {");
28255 /* Unknown register type. */
28256 gcc_unreachable ();
28258 /* If the stack increment doesn't match the size of the saved registers,
28259 something has gone horribly wrong. */
28260 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
28264 /* The remaining insns will describe the stores. */
28265 for (i
= 1; i
<= nregs
; i
++)
28267 /* Expect (set (mem <addr>) (reg)).
28268 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
28269 e
= XVECEXP (p
, 0, i
);
28270 gcc_assert (GET_CODE (e
) == SET
28271 && MEM_P (SET_DEST (e
))
28272 && REG_P (SET_SRC (e
)));
28274 reg
= REGNO (SET_SRC (e
));
28275 gcc_assert (reg
>= lastreg
);
28278 fprintf (asm_out_file
, ", ");
28279 /* We can't use %r for vfp because we need to use the
28280 double precision register names. */
28281 if (IS_VFP_REGNUM (reg
))
28282 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
28284 asm_fprintf (asm_out_file
, "%r", reg
);
28288 /* Check that the addresses are consecutive. */
28289 e
= XEXP (SET_DEST (e
), 0);
28290 if (GET_CODE (e
) == PLUS
)
28291 gcc_assert (REG_P (XEXP (e
, 0))
28292 && REGNO (XEXP (e
, 0)) == SP_REGNUM
28293 && CONST_INT_P (XEXP (e
, 1))
28294 && offset
== INTVAL (XEXP (e
, 1)));
28298 && REGNO (e
) == SP_REGNUM
);
28299 offset
+= reg_size
;
28302 fprintf (asm_out_file
, "}\n");
28304 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
28307 /* Emit unwind directives for a SET. */
28310 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
28318 switch (GET_CODE (e0
))
28321 /* Pushing a single register. */
28322 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
28323 || !REG_P (XEXP (XEXP (e0
, 0), 0))
28324 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
28327 asm_fprintf (asm_out_file
, "\t.save ");
28328 if (IS_VFP_REGNUM (REGNO (e1
)))
28329 asm_fprintf(asm_out_file
, "{d%d}\n",
28330 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
28332 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
28336 if (REGNO (e0
) == SP_REGNUM
)
28338 /* A stack increment. */
28339 if (GET_CODE (e1
) != PLUS
28340 || !REG_P (XEXP (e1
, 0))
28341 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
28342 || !CONST_INT_P (XEXP (e1
, 1)))
28345 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
28346 -INTVAL (XEXP (e1
, 1)));
28348 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
28350 HOST_WIDE_INT offset
;
28352 if (GET_CODE (e1
) == PLUS
)
28354 if (!REG_P (XEXP (e1
, 0))
28355 || !CONST_INT_P (XEXP (e1
, 1)))
28357 reg
= REGNO (XEXP (e1
, 0));
28358 offset
= INTVAL (XEXP (e1
, 1));
28359 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
28360 HARD_FRAME_POINTER_REGNUM
, reg
,
28363 else if (REG_P (e1
))
28366 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
28367 HARD_FRAME_POINTER_REGNUM
, reg
);
28372 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
28374 /* Move from sp to reg. */
28375 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
28377 else if (GET_CODE (e1
) == PLUS
28378 && REG_P (XEXP (e1
, 0))
28379 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
28380 && CONST_INT_P (XEXP (e1
, 1)))
28382 /* Set reg to offset from sp. */
28383 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
28384 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
28396 /* Emit unwind directives for the given insn. */
28399 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
28402 bool handled_one
= false;
28404 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
28407 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
28408 && (TREE_NOTHROW (current_function_decl
)
28409 || crtl
->all_throwers_are_sibcalls
))
28412 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
28415 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
28417 switch (REG_NOTE_KIND (note
))
28419 case REG_FRAME_RELATED_EXPR
:
28420 pat
= XEXP (note
, 0);
28423 case REG_CFA_REGISTER
:
28424 pat
= XEXP (note
, 0);
28427 pat
= PATTERN (insn
);
28428 if (GET_CODE (pat
) == PARALLEL
)
28429 pat
= XVECEXP (pat
, 0, 0);
28432 /* Only emitted for IS_STACKALIGN re-alignment. */
28437 src
= SET_SRC (pat
);
28438 dest
= SET_DEST (pat
);
28440 gcc_assert (src
== stack_pointer_rtx
);
28441 reg
= REGNO (dest
);
28442 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28445 handled_one
= true;
28448 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
28449 to get correct dwarf information for shrink-wrap. We should not
28450 emit unwind information for it because these are used either for
28451 pretend arguments or notes to adjust sp and restore registers from
28453 case REG_CFA_DEF_CFA
:
28454 case REG_CFA_ADJUST_CFA
:
28455 case REG_CFA_RESTORE
:
28458 case REG_CFA_EXPRESSION
:
28459 case REG_CFA_OFFSET
:
28460 /* ??? Only handling here what we actually emit. */
28461 gcc_unreachable ();
28469 pat
= PATTERN (insn
);
28472 switch (GET_CODE (pat
))
28475 arm_unwind_emit_set (asm_out_file
, pat
);
28479 /* Store multiple. */
28480 arm_unwind_emit_sequence (asm_out_file
, pat
);
28489 /* Output a reference from a function exception table to the type_info
28490 object X. The EABI specifies that the symbol should be relocated by
28491 an R_ARM_TARGET2 relocation. */
28494 arm_output_ttype (rtx x
)
28496 fputs ("\t.word\t", asm_out_file
);
28497 output_addr_const (asm_out_file
, x
);
28498 /* Use special relocations for symbol references. */
28499 if (!CONST_INT_P (x
))
28500 fputs ("(TARGET2)", asm_out_file
);
28501 fputc ('\n', asm_out_file
);
28506 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
28509 arm_asm_emit_except_personality (rtx personality
)
28511 fputs ("\t.personality\t", asm_out_file
);
28512 output_addr_const (asm_out_file
, personality
);
28513 fputc ('\n', asm_out_file
);
28515 #endif /* ARM_UNWIND_INFO */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}
28533 /* Output unwind directives for the start/end of a function. */
28536 arm_output_fn_unwind (FILE * f
, bool prologue
)
28538 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
28542 fputs ("\t.fnstart\n", f
);
28545 /* If this function will never be unwound, then mark it as such.
28546 The came condition is used in arm_unwind_emit to suppress
28547 the frame annotations. */
28548 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
28549 && (TREE_NOTHROW (current_function_decl
)
28550 || crtl
->all_throwers_are_sibcalls
))
28551 fputs("\t.cantunwind\n", f
);
28553 fputs ("\t.fnend\n", f
);
28558 arm_emit_tls_decoration (FILE *fp
, rtx x
)
28560 enum tls_reloc reloc
;
28563 val
= XVECEXP (x
, 0, 0);
28564 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
28566 output_addr_const (fp
, val
);
28571 fputs ("(tlsgd)", fp
);
28573 case TLS_GD32_FDPIC
:
28574 fputs ("(tlsgd_fdpic)", fp
);
28577 fputs ("(tlsldm)", fp
);
28579 case TLS_LDM32_FDPIC
:
28580 fputs ("(tlsldm_fdpic)", fp
);
28583 fputs ("(tlsldo)", fp
);
28586 fputs ("(gottpoff)", fp
);
28588 case TLS_IE32_FDPIC
:
28589 fputs ("(gottpoff_fdpic)", fp
);
28592 fputs ("(tpoff)", fp
);
28595 fputs ("(tlsdesc)", fp
);
28598 gcc_unreachable ();
28607 fputs (" + (. - ", fp
);
28608 output_addr_const (fp
, XVECEXP (x
, 0, 2));
28609 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
28610 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
28611 output_addr_const (fp
, XVECEXP (x
, 0, 3));
28621 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
28624 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
28626 gcc_assert (size
== 4);
28627 fputs ("\t.word\t", file
);
28628 output_addr_const (file
, x
);
28629 fputs ("(tlsldo)", file
);
28632 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
28635 arm_output_addr_const_extra (FILE *fp
, rtx x
)
28637 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
28638 return arm_emit_tls_decoration (fp
, x
);
28639 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
28642 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
28644 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
28645 assemble_name_raw (fp
, label
);
28649 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
28651 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
28655 output_addr_const (fp
, XVECEXP (x
, 0, 0));
28659 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
28661 output_addr_const (fp
, XVECEXP (x
, 0, 0));
28665 output_addr_const (fp
, XVECEXP (x
, 0, 1));
28669 else if (GET_CODE (x
) == CONST_VECTOR
)
28670 return arm_emit_vector_const (fp
, x
);
28675 /* Output assembly for a shift instruction.
28676 SET_FLAGS determines how the instruction modifies the condition codes.
28677 0 - Do not set condition codes.
28678 1 - Set condition codes.
28679 2 - Use smallest instruction. */
28681 arm_output_shift(rtx
* operands
, int set_flags
)
28684 static const char flag_chars
[3] = {'?', '.', '!'};
28689 c
= flag_chars
[set_flags
];
28690 shift
= shift_op(operands
[3], &val
);
28694 operands
[2] = GEN_INT(val
);
28695 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
28698 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
28700 output_asm_insn (pattern
, operands
);
28704 /* Output assembly for a WMMX immediate shift instruction. */
28706 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
28708 int shift
= INTVAL (operands
[2]);
28710 machine_mode opmode
= GET_MODE (operands
[0]);
28712 gcc_assert (shift
>= 0);
28714 /* If the shift value in the register versions is > 63 (for D qualifier),
28715 31 (for W qualifier) or 15 (for H qualifier). */
28716 if (((opmode
== V4HImode
) && (shift
> 15))
28717 || ((opmode
== V2SImode
) && (shift
> 31))
28718 || ((opmode
== DImode
) && (shift
> 63)))
28722 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
28723 output_asm_insn (templ
, operands
);
28724 if (opmode
== DImode
)
28726 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
28727 output_asm_insn (templ
, operands
);
28732 /* The destination register will contain all zeros. */
28733 sprintf (templ
, "wzero\t%%0");
28734 output_asm_insn (templ
, operands
);
28739 if ((opmode
== DImode
) && (shift
> 32))
28741 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
28742 output_asm_insn (templ
, operands
);
28743 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
28744 output_asm_insn (templ
, operands
);
28748 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
28749 output_asm_insn (templ
, operands
);
28754 /* Output assembly for a WMMX tinsr instruction. */
28756 arm_output_iwmmxt_tinsr (rtx
*operands
)
28758 int mask
= INTVAL (operands
[3]);
28761 int units
= mode_nunits
[GET_MODE (operands
[0])];
28762 gcc_assert ((mask
& (mask
- 1)) == 0);
28763 for (i
= 0; i
< units
; ++i
)
28765 if ((mask
& 0x01) == 1)
28771 gcc_assert (i
< units
);
28773 switch (GET_MODE (operands
[0]))
28776 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
28779 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
28782 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
28785 gcc_unreachable ();
28788 output_asm_insn (templ
, operands
);
28793 /* Output a Thumb-1 casesi dispatch sequence. */
28795 thumb1_output_casesi (rtx
*operands
)
28797 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
28799 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
28801 switch (GET_MODE(diff_vec
))
28804 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
28805 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
28807 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
28808 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
28810 return "bl\t%___gnu_thumb1_case_si";
28812 gcc_unreachable ();
28816 /* Output a Thumb-2 casesi instruction. */
28818 thumb2_output_casesi (rtx
*operands
)
28820 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
28822 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
28824 output_asm_insn ("cmp\t%0, %1", operands
);
28825 output_asm_insn ("bhi\t%l3", operands
);
28826 switch (GET_MODE(diff_vec
))
28829 return "tbb\t[%|pc, %0]";
28831 return "tbh\t[%|pc, %0, lsl #1]";
28835 output_asm_insn ("adr\t%4, %l2", operands
);
28836 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
28837 output_asm_insn ("add\t%4, %4, %5", operands
);
28842 output_asm_insn ("adr\t%4, %l2", operands
);
28843 return "ldr\t%|pc, [%4, %0, lsl #2]";
28846 gcc_unreachable ();
28850 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
28851 per-core tuning structs. */
28853 arm_issue_rate (void)
28855 return current_tune
->issue_rate
;
28858 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
28860 arm_sched_variable_issue (FILE *, int, rtx_insn
*insn
, int more
)
28862 if (DEBUG_INSN_P (insn
))
28865 rtx_code code
= GET_CODE (PATTERN (insn
));
28866 if (code
== USE
|| code
== CLOBBER
)
28869 if (get_attr_type (insn
) == TYPE_NO_INSN
)
28875 /* Return how many instructions should scheduler lookahead to choose the
28878 arm_first_cycle_multipass_dfa_lookahead (void)
28880 int issue_rate
= arm_issue_rate ();
28882 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
28885 /* Enable modeling of L2 auto-prefetcher. */
28887 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
28889 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
28893 arm_mangle_type (const_tree type
)
28895 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28896 has to be managled as if it is in the "std" namespace. */
28897 if (TARGET_AAPCS_BASED
28898 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
28899 return "St9__va_list";
28901 /* Half-precision float. */
28902 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
28905 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
28907 if (TYPE_NAME (type
) != NULL
)
28908 return arm_mangle_builtin_type (type
);
28910 /* Use the default mangling. */
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};
28926 /* Adjust register allocation order when compiling for Thumb. */
28929 arm_order_regs_for_local_alloc (void)
28931 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
28932 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
28934 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
28935 sizeof (thumb_core_reg_alloc_order
));
28938 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28941 arm_frame_pointer_required (void)
28943 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
28946 /* If the function receives nonlocal gotos, it needs to save the frame
28947 pointer in the nonlocal_goto_save_area object. */
28948 if (cfun
->has_nonlocal_label
)
28951 /* The frame pointer is required for non-leaf APCS frames. */
28952 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !crtl
->is_leaf
)
28955 /* If we are probing the stack in the prologue, we will have a faulting
28956 instruction prior to the stack adjustment and this requires a frame
28957 pointer if we want to catch the exception using the EABI unwinder. */
28958 if (!IS_INTERRUPT (arm_current_func_type ())
28959 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
28960 || flag_stack_clash_protection
)
28961 && arm_except_unwind_info (&global_options
) == UI_TARGET
28962 && cfun
->can_throw_non_call_exceptions
)
28964 HOST_WIDE_INT size
= get_frame_size ();
28966 /* That's irrelevant if there is no stack adjustment. */
28970 /* That's relevant only if there is a stack probe. */
28971 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
28973 /* We don't have the final size of the frame so adjust. */
28974 size
+= 32 * UNITS_PER_WORD
;
28975 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
28985 /* Only thumb1 can't support conditional execution, so return true if
28986 the target is not thumb1. */
28988 arm_have_conditional_execution (void)
28990 return !TARGET_THUMB1
;
28993 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28994 static HOST_WIDE_INT
28995 arm_vector_alignment (const_tree type
)
28997 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
28999 if (TARGET_AAPCS_BASED
)
29000 align
= MIN (align
, 64);
29006 arm_autovectorize_vector_sizes (vector_sizes
*sizes
, bool)
29008 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
29010 sizes
->safe_push (16);
29011 sizes
->safe_push (8);
29016 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
29018 /* Vectors which aren't in packed structures will not be less aligned than
29019 the natural alignment of their element type, so this is safe. */
29020 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
29023 return default_builtin_vector_alignment_reachable (type
, is_packed
);
29027 arm_builtin_support_vector_misalignment (machine_mode mode
,
29028 const_tree type
, int misalignment
,
29031 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
29033 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
29038 /* If the misalignment is unknown, we should be able to handle the access
29039 so long as it is not to a member of a packed data structure. */
29040 if (misalignment
== -1)
29043 /* Return true if the misalignment is a multiple of the natural alignment
29044 of the vector's element type. This is probably always going to be
29045 true in practice, since we've already established that this isn't a
29047 return ((misalignment
% align
) == 0);
29050 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
29055 arm_conditional_register_usage (void)
29059 if (TARGET_THUMB1
&& optimize_size
)
29061 /* When optimizing for size on Thumb-1, it's better not
29062 to use the HI regs, because of the overhead of
29064 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
29065 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
29068 /* The link register can be clobbered by any branch insn,
29069 but we have no way to track that at present, so mark
29070 it as unavailable. */
29072 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
29074 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
29076 /* VFPv3 registers are disabled when earlier VFP
29077 versions are selected due to the definition of
29078 LAST_VFP_REGNUM. */
29079 for (regno
= FIRST_VFP_REGNUM
;
29080 regno
<= LAST_VFP_REGNUM
; ++ regno
)
29082 fixed_regs
[regno
] = 0;
29083 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
29084 || regno
>= FIRST_VFP_REGNUM
+ 32;
29088 if (TARGET_REALLY_IWMMXT
&& !TARGET_GENERAL_REGS_ONLY
)
29090 regno
= FIRST_IWMMXT_GR_REGNUM
;
29091 /* The 2002/10/09 revision of the XScale ABI has wCG0
29092 and wCG1 as call-preserved registers. The 2002/11/21
29093 revision changed this so that all wCG registers are
29094 scratch registers. */
29095 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
29096 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
29097 fixed_regs
[regno
] = 0;
29098 /* The XScale ABI has wR0 - wR9 as scratch registers,
29099 the rest as call-preserved registers. */
29100 for (regno
= FIRST_IWMMXT_REGNUM
;
29101 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
29103 fixed_regs
[regno
] = 0;
29104 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
29108 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
29110 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
29111 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
29113 else if (TARGET_APCS_STACK
)
29115 fixed_regs
[10] = 1;
29116 call_used_regs
[10] = 1;
29118 /* -mcaller-super-interworking reserves r11 for calls to
29119 _interwork_r11_call_via_rN(). Making the register global
29120 is an easy way of ensuring that it remains valid for all
29122 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
29123 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
29125 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29126 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29127 if (TARGET_CALLER_INTERWORKING
)
29128 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29130 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29134 arm_preferred_rename_class (reg_class_t rclass
)
29136 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29137 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
29138 and code size can be reduced. */
29139 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
29145 /* Compute the attribute "length" of insn "*push_multi".
29146 So this function MUST be kept in sync with that insn pattern. */
29148 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
29150 int i
, regno
, hi_reg
;
29151 int num_saves
= XVECLEN (parallel_op
, 0);
29161 regno
= REGNO (first_op
);
29162 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
29163 list is 8-bit. Normally this means all registers in the list must be
29164 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
29165 encodings. There is one exception for PUSH that LR in HI_REGS can be used
29166 with 16-bit encoding. */
29167 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
29168 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
29170 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
29171 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
29179 /* Compute the attribute "length" of insn. Currently, this function is used
29180 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
29181 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
29182 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
29183 true if OPERANDS contains insn which explicit updates base register. */
29186 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
29195 rtx parallel_op
= operands
[0];
29196 /* Initialize to elements number of PARALLEL. */
29197 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
29198 /* Initialize the value to base register. */
29199 unsigned regno
= REGNO (operands
[1]);
29200 /* Skip return and write back pattern.
29201 We only need register pop pattern for later analysis. */
29202 unsigned first_indx
= 0;
29203 first_indx
+= return_pc
? 1 : 0;
29204 first_indx
+= write_back_p
? 1 : 0;
29206 /* A pop operation can be done through LDM or POP. If the base register is SP
29207 and if it's with write back, then a LDM will be alias of POP. */
29208 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
29209 bool ldm_p
= !pop_p
;
29211 /* Check base register for LDM. */
29212 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
29215 /* Check each register in the list. */
29216 for (; indx
>= first_indx
; indx
--)
29218 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
29219 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
29220 comment in arm_attr_length_push_multi. */
29221 if (REGNO_REG_CLASS (regno
) == HI_REGS
29222 && (regno
!= PC_REGNUM
|| ldm_p
))
29229 /* Compute the number of instructions emitted by output_move_double. */
29231 arm_count_output_move_double_insns (rtx
*operands
)
29235 /* output_move_double may modify the operands array, so call it
29236 here on a copy of the array. */
29237 ops
[0] = operands
[0];
29238 ops
[1] = operands
[1];
29239 output_move_double (ops
, false, &count
);
29243 /* Same as above, but operands are a register/memory pair in SImode.
29244 Assumes operands has the base register in position 0 and memory in position
29245 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
29247 arm_count_ldrdstrd_insns (rtx
*operands
, bool load
)
29251 int regnum
, memnum
;
29253 regnum
= 0, memnum
= 1;
29255 regnum
= 1, memnum
= 0;
29256 ops
[regnum
] = gen_rtx_REG (DImode
, REGNO (operands
[0]));
29257 ops
[memnum
] = adjust_address (operands
[2], DImode
, 0);
29258 output_move_double (ops
, false, &count
);
29264 vfp3_const_double_for_fract_bits (rtx operand
)
29266 REAL_VALUE_TYPE r0
;
29268 if (!CONST_DOUBLE_P (operand
))
29271 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
29272 if (exact_real_inverse (DFmode
, &r0
)
29273 && !REAL_VALUE_NEGATIVE (r0
))
29275 if (exact_real_truncate (DFmode
, &r0
))
29277 HOST_WIDE_INT value
= real_to_integer (&r0
);
29278 value
= value
& 0xffffffff;
29279 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
29281 int ret
= exact_log2 (value
);
29282 gcc_assert (IN_RANGE (ret
, 0, 31));
29290 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
29291 log2 is in [1, 32], return that log2. Otherwise return -1.
29292 This is used in the patterns for vcvt.s32.f32 floating-point to
29293 fixed-point conversions. */
29296 vfp3_const_double_for_bits (rtx x
)
29298 const REAL_VALUE_TYPE
*r
;
29300 if (!CONST_DOUBLE_P (x
))
29303 r
= CONST_DOUBLE_REAL_VALUE (x
);
29305 if (REAL_VALUE_NEGATIVE (*r
)
29306 || REAL_VALUE_ISNAN (*r
)
29307 || REAL_VALUE_ISINF (*r
)
29308 || !real_isinteger (r
, SFmode
))
29311 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
29313 /* The exact_log2 above will have returned -1 if this is
29314 not an exact log2. */
29315 if (!IN_RANGE (hwint
, 1, 32))
29322 /* Emit a memory barrier around an atomic sequence according to MODEL. */
29325 arm_pre_atomic_barrier (enum memmodel model
)
29327 if (need_atomic_barrier_p (model
, true))
29328 emit_insn (gen_memory_barrier ());
29332 arm_post_atomic_barrier (enum memmodel model
)
29334 if (need_atomic_barrier_p (model
, false))
29335 emit_insn (gen_memory_barrier ());
29338 /* Emit the load-exclusive and store-exclusive instructions.
29339 Use acquire and release versions if necessary. */
29342 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
29344 rtx (*gen
) (rtx
, rtx
);
29350 case E_QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
29351 case E_HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
29352 case E_SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
29353 case E_DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
29355 gcc_unreachable ();
29362 case E_QImode
: gen
= gen_arm_load_exclusiveqi
; break;
29363 case E_HImode
: gen
= gen_arm_load_exclusivehi
; break;
29364 case E_SImode
: gen
= gen_arm_load_exclusivesi
; break;
29365 case E_DImode
: gen
= gen_arm_load_exclusivedi
; break;
29367 gcc_unreachable ();
29371 emit_insn (gen (rval
, mem
));
29375 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
29378 rtx (*gen
) (rtx
, rtx
, rtx
);
29384 case E_QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
29385 case E_HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
29386 case E_SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
29387 case E_DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
29389 gcc_unreachable ();
29396 case E_QImode
: gen
= gen_arm_store_exclusiveqi
; break;
29397 case E_HImode
: gen
= gen_arm_store_exclusivehi
; break;
29398 case E_SImode
: gen
= gen_arm_store_exclusivesi
; break;
29399 case E_DImode
: gen
= gen_arm_store_exclusivedi
; break;
29401 gcc_unreachable ();
29405 emit_insn (gen (bval
, rval
, mem
));
29408 /* Mark the previous jump instruction as unlikely. */
29411 emit_unlikely_jump (rtx insn
)
29413 rtx_insn
*jump
= emit_jump_insn (insn
);
29414 add_reg_br_prob_note (jump
, profile_probability::very_unlikely ());
29417 /* Expand a compare and swap pattern. */
29420 arm_expand_compare_and_swap (rtx operands
[])
29422 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
29423 machine_mode mode
, cmp_mode
;
29425 bval
= operands
[0];
29426 rval
= operands
[1];
29428 oldval
= operands
[3];
29429 newval
= operands
[4];
29430 is_weak
= operands
[5];
29431 mod_s
= operands
[6];
29432 mod_f
= operands
[7];
29433 mode
= GET_MODE (mem
);
29435 /* Normally the succ memory model must be stronger than fail, but in the
29436 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
29437 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
29439 if (TARGET_HAVE_LDACQ
29440 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
29441 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
29442 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
29448 /* For narrow modes, we're going to perform the comparison in SImode,
29449 so do the zero-extension now. */
29450 rval
= gen_reg_rtx (SImode
);
29451 oldval
= convert_modes (SImode
, mode
, oldval
, true);
29455 /* Force the value into a register if needed. We waited until after
29456 the zero-extension above to do this properly. */
29457 if (!arm_add_operand (oldval
, SImode
))
29458 oldval
= force_reg (SImode
, oldval
);
29462 if (!cmpdi_operand (oldval
, mode
))
29463 oldval
= force_reg (mode
, oldval
);
29467 gcc_unreachable ();
29471 cmp_mode
= E_SImode
;
29473 cmp_mode
= CC_Zmode
;
29475 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CC_Zmode
, CC_REGNUM
);
29476 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode
, mode
, bdst
, rval
, mem
,
29477 oldval
, newval
, is_weak
, mod_s
, mod_f
));
29479 if (mode
== QImode
|| mode
== HImode
)
29480 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
29482 /* In all cases, we arrange for success to be signaled by Z set.
29483 This arrangement allows for the boolean result to be used directly
29484 in a subsequent branch, post optimization. For Thumb-1 targets, the
29485 boolean negation of the result is also stored in bval because Thumb-1
29486 backend lacks dependency tracking for CC flag due to flag-setting not
29487 being represented at RTL level. */
29489 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
29492 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
29493 emit_insn (gen_rtx_SET (bval
, x
));
29497 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
29498 another memory store between the load-exclusive and store-exclusive can
29499 reset the monitor from Exclusive to Open state. This means we must wait
29500 until after reload to split the pattern, lest we get a register spill in
29501 the middle of the atomic sequence. Success of the compare and swap is
29502 indicated by the Z flag set for 32bit targets and by neg_bval being zero
29503 for Thumb-1 targets (ie. negation of the boolean value returned by
29504 atomic_compare_and_swapmode standard pattern in operand 0). */
29507 arm_split_compare_and_swap (rtx operands
[])
29509 rtx rval
, mem
, oldval
, newval
, neg_bval
, mod_s_rtx
;
29511 enum memmodel mod_s
, mod_f
;
29513 rtx_code_label
*label1
, *label2
;
29516 rval
= operands
[1];
29518 oldval
= operands
[3];
29519 newval
= operands
[4];
29520 is_weak
= (operands
[5] != const0_rtx
);
29521 mod_s_rtx
= operands
[6];
29522 mod_s
= memmodel_from_int (INTVAL (mod_s_rtx
));
29523 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
29524 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
29525 mode
= GET_MODE (mem
);
29527 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
29529 bool use_acquire
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_acquire (mod_s_rtx
);
29530 bool use_release
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_release (mod_s_rtx
);
29532 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
29533 a full barrier is emitted after the store-release. */
29535 use_acquire
= false;
29537 /* Checks whether a barrier is needed and emits one accordingly. */
29538 if (!(use_acquire
|| use_release
))
29539 arm_pre_atomic_barrier (mod_s
);
29544 label1
= gen_label_rtx ();
29545 emit_label (label1
);
29547 label2
= gen_label_rtx ();
29549 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
29551 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
29552 as required to communicate with arm_expand_compare_and_swap. */
29555 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
29556 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
29557 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
29558 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
29559 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
29563 emit_move_insn (neg_bval
, const1_rtx
);
29564 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
29565 if (thumb1_cmpneg_operand (oldval
, SImode
))
29566 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval
, rval
, oldval
,
29569 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
29572 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
29574 /* Weak or strong, we want EQ to be true for success, so that we
29575 match the flags that we got from the compare above. */
29578 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
29579 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
29580 emit_insn (gen_rtx_SET (cond
, x
));
29585 /* Z is set to boolean value of !neg_bval, as required to communicate
29586 with arm_expand_compare_and_swap. */
29587 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
29588 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
29591 if (!is_mm_relaxed (mod_f
))
29592 emit_label (label2
);
29594 /* Checks whether a barrier is needed and emits one accordingly. */
29596 || !(use_acquire
|| use_release
))
29597 arm_post_atomic_barrier (mod_s
);
29599 if (is_mm_relaxed (mod_f
))
29600 emit_label (label2
);
29603 /* Split an atomic operation pattern. Operation is given by CODE and is one
29604 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
29605 operation). Operation is performed on the content at MEM and on VALUE
29606 following the memory model MODEL_RTX. The content at MEM before and after
29607 the operation is returned in OLD_OUT and NEW_OUT respectively while the
29608 success of the operation is returned in COND. Using a scratch register or
/* Split an atomic read-modify-write (operation CODE applied to MEM with
   VALUE) into a load-exclusive / compute / store-exclusive retry loop,
   honouring the memory model supplied in MODEL_RTX.  OLD_OUT and NEW_OUT
   receive the pre- and post-operation values; COND is a scratch used for
   the store-exclusive success flag.
   NOTE(review): this extract is garbled -- statements are split across
   physical lines and several original lines (braces, conditions, the
   return type) are missing, so fragments below are not always complete
   statements.  Verify against the upstream file before editing.  */
29609 an operand register for these determines what result is returned for that
29613 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
29614 rtx value
, rtx model_rtx
, rtx cond
)
/* Decode the memory model and pick the working mode (DImode stays DImode,
   everything narrower is widened to SImode).  */
29616 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
29617 machine_mode mode
= GET_MODE (mem
);
29618 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
29619 rtx_code_label
*label
;
29620 bool all_low_regs
, bind_old_new
;
29623 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
/* Acquire/release forms are only available with the LDA/STL family
   (TARGET_HAVE_LDACQ).  */
29625 bool use_acquire
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_acquire (model_rtx
);
29626 bool use_release
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_release (model_rtx
);
29628 /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
29629 a full barrier is emitted after the store-release. */
29631 use_acquire
= false;
29633 /* Checks whether a barrier is needed and emits one accordingly. */
29634 if (!(use_acquire
|| use_release
))
29635 arm_pre_atomic_barrier (model
);
/* Top of the retry loop: the store-exclusive branches back here on
   failure.  */
29637 label
= gen_label_rtx ();
29638 emit_label (label
);
/* Narrow the in/out registers to the working mode.  */
29641 new_out
= gen_lowpart (wmode
, new_out
);
29643 old_out
= gen_lowpart (wmode
, old_out
);
29646 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
29648 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
29650 /* Does the operation require destination and first operand to use the same
29651 register?  This is decided by register constraints of relevant insn
29652 patterns in thumb1.md. */
29653 gcc_assert (!new_out
|| REG_P (new_out
));
29654 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
29655 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
29656 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
/* NOTE(review): the assignment that this condition belongs to
   (bind_old_new = ...) is among the dropped lines.  */
29661 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
29663 /* We want to return the old value while putting the result of the operation
29664 in the same register as the old value so copy the old value over to the
29665 destination register and use that register for the operation. */
29666 if (old_out
&& bind_old_new
)
29668 emit_move_insn (new_out
, old_out
);
/* NAND path: AND then NOT (visible from the gen_rtx_AND/gen_rtx_NOT
   pair below); the enclosing case/if lines are missing.  */
29679 x
= gen_rtx_AND (wmode
, old_out
, value
);
29680 emit_insn (gen_rtx_SET (new_out
, x
));
29681 x
= gen_rtx_NOT (wmode
, new_out
);
29682 emit_insn (gen_rtx_SET (new_out
, x
));
/* MINUS with a constant is rewritten as PLUS of the negated constant.  */
29686 if (CONST_INT_P (value
))
29688 value
= GEN_INT (-INTVAL (value
));
29694 if (mode
== DImode
)
29696 /* DImode plus/minus need to clobber flags. */
29697 /* The adddi3 and subdi3 patterns are incorrectly written so that
29698 they require matching operands, even when we could easily support
29699 three operands. Thankfully, this can be fixed up post-splitting,
29700 as the individual add+adc patterns do accept three operands and
29701 post-reload cprop can make these moves go away. */
29702 emit_move_insn (new_out
, old_out
);
29704 x
= gen_adddi3 (new_out
, new_out
, value
);
29706 x
= gen_subdi3 (new_out
, new_out
, value
);
/* Generic binary-op fallback for the remaining rtx codes.  */
29713 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
29714 emit_insn (gen_rtx_SET (new_out
, x
));
/* Attempt the store; COND receives the store-exclusive status.  */
29718 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
/* Loop back to the label while the store-exclusive reports failure
   (COND != 0).  */
29721 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
29722 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
29724 /* Checks whether a barrier is needed and emits one accordingly. */
29726 || !(use_acquire
|| use_release
))
29727 arm_post_atomic_barrier (model
);
29730 #define MAX_VECT_LEN 16
29732 struct expand_vec_perm_d
29734 rtx target
, op0
, op1
;
29735 vec_perm_indices perm
;
29736 machine_mode vmode
;
29741 /* Generate a variable permutation. */
/* Emit a NEON VTBL1/VTBL2 sequence that permutes OP0 (and OP1, when the
   two inputs differ) into TARGET according to the byte selector SEL.
   Only V8QImode and V16QImode are accepted (asserted below).
   NOTE(review): garbled extract -- the one-vector vs. two-vector
   if/else scaffolding is partially missing.  */
29744 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
29746 machine_mode vmode
= GET_MODE (target
);
29747 bool one_vector_p
= rtx_equal_p (op0
, op1
);
/* All four operands must share the same QImode vector mode, and NEON
   must be enabled.  */
29749 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
29750 gcc_checking_assert (GET_MODE (op0
) == vmode
);
29751 gcc_checking_assert (GET_MODE (op1
) == vmode
);
29752 gcc_checking_assert (GET_MODE (sel
) == vmode
);
29753 gcc_checking_assert (TARGET_NEON
);
/* Single-input case: one-register table lookup.  */
29757 if (vmode
== V8QImode
)
29758 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
29760 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
/* Two-input case: combine OP0/OP1 into a register pair and use a
   two-register table lookup.  */
29766 if (vmode
== V8QImode
)
29768 pair
= gen_reg_rtx (V16QImode
);
29769 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
29770 pair
= gen_lowpart (TImode
, pair
);
29771 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
29775 pair
= gen_reg_rtx (OImode
);
29776 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
29777 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
/* Expand a variable vector permutation: mask the selector SEL modulo the
   element count (VTBL has no implicit modulo) and hand off to
   arm_expand_vec_perm_1.  Little-endian only (asserted).
   NOTE(review): garbled extract; declarations for `mask` are among the
   missing lines.  */
29783 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
29785 machine_mode vmode
= GET_MODE (target
);
29786 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
29787 bool one_vector_p
= rtx_equal_p (op0
, op1
);
29790 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29791 numbering of elements for big-endian, we must reverse the order. */
29792 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
29794 /* The VTBL instruction does not use a modulo index, so we must take care
29795 of that ourselves. */
29796 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
29797 mask
= gen_const_vec_duplicate (vmode
, mask
);
29798 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
29800 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
29803 /* Map lane ordering between architectural lane order, and GCC lane order,
29804 taking into account ABI. See comment above output_move_neon for details. */
/* On little-endian this is the identity; on big-endian the lane order is
   reversed and, for 16-byte vectors, the two D registers are swapped to
   match the ABI.
   NOTE(review): the trailing `return lane;` of the original is among
   the dropped lines.  */
29807 neon_endian_lane_map (machine_mode mode
, int lane
)
29809 if (BYTES_BIG_ENDIAN
)
29811 int nelems
= GET_MODE_NUNITS (mode
);
29812 /* Reverse lane order. */
29813 lane
= (nelems
- 1 - lane
);
29814 /* Reverse D register order, to match ABI. */
29815 if (GET_MODE_SIZE (mode
) == 16)
29816 lane
= lane
^ (nelems
/ 2);
29821 /* Some permutations index into pairs of vectors, this is a helper function
29822 to map indexes into those pairs of vectors. */
/* LANE indexes the concatenation of two MODE vectors; the low bits pick
   the lane within a vector (remapped via neon_endian_lane_map on
   big-endian) and the `lane & nelem` bit selects which vector.  */
29825 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
29827 int nelem
= GET_MODE_NUNITS (mode
);
29828 if (BYTES_BIG_ENDIAN
)
29830 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
29834 /* Generate or test for an insn that supports a constant permutation. */
29836 /* Recognize patterns for the VUZP insns. */
/* Return whether D's permutation selects all even (or all odd) lanes --
   the unzip pattern -- and, unless d->testing_p, emit the VUZP insn.
   NOTE(review): garbled extract; the early `return false` paths and the
   final `return true` are among the dropped lines.  */
29839 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
29841 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
29842 rtx out0
, out1
, in0
, in1
;
/* VUZP does not exist for 64-bit element sizes.  */
29846 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
29849 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
29850 big endian pattern on 64 bit vectors, so we correct for that. */
29851 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
29852 && GET_MODE_SIZE (d
->vmode
) == 8 ? nelt
: 0;
/* Classify as the even-lane (odd = 0) or odd-lane (odd = 1) unzip from
   the first selected element.  */
29854 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
29856 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
29858 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
29862 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
/* Verify every index follows the stride-2 unzip pattern.  */
29864 for (i
= 0; i
< nelt
; i
++)
29867 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
29868 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
/* Undo the operand swap performed by arm_expand_vec_perm_const_1.  */
29878 if (swap_nelt
!= 0)
29879 std::swap (in0
, in1
);
/* VUZP writes both halves; the unused half goes into a fresh reg.  */
29882 out1
= gen_reg_rtx (d
->vmode
);
29884 std::swap (out0
, out1
);
29886 emit_insn (gen_neon_vuzp_internal (d
->vmode
, out0
, in0
, in1
, out1
));
29890 /* Recognize patterns for the VZIP insns. */
/* Return whether D's permutation interleaves the low (or high) halves of
   the two inputs -- the zip pattern -- and, unless d->testing_p, emit
   the VZIP insn.
   NOTE(review): garbled extract; failure returns and the final
   `return true` are among the dropped lines.  */
29893 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
29895 unsigned int i
, high
, mask
, nelt
= d
->perm
.length ();
29896 rtx out0
, out1
, in0
, in1
;
/* VZIP does not exist for 64-bit element sizes.  */
29900 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
29903 is_swapped
= BYTES_BIG_ENDIAN
;
/* Decide whether this zips the high halves (high = nelt/2, presumably
   set in a dropped line) or the low halves.  */
29905 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
29908 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
29910 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
29914 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
/* Verify the alternating input-A / input-B interleave pattern.  */
29916 for (i
= 0; i
< nelt
/ 2; i
++)
29919 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
29920 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
29924 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
29925 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
29937 std::swap (in0
, in1
);
/* VZIP writes both halves; the unused half goes into a fresh reg.  */
29940 out1
= gen_reg_rtx (d
->vmode
);
29942 std::swap (out0
, out1
);
29944 emit_insn (gen_neon_vzip_internal (d
->vmode
, out0
, in0
, in1
, out1
));
29948 /* Recognize patterns for the VREV insns. */
/* Return whether D's single-input permutation reverses elements within
   fixed-size groups (diff + 1 elements per group) and, unless
   d->testing_p, emit the matching VREV16/VREV32/VREV64 insn.
   NOTE(review): garbled extract -- the switch on d->vmode that assigns
   `diff` and selects between the gen_neon_vrev* assignments below has
   lost its case labels and returns.  */
29950 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
29952 unsigned int i
, j
, diff
, nelt
= d
->perm
.length ();
29953 rtx (*gen
) (machine_mode
, rtx
, rtx
);
/* VREV only reorders within one vector.  */
29955 if (!d
->one_vector_p
)
29966 gen
= gen_neon_vrev64
;
29977 gen
= gen_neon_vrev32
;
29983 gen
= gen_neon_vrev64
;
29994 gen
= gen_neon_vrev16
;
29998 gen
= gen_neon_vrev32
;
30004 gen
= gen_neon_vrev64
;
/* Check the permutation reverses each group of diff + 1 elements.  */
30014 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
30015 for (j
= 0; j
<= diff
; j
+= 1)
30017 /* This is guaranteed to be true as the value of diff
30018 is 7, 3, 1 and we should have enough elements in the
30019 queue to generate this. Getting a vector mask with a
30020 value of diff other than these values implies that
30021 something is wrong by the time we get here. */
30022 gcc_assert (i
+ j
< nelt
);
30023 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
30031 emit_insn (gen (d
->vmode
, d
->target
, d
->op0
));
30035 /* Recognize patterns for the VTRN insns. */
/* Return whether D's permutation is the element-transpose pattern
   (pairs i, i+nelt+odd alternating between the inputs) and, unless
   d->testing_p, emit the VTRN insn.
   NOTE(review): garbled extract; failure returns and the final
   `return true` are among the dropped lines.  */
30038 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
30040 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
30041 rtx out0
, out1
, in0
, in1
;
/* VTRN does not exist for 64-bit element sizes.  */
30043 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30046 /* Note that these are little-endian tests. Adjust for big-endian later. */
30047 if (d
->perm
[0] == 0)
30049 else if (d
->perm
[0] == 1)
30053 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
/* Verify the transpose pattern on each pair of indices.  */
30055 for (i
= 0; i
< nelt
; i
+= 2)
30057 if (d
->perm
[i
] != i
+ odd
)
30059 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
/* Big-endian needs the operands swapped (see the note above).  */
30069 if (BYTES_BIG_ENDIAN
)
30071 std::swap (in0
, in1
);
/* VTRN writes both halves; the unused half goes into a fresh reg.  */
30076 out1
= gen_reg_rtx (d
->vmode
);
30078 std::swap (out0
, out1
);
30080 emit_insn (gen_neon_vtrn_internal (d
->vmode
, out0
, in0
, in1
, out1
));
30084 /* Recognize patterns for the VEXT insns. */
/* Return whether D's permutation extracts a contiguous run of elements
   starting at d->perm[0] (indices increasing by one, possibly crossing
   from op0 into op1) and, unless d->testing_p, emit the VEXT insn.
   NOTE(review): garbled extract; failure returns, the testing_p early
   exit and the final `return true` are among the dropped lines.  */
30087 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
30089 unsigned int i
, nelt
= d
->perm
.length ();
30092 unsigned int location
;
30094 unsigned int next
= d
->perm
[0] + 1;
30096 /* TODO: Handle GCC's numbering of elements for big-endian. */
30097 if (BYTES_BIG_ENDIAN
)
30100 /* Check if the extracted indexes are increasing by one. */
30101 for (i
= 1; i
< nelt
; next
++, i
++)
30103 /* If we hit the most significant element of the 2nd vector in
30104 the previous iteration, no need to test further. */
30105 if (next
== 2 * nelt
)
30108 /* If we are operating on only one vector: it could be a
30109 rotation. If there are only two elements of size < 64, let
30110 arm_evpc_neon_vrev catch it. */
30111 if (d
->one_vector_p
&& (next
== nelt
))
30113 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
30119 if (d
->perm
[i
] != next
)
/* The starting element becomes the VEXT immediate.  */
30123 location
= d
->perm
[0];
30129 offset
= GEN_INT (location
);
/* NOTE(review): style nit in the original -- missing space in `if(`.
   Not fixed here since a doc_update keeps code byte-identical.  */
30131 if(d
->vmode
== E_DImode
)
30134 emit_insn (gen_neon_vext (d
->vmode
, d
->target
, d
->op0
, d
->op1
, offset
));
30138 /* The NEON VTBL instruction is a fully variable permuation that's even
30139 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30140 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30141 can do slightly better by expanding this as a constant where we don't
30142 have to apply a mask. */
/* Fallback recognizer: materialise the constant selector directly and
   emit a VTBL via arm_expand_vec_perm_1.  Unlike arm_expand_vec_perm,
   no AND-mask is needed because the indices are compile-time known.
   NOTE(review): garbled extract; the big-endian bail-out body, the
   testing_p early exit and the return are among the dropped lines.  */
30145 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
30147 rtx rperm
[MAX_VECT_LEN
], sel
;
30148 machine_mode vmode
= d
->vmode
;
30149 unsigned int i
, nelt
= d
->perm
.length ();
30151 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30152 numbering of elements for big-endian, we must reverse the order. */
30153 if (BYTES_BIG_ENDIAN
)
30159 /* Generic code will try constant permutation twice. Once with the
30160 original mode and again with the elements lowered to QImode.
30161 So wait and don't do the selector expansion ourselves. */
30162 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
/* Build the selector as a CONST_VECTOR of the permutation indices.  */
30165 for (i
= 0; i
< nelt
; ++i
)
30166 rperm
[i
] = GEN_INT (d
->perm
[i
]);
30167 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
30168 sel
= force_reg (vmode
, sel
);
30170 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
/* Try each constant-permutation recognizer in turn (VEXT first, then
   VUZP/VZIP/VREV/VTRN), falling back to a VTBL expansion.  Canonicalises
   the permutation so it starts from the first operand before matching.
   NOTE(review): garbled extract; the `return true` after each matched
   recognizer is among the dropped lines.  */
30175 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
30177 /* Check if the input mask matches vext before reordering the
30180 if (arm_evpc_neon_vext (d
))
30183 /* The pattern matching functions above are written to look for a small
30184 number to begin the sequence (0, 1, N/2). If we begin with an index
30185 from the second operand, we can swap the operands. */
30186 unsigned int nelt
= d
->perm
.length ();
30187 if (d
->perm
[0] >= nelt
)
30189 d
->perm
.rotate_inputs (1);
30190 std::swap (d
->op0
, d
->op1
);
/* Try the fixed-pattern recognizers, cheapest first.  */
30195 if (arm_evpc_neon_vuzp (d
))
30197 if (arm_evpc_neon_vzip (d
))
30199 if (arm_evpc_neon_vrev (d
))
30201 if (arm_evpc_neon_vtrn (d
))
30203 return arm_evpc_neon_vtbl (d
);
30208 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
/* Hook entry point: decide (and, when TARGET is non-null, emit) a
   constant permutation SEL of OP0/OP1 into TARGET.  A null TARGET means
   "test only" (d.testing_p).  Populates an expand_vec_perm_d and
   delegates to arm_expand_vec_perm_const_1.
   NOTE(review): garbled extract -- the assignments of d.vmode/d.target/
   d.op0/d.op1 for the non-testing path and several control-flow lines
   are missing.  */
30211 arm_vectorize_vec_perm_const (machine_mode vmode
, rtx target
, rtx op0
, rtx op1
,
30212 const vec_perm_indices
&sel
)
30214 struct expand_vec_perm_d d
;
30215 int i
, nelt
, which
;
/* Only NEON D- and Q-register vector modes are supported.  */
30217 if (!VALID_NEON_DREG_MODE (vmode
) && !VALID_NEON_QREG_MODE (vmode
))
30225 gcc_assert (VECTOR_MODE_P (d
.vmode
));
30226 d
.testing_p
= !target
;
/* Work out which of the two inputs the permutation actually reads:
   bit 0 of `which` = reads op0, bit 1 = reads op1.  */
30228 nelt
= GET_MODE_NUNITS (d
.vmode
);
30229 for (i
= which
= 0; i
< nelt
; ++i
)
30231 int ei
= sel
[i
] & (2 * nelt
- 1);
30232 which
|= (ei
< nelt
? 1 : 2);
30241 d
.one_vector_p
= false;
30242 if (d
.testing_p
|| !rtx_equal_p (op0
, op1
))
30245 /* The elements of PERM do not suggest that only the first operand
30246 is used, but both operands are identical. Allow easier matching
30247 of the permutation by folding the permutation into the single
30252 d
.one_vector_p
= true;
30257 d
.one_vector_p
= true;
/* Encode the (possibly folded) selector into d.perm.  */
30261 d
.perm
.new_vector (sel
.encoding (), d
.one_vector_p
? 1 : 2, nelt
);
30264 return arm_expand_vec_perm_const_1 (&d
);
/* Testing path: use raw virtual registers as placeholder operands.  */
30266 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
30267 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
30268 if (!d
.one_vector_p
)
30269 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
30272 bool ret
= arm_expand_vec_perm_const_1 (&d
);
/* Return whether auto-increment addressing of kind CODE is usable with
   MODE.  Vector modes only allow post-increment; without LDRD, modes
   wider than a word gain nothing from auto-increment.
   NOTE(review): garbled extract -- the switch on CODE, its case labels
   and the return statements are among the dropped lines; only the
   guarding conditions survive.  */
30279 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
30281 /* If we are soft float and we do not have ldrd
30282 then all auto increment forms are ok. */
30283 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
30288 /* Post increment and Pre Decrement are supported for all
30289 instruction forms except for vector forms. */
30292 if (VECTOR_MODE_P (mode
))
30294 if (code
!= ARM_PRE_DEC
)
30304 /* Without LDRD and mode size greater than
30305 word size, there is no point in auto-incrementing
30306 because ldm and stm will not have these forms. */
30307 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
30310 /* Vector and floating point modes do not support
30311 these auto increment forms. */
30312 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
30325 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
30326 on ARM, since we know that shifts by negative amounts are no-ops.
30327 Additionally, the default expansion code is not available or suitable
30328 for post-reload insn splits (this can occur when the register allocator
30329 chooses not to do a shift in NEON).
30331 This function is used in both initial expand and post-reload splits, and
30332 handles all kinds of 64-bit shifts.
30334 Input requirements:
30335 - It is safe for the input and output to be the same register, but
30336 early-clobber rules apply for the shift amount and scratch registers.
30337 - Shift by register requires both scratch registers. In all other cases
30338 the scratch registers may be NULL.
30339 - Ashiftrt by a register also clobbers the CC register. */
/* NOTE(review): garbled extract -- braces, several gcc_assert openers,
   an #undef group, and parts of the per-code switch are missing, so
   fragments below do not always form complete statements.  */
30341 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
30342 rtx amount
, rtx scratch1
, rtx scratch2
)
/* Split the DImode operands into their SImode halves.  */
30344 rtx out_high
= gen_highpart (SImode
, out
);
30345 rtx out_low
= gen_lowpart (SImode
, out
);
30346 rtx in_high
= gen_highpart (SImode
, in
);
30347 rtx in_low
= gen_lowpart (SImode
, in
);
/* Nomenclature used below (original comment, opener line dropped):  */
30350 in = the register pair containing the input value.
30351 out = the destination register pair.
30352 up = the high- or low-part of each pair.
30353 down = the opposite part to "up".
30354 In a shift, we can consider bits to shift from "up"-stream to
30355 "down"-stream, so in a left-shift "up" is the low-part and "down"
30356 is the high-part of each register pair. */
30358 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
30359 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
30360 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
30361 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
/* Sanity-check operand kinds and modes.  */
30363 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
30365 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
30366 && GET_MODE (out
) == DImode
);
30368 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
30369 && GET_MODE (in
) == DImode
);
30371 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
30372 && GET_MODE (amount
) == SImode
)
30373 || CONST_INT_P (amount
)));
30374 gcc_assert (scratch1
== NULL
30375 || (GET_CODE (scratch1
) == SCRATCH
)
30376 || (GET_MODE (scratch1
) == SImode
30377 && REG_P (scratch1
)));
30378 gcc_assert (scratch2
== NULL
30379 || (GET_CODE (scratch2
) == SCRATCH
)
30380 || (GET_MODE (scratch2
) == SImode
30381 && REG_P (scratch2
)));
/* Early-clobber: AMOUNT must not overlap either half of OUT.  */
30382 gcc_assert (!REG_P (out
) || !REG_P (amount
)
30383 || !HARD_REGISTER_P (out
)
30384 || (REGNO (out
) != REGNO (amount
)
30385 && REGNO (out
) + 1 != REGNO (amount
)));
30387 /* Macros to make following code more readable. */
30388 #define SUB_32(DEST,SRC) \
30389 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
30390 #define RSB_32(DEST,SRC) \
30391 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
30392 #define SUB_S_32(DEST,SRC) \
30393 gen_addsi3_compare0 ((DEST), (SRC), \
30395 #define SET(DEST,SRC) \
30396 gen_rtx_SET ((DEST), (SRC))
30397 #define SHIFT(CODE,SRC,AMOUNT) \
30398 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
30399 #define LSHIFT(CODE,SRC,AMOUNT) \
30400 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
30401 SImode, (SRC), (AMOUNT))
30402 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
30403 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
30404 SImode, (SRC), (AMOUNT))
30406 gen_rtx_IOR (SImode, (A), (B))
30407 #define BRANCH(COND,LABEL) \
30408 gen_arm_cond_branch ((LABEL), \
30409 gen_rtx_ ## COND (CCmode, cc_reg, \
30413 /* Shifts by register and shifts by constant are handled separately. */
30414 if (CONST_INT_P (amount
))
30416 /* We have a shift-by-constant. */
30418 /* First, handle out-of-range shift amounts.
30419 In both cases we try to match the result an ARM instruction in a
30420 shift-by-register would give. This helps reduce execution
30421 differences between optimization levels, but it won't stop other
30422 parts of the compiler doing different things. This is "undefined
30423 behavior, in any case. */
30424 if (INTVAL (amount
) <= 0)
30425 emit_insn (gen_movdi (out
, in
));
30426 else if (INTVAL (amount
) >= 64)
/* Shift >= 64: arithmetic right fills with the sign bit; the other
   shifts produce zero.  */
30428 if (code
== ASHIFTRT
)
30430 rtx const31_rtx
= GEN_INT (31);
30431 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
30432 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
30435 emit_insn (gen_movdi (out
, const0_rtx
));
30438 /* Now handle valid shifts. */
30439 else if (INTVAL (amount
) < 32)
30441 /* Shifts by a constant less than 32. */
30442 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
30444 /* Clearing the out register in DImode first avoids lots
30445 of spilling and results in less stack usage.
30446 Later this redundant insn is completely removed.
30447 Do that only if "in" and "out" are different registers. */
30448 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
30449 emit_insn (SET (out
, const0_rtx
));
30450 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
30451 emit_insn (SET (out_down
,
30452 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
30454 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
30458 /* Shifts by a constant greater than 31. */
30459 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
30461 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
30462 emit_insn (SET (out
, const0_rtx
));
30463 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
30464 if (code
== ASHIFTRT
)
30465 emit_insn (gen_ashrsi3 (out_up
, in_up
,
30468 emit_insn (SET (out_up
, const0_rtx
));
30473 /* We have a shift-by-register. */
30474 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
30476 /* This alternative requires the scratch registers. */
30477 gcc_assert (scratch1
&& REG_P (scratch1
));
30478 gcc_assert (scratch2
&& REG_P (scratch2
));
30480 /* We will need the values "amount-32" and "32-amount" later.
30481 Swapping them around now allows the later code to be more general. */
/* Per-code scratch setup (the enclosing switch's case labels are among
   the dropped lines).  */
30485 emit_insn (SUB_32 (scratch1
, amount
));
30486 emit_insn (RSB_32 (scratch2
, amount
));
30489 emit_insn (RSB_32 (scratch1
, amount
));
30490 /* Also set CC = amount > 32. */
30491 emit_insn (SUB_S_32 (scratch2
, amount
));
30494 emit_insn (RSB_32 (scratch1
, amount
));
30495 emit_insn (SUB_32 (scratch2
, amount
));
30498 gcc_unreachable ();
30501 /* Emit code like this:
30504 out_down = in_down << amount;
30505 out_down = (in_up << (amount - 32)) | out_down;
30506 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30507 out_up = in_up << amount;
30510 out_down = in_down >> amount;
30511 out_down = (in_up << (32 - amount)) | out_down;
30513 out_down = ((signed)in_up >> (amount - 32)) | out_down;
30514 out_up = in_up << amount;
30517 out_down = in_down >> amount;
30518 out_down = (in_up << (32 - amount)) | out_down;
30520 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
30521 out_up = in_up << amount;
30523 The ARM and Thumb2 variants are the same but implemented slightly
30524 differently. If this were only called during expand we could just
30525 use the Thumb2 case and let combine do the right thing, but this
30526 can also be called from post-reload splitters. */
30528 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
30530 if (!TARGET_THUMB2
)
30532 /* Emit code for ARM mode. */
30533 emit_insn (SET (out_down
,
30534 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
30535 if (code
== ASHIFTRT
)
/* ASHIFTRT needs a conditional fix-up for amounts > 32, skipped via
   a branch on the CC set earlier by SUB_S_32.  */
30537 rtx_code_label
*done_label
= gen_label_rtx ();
30538 emit_jump_insn (BRANCH (LT
, done_label
));
30539 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
30541 emit_label (done_label
);
30544 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
30549 /* Emit code for Thumb2 mode.
30550 Thumb2 can't do shift and or in one insn. */
30551 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
30552 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
30554 if (code
== ASHIFTRT
)
30556 rtx_code_label
*done_label
= gen_label_rtx ();
30557 emit_jump_insn (BRANCH (LT
, done_label
));
30558 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
30559 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
30560 emit_label (done_label
);
30564 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
30565 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
/* Finally the "up" half, common to both ARM and Thumb2 paths.  */
30569 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
30583 /* Returns true if the pattern is a valid symbolic address, which is either a
30584 symbol_ref or (symbol_ref + addend).
30586 According to the ARM ELF ABI, the initial addend of REL-type relocations
30587 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
30588 literal field of the instruction as a 16-bit signed value in the range
30589 -32768 <= A < 32768. */
/* NOTE(review): garbled extract -- the declaration/initialisation of
   `tmp` and the final return(s) are among the dropped lines.  */
30592 arm_valid_symbolic_address_p (rtx addr
)
30594 rtx xop0
, xop1
= NULL_RTX
;
/* Word relocations disable MOVW/MOVT symbol addressing entirely.  */
30597 if (target_word_relocations
)
30600 if (GET_CODE (tmp
) == SYMBOL_REF
|| GET_CODE (tmp
) == LABEL_REF
)
30603 /* (const (plus: symbol_ref const_int)) */
30604 if (GET_CODE (addr
) == CONST
)
30605 tmp
= XEXP (addr
, 0);
30607 if (GET_CODE (tmp
) == PLUS
)
30609 xop0
= XEXP (tmp
, 0);
30610 xop1
= XEXP (tmp
, 1);
/* Accept symbol + addend only when the addend fits the signed 16-bit
   REL-relocation range described above.  */
30612 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
30613 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
30619 /* Returns true if a valid comparison operation and makes
30620 the operands in a form that is valid. */
/* Legalise *COMPARISON on *OP1/*OP2: canonicalise unordered codes,
   force operands into registers where the predicates demand it, and
   widen HFmode operands when no FP16 compare instructions exist.
   NOTE(review): garbled extract -- the switch on the mode, its case
   labels and the return statements are among the dropped lines.  */
30622 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
30624 enum rtx_code code
= GET_CODE (*comparison
);
/* Infer the comparison mode from whichever operand has one.  */
30626 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
30627 ? GET_MODE (*op2
) : GET_MODE (*op1
);
30629 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
/* UNEQ/LTGT must be rewritten via arm_canonicalize_comparison.  */
30631 if (code
== UNEQ
|| code
== LTGT
)
30634 code_int
= (int)code
;
30635 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
30636 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
/* Integer path: operands must satisfy arm_add_operand.  */
30641 if (!arm_add_operand (*op1
, mode
))
30642 *op1
= force_reg (mode
, *op1
);
30643 if (!arm_add_operand (*op2
, mode
))
30644 *op2
= force_reg (mode
, *op2
);
30648 /* gen_compare_reg() will sort out any invalid operands. */
/* HFmode path: without FP16 compare instructions, widen to SF.  */
30652 if (!TARGET_VFP_FP16INST
)
30654 /* FP16 comparisons are done in SF mode. */
30656 *op1
= convert_to_mode (mode
, *op1
, 1);
30657 *op2
= convert_to_mode (mode
, *op2
, 1);
30658 /* Fall through. */
/* SF/DF path: operands must satisfy vfp_compare_operand.  */
30661 if (!vfp_compare_operand (*op1
, mode
))
30662 *op1
= force_reg (mode
, *op1
);
30663 if (!vfp_compare_operand (*op2
, mode
))
30664 *op2
= force_reg (mode
, *op2
);
30674 /* Maximum number of instructions to set block of memory. */
/* Budget for inline memset expansion: a small fixed count when
   optimising for size (the literal constant is in a dropped line),
   otherwise the per-CPU tuning value.  */
30676 arm_block_set_max_insns (void)
30678 if (optimize_function_for_size_p (cfun
))
30681 return current_tune
->max_insns_inline_memset
;
30684 /* Return TRUE if it's profitable to set block of memory for
30685 non-vectorized case. VAL is the value to set the memory
30686 with. LENGTH is the number of bytes to set. ALIGN is the
30687 alignment of the destination memory in bytes. UNALIGNED_P
30688 is TRUE if we can only set the memory with instructions
30689 meeting alignment requirements. USE_STRD_P is TRUE if we
30690 can use strd to set the memory. */
/* NOTE(review): garbled extract -- the declaration of `num`, the
   enclosing if for the unaligned case, and the decrement for the
   combined STRH/STRB case are among the dropped lines.  */
30692 arm_block_set_non_vect_profit_p (rtx val
,
30693 unsigned HOST_WIDE_INT length
,
30694 unsigned HOST_WIDE_INT align
,
30695 bool unaligned_p
, bool use_strd_p
)
30698 /* For leftovers in bytes of 0-7, we can set the memory block using
30699 strb/strh/str with minimum instruction number. */
30700 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
/* Unaligned path: one store per ALIGN-sized chunk plus byte tail.  */
30704 num
= arm_const_inline_cost (SET
, val
);
30705 num
+= length
/ align
+ length
% align
;
30707 else if (use_strd_p
)
/* STRD path: 8 bytes per store plus the leftover table.  */
30709 num
= arm_const_double_inline_cost (val
);
30710 num
+= (length
>> 3) + leftover
[length
& 7];
/* Plain STR path: 4 bytes per store plus the leftover table.  */
30714 num
= arm_const_inline_cost (SET
, val
);
30715 num
+= (length
>> 2) + leftover
[length
& 3];
30718 /* We may be able to combine last pair STRH/STRB into a single STR
30719 by shifting one byte back. */
30720 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
30723 return (num
<= arm_block_set_max_insns ());
30726 /* Return TRUE if it's profitable to set block of memory for
30727 vectorized case. LENGTH is the number of bytes to set.
30728 ALIGN is the alignment of destination memory in bytes.
30729 MODE is the vector mode used to set the memory. */
/* NOTE(review): garbled extract -- the MODE parameter line, the
   declaration/initialisation of `num`, and the increments under the
   two trailing ifs are among the dropped lines.  */
30731 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
30732 unsigned HOST_WIDE_INT align
,
30736 bool unaligned_p
= ((align
& 3) != 0);
30737 unsigned int nelt
= GET_MODE_NUNITS (mode
);
30739 /* Instruction loading constant value. */
30741 /* Instructions storing the memory. */
30742 num
+= (length
+ nelt
- 1) / nelt
;
30743 /* Instructions adjusting the address expression. Only need to
30744 adjust address expression if it's 4 bytes aligned and bytes
30745 leftover can only be stored by mis-aligned store instruction. */
30746 if (!unaligned_p
&& (length
& 3) != 0)
30749 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
30750 if (!unaligned_p
&& mode
== V16QImode
)
30753 return (num
<= arm_block_set_max_insns ());
30756 /* Set a block of memory using vectorization instructions for the
30757 unaligned case. We fill the first LENGTH bytes of the memory
30758 area starting from DSTBASE with byte constant VALUE. ALIGN is
30759 the alignment requirement of memory. Return TRUE if succeeded. */
/* NOTE(review): garbled extract -- declarations of mode/dst/mem/reg/
   val_vec, mode selection bodies, byte-replication of VALUE, and the
   final returns are among the dropped lines.  */
30761 arm_block_set_unaligned_vect (rtx dstbase
,
30762 unsigned HOST_WIDE_INT length
,
30763 unsigned HOST_WIDE_INT value
,
30764 unsigned HOST_WIDE_INT align
)
30766 unsigned int i
, nelt_v16
, nelt_v8
, nelt_mode
;
30769 rtx (*gen_func
) (rtx
, rtx
);
30771 unsigned HOST_WIDE_INT v
= value
;
30772 unsigned int offset
= 0;
/* This routine is only for misaligned destinations.  */
30773 gcc_assert ((align
& 0x3) != 0);
30774 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
30775 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
/* Pick a Q-register (16-byte) or D-register (8-byte) store routine
   depending on the block length.  */
30776 if (length
>= nelt_v16
)
30779 gen_func
= gen_movmisalignv16qi
;
30784 gen_func
= gen_movmisalignv8qi
;
30786 nelt_mode
= GET_MODE_NUNITS (mode
);
30787 gcc_assert (length
>= nelt_mode
);
30788 /* Skip if it isn't profitable. */
30789 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
30792 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
30793 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30795 v
= sext_hwi (v
, BITS_PER_WORD
);
/* Broadcast the byte value into a vector register.  */
30797 reg
= gen_reg_rtx (mode
);
30798 val_vec
= gen_const_vec_duplicate (mode
, GEN_INT (v
));
30799 /* Emit instruction loading the constant value. */
30800 emit_move_insn (reg
, val_vec
);
30802 /* Handle nelt_mode bytes in a vector. */
30803 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
30805 emit_insn ((*gen_func
) (mem
, reg
));
/* Only bump the address if another full vector store follows.  */
30806 if (i
+ 2 * nelt_mode
<= length
)
30808 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
30809 offset
+= nelt_mode
;
30810 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30814 /* If there are not less than nelt_v8 bytes leftover, we must be in
30816 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
30818 /* Handle (8, 16) bytes leftover. */
30819 if (i
+ nelt_v8
< length
)
/* Shift the address back so one final full-width store covers the
   tail (it overlaps the previous store).  */
30821 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
30822 offset
+= length
- i
;
30823 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30825 /* We are shifting bytes back, set the alignment accordingly. */
30826 if ((length
& 1) != 0 && align
>= 2)
30827 set_mem_align (mem
, BITS_PER_UNIT
);
30829 emit_insn (gen_movmisalignv16qi (mem
, reg
));
30831 /* Handle (0, 8] bytes leftover. */
30832 else if (i
< length
&& i
+ nelt_v8
>= length
)
30834 if (mode
== V16QImode
)
30835 reg
= gen_lowpart (V8QImode
, reg
);
30837 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
30838 + (nelt_mode
- nelt_v8
))));
30839 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
30840 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
30842 /* We are shifting bytes back, set the alignment accordingly. */
30843 if ((length
& 1) != 0 && align
>= 2)
30844 set_mem_align (mem
, BITS_PER_UNIT
);
30846 emit_insn (gen_movmisalignv8qi (mem
, reg
));
30852 /* Set a block of memory using vectorization instructions for the
30853 aligned case. We fill the first LENGTH bytes of the memory area
30854 starting from DSTBASE with byte constant VALUE. ALIGN is the
30855 alignment requirement of memory. Return TRUE if succeeded. */
/* NOTE(review): garbled extract -- declarations of mode/reg/val_vec,
   mode selection, the initialisation of `i` after the V16QI prologue,
   and the final returns are among the dropped lines.  */
30857 arm_block_set_aligned_vect (rtx dstbase
,
30858 unsigned HOST_WIDE_INT length
,
30859 unsigned HOST_WIDE_INT value
,
30860 unsigned HOST_WIDE_INT align
)
30862 unsigned int i
, nelt_v8
, nelt_v16
, nelt_mode
;
30863 rtx dst
, addr
, mem
;
30866 unsigned int offset
= 0;
/* This routine is only for word-aligned destinations.  */
30868 gcc_assert ((align
& 0x3) == 0);
30869 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
30870 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
/* Prefer 16-byte stores when the block is long enough and misaligned
   Q-register stores are legal (little-endian with unaligned access).  */
30871 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
30876 nelt_mode
= GET_MODE_NUNITS (mode
);
30877 gcc_assert (length
>= nelt_mode
);
30878 /* Skip if it isn't profitable. */
30879 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
30882 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
/* Broadcast the byte value into a vector register.  */
30884 reg
= gen_reg_rtx (mode
);
30885 val_vec
= gen_const_vec_duplicate (mode
, gen_int_mode (value
, QImode
));
30886 /* Emit instruction loading the constant value. */
30887 emit_move_insn (reg
, val_vec
);
30890 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30891 if (mode
== V16QImode
)
30893 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30894 emit_insn (gen_movmisalignv16qi (mem
, reg
));
30896 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30897 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
/* Back the address up so the final overlapping 16-byte store covers
   the tail exactly.  */
30899 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
30900 offset
+= length
- nelt_mode
;
30901 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30902 /* We are shifting bytes back, set the alignment accordingly. */
30903 if ((length
& 0x3) == 0)
30904 set_mem_align (mem
, BITS_PER_UNIT
* 4);
30905 else if ((length
& 0x1) == 0)
30906 set_mem_align (mem
, BITS_PER_UNIT
* 2);
30908 set_mem_align (mem
, BITS_PER_UNIT
);
30910 emit_insn (gen_movmisalignv16qi (mem
, reg
));
30913 /* Fall through for bytes leftover. */
/* Continue with D-register (8-byte) stores for the remainder.  */
30915 nelt_mode
= GET_MODE_NUNITS (mode
);
30916 reg
= gen_lowpart (V8QImode
, reg
);
30919 /* Handle 8 bytes in a vector. */
30920 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
30922 addr
= plus_constant (Pmode
, dst
, i
);
30923 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
/* Use an ordinary move when the mem is doubleword-aligned, otherwise
   the unaligned-store pattern.  */
30924 if (MEM_ALIGN (mem
) >= 2 * BITS_PER_WORD
)
30925 emit_move_insn (mem
, reg
);
30927 emit_insn (gen_unaligned_storev8qi (mem
, reg
));
30930 /* Handle single word leftover by shifting 4 bytes back. We can
30931 use aligned access for this case. */
30932 if (i
+ UNITS_PER_WORD
== length
)
30934 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
30935 offset
+= i
- UNITS_PER_WORD
;
30936 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
30937 /* We are shifting 4 bytes back, set the alignment accordingly. */
30938 if (align
> UNITS_PER_WORD
)
30939 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
30941 emit_insn (gen_unaligned_storev8qi (mem
, reg
));
30943 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30944 We have to use unaligned access for this case. */
30945 else if (i
< length
)
30947 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
30948 offset
+= length
- nelt_mode
;
30949 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30950 /* We are shifting bytes back, set the alignment accordingly. */
30951 if ((length
& 1) == 0)
30952 set_mem_align (mem
, BITS_PER_UNIT
* 2);
30954 set_mem_align (mem
, BITS_PER_UNIT
);
30956 emit_insn (gen_movmisalignv8qi (mem
, reg
));
30962 /* Set a block of memory using plain strh/strb instructions, only
30963 using instructions allowed by ALIGN on processor. We fill the
30964 first LENGTH bytes of the memory area starting from DSTBASE
30965 with byte constant VALUE. ALIGN is the alignment requirement
30968 arm_block_set_unaligned_non_vect (rtx dstbase
,
30969 unsigned HOST_WIDE_INT length
,
30970 unsigned HOST_WIDE_INT value
,
30971 unsigned HOST_WIDE_INT align
)
30974 rtx dst
, addr
, mem
;
30975 rtx val_exp
, val_reg
, reg
;
30977 HOST_WIDE_INT v
= value
;
30979 gcc_assert (align
== 1 || align
== 2);
30982 v
|= (value
<< BITS_PER_UNIT
);
30984 v
= sext_hwi (v
, BITS_PER_WORD
);
30985 val_exp
= GEN_INT (v
);
30986 /* Skip if it isn't profitable. */
30987 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
30988 align
, true, false))
30991 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
30992 mode
= (align
== 2 ? HImode
: QImode
);
30993 val_reg
= force_reg (SImode
, val_exp
);
30994 reg
= gen_lowpart (mode
, val_reg
);
30996 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
30998 addr
= plus_constant (Pmode
, dst
, i
);
30999 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
31000 emit_move_insn (mem
, reg
);
31003 /* Handle single byte leftover. */
31004 if (i
+ 1 == length
)
31006 reg
= gen_lowpart (QImode
, val_reg
);
31007 addr
= plus_constant (Pmode
, dst
, i
);
31008 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
31009 emit_move_insn (mem
, reg
);
31013 gcc_assert (i
== length
);
31017 /* Set a block of memory using plain strd/str/strh/strb instructions,
31018 to permit unaligned copies on processors which support unaligned
31019 semantics for those instructions. We fill the first LENGTH bytes
31020 of the memory area starting from DSTBASE with byte constant VALUE.
31021 ALIGN is the alignment requirement of memory. */
31023 arm_block_set_aligned_non_vect (rtx dstbase
,
31024 unsigned HOST_WIDE_INT length
,
31025 unsigned HOST_WIDE_INT value
,
31026 unsigned HOST_WIDE_INT align
)
31029 rtx dst
, addr
, mem
;
31030 rtx val_exp
, val_reg
, reg
;
31031 unsigned HOST_WIDE_INT v
;
31034 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
31035 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
31037 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
31038 if (length
< UNITS_PER_WORD
)
31039 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
31042 v
|= (v
<< BITS_PER_WORD
);
31044 v
= sext_hwi (v
, BITS_PER_WORD
);
31046 val_exp
= GEN_INT (v
);
31047 /* Skip if it isn't profitable. */
31048 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
31049 align
, false, use_strd_p
))
31054 /* Try without strd. */
31055 v
= (v
>> BITS_PER_WORD
);
31056 v
= sext_hwi (v
, BITS_PER_WORD
);
31057 val_exp
= GEN_INT (v
);
31058 use_strd_p
= false;
31059 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
31060 align
, false, use_strd_p
))
31065 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
31066 /* Handle double words using strd if possible. */
31069 val_reg
= force_reg (DImode
, val_exp
);
31071 for (; (i
+ 8 <= length
); i
+= 8)
31073 addr
= plus_constant (Pmode
, dst
, i
);
31074 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
31075 if (MEM_ALIGN (mem
) >= 2 * BITS_PER_WORD
)
31076 emit_move_insn (mem
, reg
);
31078 emit_insn (gen_unaligned_storedi (mem
, reg
));
31082 val_reg
= force_reg (SImode
, val_exp
);
31084 /* Handle words. */
31085 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
31086 for (; (i
+ 4 <= length
); i
+= 4)
31088 addr
= plus_constant (Pmode
, dst
, i
);
31089 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
31090 if ((align
& 3) == 0)
31091 emit_move_insn (mem
, reg
);
31093 emit_insn (gen_unaligned_storesi (mem
, reg
));
31096 /* Merge last pair of STRH and STRB into a STR if possible. */
31097 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
31099 addr
= plus_constant (Pmode
, dst
, i
- 1);
31100 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
31101 /* We are shifting one byte back, set the alignment accordingly. */
31102 if ((align
& 1) == 0)
31103 set_mem_align (mem
, BITS_PER_UNIT
);
31105 /* Most likely this is an unaligned access, and we can't tell at
31106 compilation time. */
31107 emit_insn (gen_unaligned_storesi (mem
, reg
));
31111 /* Handle half word leftover. */
31112 if (i
+ 2 <= length
)
31114 reg
= gen_lowpart (HImode
, val_reg
);
31115 addr
= plus_constant (Pmode
, dst
, i
);
31116 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
31117 if ((align
& 1) == 0)
31118 emit_move_insn (mem
, reg
);
31120 emit_insn (gen_unaligned_storehi (mem
, reg
));
31125 /* Handle single byte leftover. */
31126 if (i
+ 1 == length
)
31128 reg
= gen_lowpart (QImode
, val_reg
);
31129 addr
= plus_constant (Pmode
, dst
, i
);
31130 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
31131 emit_move_insn (mem
, reg
);
31137 /* Set a block of memory using vectorization instructions for both
31138 aligned and unaligned cases. We fill the first LENGTH bytes of
31139 the memory area starting from DSTBASE with byte constant VALUE.
31140 ALIGN is the alignment requirement of memory. */
31142 arm_block_set_vect (rtx dstbase
,
31143 unsigned HOST_WIDE_INT length
,
31144 unsigned HOST_WIDE_INT value
,
31145 unsigned HOST_WIDE_INT align
)
31147 /* Check whether we need to use unaligned store instruction. */
31148 if (((align
& 3) != 0 || (length
& 3) != 0)
31149 /* Check whether unaligned store instruction is available. */
31150 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
31153 if ((align
& 3) == 0)
31154 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
31156 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
31159 /* Expand string store operation. Firstly we try to do that by using
31160 vectorization instructions, then try with ARM unaligned access and
31161 double-word store if profitable. OPERANDS[0] is the destination,
31162 OPERANDS[1] is the number of bytes, operands[2] is the value to
31163 initialize the memory, OPERANDS[3] is the known alignment of the
31166 arm_gen_setmem (rtx
*operands
)
31168 rtx dstbase
= operands
[0];
31169 unsigned HOST_WIDE_INT length
;
31170 unsigned HOST_WIDE_INT value
;
31171 unsigned HOST_WIDE_INT align
;
31173 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
31176 length
= UINTVAL (operands
[1]);
31180 value
= (UINTVAL (operands
[2]) & 0xFF);
31181 align
= UINTVAL (operands
[3]);
31182 if (TARGET_NEON
&& length
>= 8
31183 && current_tune
->string_ops_prefer_neon
31184 && arm_block_set_vect (dstbase
, length
, value
, align
))
31187 if (!unaligned_access
&& (align
& 3) != 0)
31188 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
31190 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
31195 arm_macro_fusion_p (void)
31197 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
31200 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
31201 for MOVW / MOVT macro fusion. */
31204 arm_sets_movw_movt_fusible_p (rtx prev_set
, rtx curr_set
)
31206 /* We are trying to fuse
31207 movw imm / movt imm
31208 instructions as a group that gets scheduled together. */
31210 rtx set_dest
= SET_DEST (curr_set
);
31212 if (GET_MODE (set_dest
) != SImode
)
31215 /* We are trying to match:
31216 prev (movw) == (set (reg r0) (const_int imm16))
31217 curr (movt) == (set (zero_extract (reg r0)
31220 (const_int imm16_1))
31222 prev (movw) == (set (reg r1)
31223 (high (symbol_ref ("SYM"))))
31224 curr (movt) == (set (reg r0)
31226 (symbol_ref ("SYM")))) */
31228 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
31230 if (CONST_INT_P (SET_SRC (curr_set
))
31231 && CONST_INT_P (SET_SRC (prev_set
))
31232 && REG_P (XEXP (set_dest
, 0))
31233 && REG_P (SET_DEST (prev_set
))
31234 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
31238 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
31239 && REG_P (SET_DEST (curr_set
))
31240 && REG_P (SET_DEST (prev_set
))
31241 && GET_CODE (SET_SRC (prev_set
)) == HIGH
31242 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
31249 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
31251 rtx prev_set
= single_set (prev
);
31252 rtx curr_set
= single_set (curr
);
31258 if (any_condjump_p (curr
))
31261 if (!arm_macro_fusion_p ())
31264 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
31265 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
31271 /* Return true iff the instruction fusion described by OP is enabled. */
31273 arm_fusion_enabled_p (tune_params::fuse_ops op
)
31275 return current_tune
->fusible_ops
& op
;
31278 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
31279 scheduled for speculative execution. Reject the long-running division
31280 and square-root instructions. */
31283 arm_sched_can_speculate_insn (rtx_insn
*insn
)
31285 switch (get_attr_type (insn
))
31293 case TYPE_NEON_FP_SQRT_S
:
31294 case TYPE_NEON_FP_SQRT_D
:
31295 case TYPE_NEON_FP_SQRT_S_Q
:
31296 case TYPE_NEON_FP_SQRT_D_Q
:
31297 case TYPE_NEON_FP_DIV_S
:
31298 case TYPE_NEON_FP_DIV_D
:
31299 case TYPE_NEON_FP_DIV_S_Q
:
31300 case TYPE_NEON_FP_DIV_D_Q
:
31307 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31309 static unsigned HOST_WIDE_INT
31310 arm_asan_shadow_offset (void)
31312 return HOST_WIDE_INT_1U
<< 29;
31316 /* This is a temporary fix for PR60655. Ideally we need
31317 to handle most of these cases in the generic part but
31318 currently we reject minus (..) (sym_ref). We try to
31319 ameliorate the case with minus (sym_ref1) (sym_ref2)
31320 where they are in the same section. */
31323 arm_const_not_ok_for_debug_p (rtx p
)
31325 tree decl_op0
= NULL
;
31326 tree decl_op1
= NULL
;
31328 if (GET_CODE (p
) == UNSPEC
)
31330 if (GET_CODE (p
) == MINUS
)
31332 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
31334 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
31336 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
31337 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
31339 if ((VAR_P (decl_op1
)
31340 || TREE_CODE (decl_op1
) == CONST_DECL
)
31341 && (VAR_P (decl_op0
)
31342 || TREE_CODE (decl_op0
) == CONST_DECL
))
31343 return (get_variable_section (decl_op1
, false)
31344 != get_variable_section (decl_op0
, false));
31346 if (TREE_CODE (decl_op1
) == LABEL_DECL
31347 && TREE_CODE (decl_op0
) == LABEL_DECL
)
31348 return (DECL_CONTEXT (decl_op1
)
31349 != DECL_CONTEXT (decl_op0
));
31359 /* return TRUE if x is a reference to a value in a constant pool */
31361 arm_is_constant_pool_ref (rtx x
)
31364 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
31365 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
31368 /* Remember the last target of arm_set_current_function. */
31369 static GTY(()) tree arm_previous_fndecl
;
31371 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
31374 save_restore_target_globals (tree new_tree
)
31376 /* If we have a previous state, use it. */
31377 if (TREE_TARGET_GLOBALS (new_tree
))
31378 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
31379 else if (new_tree
== target_option_default_node
)
31380 restore_target_globals (&default_target_globals
);
31383 /* Call target_reinit and save the state for TARGET_GLOBALS. */
31384 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
31387 arm_option_params_internal ();
31390 /* Invalidate arm_previous_fndecl. */
31393 arm_reset_previous_fndecl (void)
31395 arm_previous_fndecl
= NULL_TREE
;
31398 /* Establish appropriate back-end context for processing the function
31399 FNDECL. The argument might be NULL to indicate processing at top
31400 level, outside of any function scope. */
31403 arm_set_current_function (tree fndecl
)
31405 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
31408 tree old_tree
= (arm_previous_fndecl
31409 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
31412 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
31414 /* If current function has no attributes but previous one did,
31415 use the default node. */
31416 if (! new_tree
&& old_tree
)
31417 new_tree
= target_option_default_node
;
31419 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
31420 the default have been handled by save_restore_target_globals from
31421 arm_pragma_target_parse. */
31422 if (old_tree
== new_tree
)
31425 arm_previous_fndecl
= fndecl
;
31427 /* First set the target options. */
31428 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
31430 save_restore_target_globals (new_tree
);
31433 /* Implement TARGET_OPTION_PRINT. */
31436 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
31438 int flags
= ptr
->x_target_flags
;
31439 const char *fpu_name
;
31441 fpu_name
= (ptr
->x_arm_fpu_index
== TARGET_FPU_auto
31442 ? "auto" : all_fpus
[ptr
->x_arm_fpu_index
].name
);
31444 fprintf (file
, "%*sselected isa %s\n", indent
, "",
31445 TARGET_THUMB2_P (flags
) ? "thumb2" :
31446 TARGET_THUMB_P (flags
) ? "thumb1" :
31449 if (ptr
->x_arm_arch_string
)
31450 fprintf (file
, "%*sselected architecture %s\n", indent
, "",
31451 ptr
->x_arm_arch_string
);
31453 if (ptr
->x_arm_cpu_string
)
31454 fprintf (file
, "%*sselected CPU %s\n", indent
, "",
31455 ptr
->x_arm_cpu_string
);
31457 if (ptr
->x_arm_tune_string
)
31458 fprintf (file
, "%*sselected tune %s\n", indent
, "",
31459 ptr
->x_arm_tune_string
);
31461 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_name
);
31464 /* Hook to determine if one function can safely inline another. */
31467 arm_can_inline_p (tree caller
, tree callee
)
31469 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
31470 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
31471 bool can_inline
= true;
31473 struct cl_target_option
*caller_opts
31474 = TREE_TARGET_OPTION (caller_tree
? caller_tree
31475 : target_option_default_node
);
31477 struct cl_target_option
*callee_opts
31478 = TREE_TARGET_OPTION (callee_tree
? callee_tree
31479 : target_option_default_node
);
31481 if (callee_opts
== caller_opts
)
31484 /* Callee's ISA features should be a subset of the caller's. */
31485 struct arm_build_target caller_target
;
31486 struct arm_build_target callee_target
;
31487 caller_target
.isa
= sbitmap_alloc (isa_num_bits
);
31488 callee_target
.isa
= sbitmap_alloc (isa_num_bits
);
31490 arm_configure_build_target (&caller_target
, caller_opts
, &global_options_set
,
31492 arm_configure_build_target (&callee_target
, callee_opts
, &global_options_set
,
31494 if (!bitmap_subset_p (callee_target
.isa
, caller_target
.isa
))
31495 can_inline
= false;
31497 sbitmap_free (caller_target
.isa
);
31498 sbitmap_free (callee_target
.isa
);
31500 /* OK to inline between different modes.
31501 Function with mode specific instructions, e.g using asm,
31502 must be explicitly protected with noinline. */
31506 /* Hook to fix function's alignment affected by target attribute. */
31509 arm_relayout_function (tree fndecl
)
31511 if (DECL_USER_ALIGN (fndecl
))
31514 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
31517 callee_tree
= target_option_default_node
;
31519 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
31522 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts
->x_target_flags
)));
31525 /* Inner function to process the attribute((target(...))), take an argument and
31526 set the current options from the argument. If we have a list, recursively
31527 go over the list. */
31530 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
31532 if (TREE_CODE (args
) == TREE_LIST
)
31536 for (; args
; args
= TREE_CHAIN (args
))
31537 if (TREE_VALUE (args
)
31538 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
31543 else if (TREE_CODE (args
) != STRING_CST
)
31545 error ("attribute %<target%> argument not a string");
31549 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
31552 while ((q
= strtok (argstr
, ",")) != NULL
)
31555 if (!strcmp (q
, "thumb"))
31557 opts
->x_target_flags
|= MASK_THUMB
;
31558 if (TARGET_FDPIC
&& !arm_arch_thumb2
)
31559 sorry ("FDPIC mode is not supported in Thumb-1 mode");
31562 else if (!strcmp (q
, "arm"))
31563 opts
->x_target_flags
&= ~MASK_THUMB
;
31565 else if (!strcmp (q
, "general-regs-only"))
31566 opts
->x_target_flags
|= MASK_GENERAL_REGS_ONLY
;
31568 else if (!strncmp (q
, "fpu=", 4))
31571 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+ 4,
31572 &fpu_index
, CL_TARGET
))
31574 error ("invalid fpu for target attribute or pragma %qs", q
);
31577 if (fpu_index
== TARGET_FPU_auto
)
31579 /* This doesn't really make sense until we support
31580 general dynamic selection of the architecture and all
31582 sorry ("auto fpu selection not currently permitted here");
31585 opts
->x_arm_fpu_index
= (enum fpu_type
) fpu_index
;
31587 else if (!strncmp (q
, "arch=", 5))
31589 char *arch
= q
+ 5;
31590 const arch_option
*arm_selected_arch
31591 = arm_parse_arch_option_name (all_architectures
, "arch", arch
);
31593 if (!arm_selected_arch
)
31595 error ("invalid architecture for target attribute or pragma %qs",
31600 opts
->x_arm_arch_string
= xstrndup (arch
, strlen (arch
));
31602 else if (q
[0] == '+')
31604 opts
->x_arm_arch_string
31605 = xasprintf ("%s%s", opts
->x_arm_arch_string
, q
);
31609 error ("unknown target attribute or pragma %qs", q
);
31617 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
31620 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
31621 struct gcc_options
*opts_set
)
31623 struct cl_target_option cl_opts
;
31625 if (!arm_valid_target_attribute_rec (args
, opts
))
31628 cl_target_option_save (&cl_opts
, opts
);
31629 arm_configure_build_target (&arm_active_target
, &cl_opts
, opts_set
, false);
31630 arm_option_check_internal (opts
);
31631 /* Do any overrides, such as global options arch=xxx.
31632 We do this since arm_active_target was overridden. */
31633 arm_option_reconfigure_globals ();
31634 arm_options_perform_arch_sanity_checks ();
31635 arm_option_override_internal (opts
, opts_set
);
31637 return build_target_option_node (opts
);
31641 add_attribute (const char * mode
, tree
*attributes
)
31643 size_t len
= strlen (mode
);
31644 tree value
= build_string (len
, mode
);
31646 TREE_TYPE (value
) = build_array_type (char_type_node
,
31647 build_index_type (size_int (len
)));
31649 *attributes
= tree_cons (get_identifier ("target"),
31650 build_tree_list (NULL_TREE
, value
),
31654 /* For testing. Insert thumb or arm modes alternatively on functions. */
31657 arm_insert_attributes (tree fndecl
, tree
* attributes
)
31661 if (! TARGET_FLIP_THUMB
)
31664 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
31665 || fndecl_built_in_p (fndecl
) || DECL_ARTIFICIAL (fndecl
))
31668 /* Nested definitions must inherit mode. */
31669 if (current_function_decl
)
31671 mode
= TARGET_THUMB
? "thumb" : "arm";
31672 add_attribute (mode
, attributes
);
31676 /* If there is already a setting don't change it. */
31677 if (lookup_attribute ("target", *attributes
) != NULL
)
31680 mode
= thumb_flipper
? "thumb" : "arm";
31681 add_attribute (mode
, attributes
);
31683 thumb_flipper
= !thumb_flipper
;
31686 /* Hook to validate attribute((target("string"))). */
31689 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
31690 tree args
, int ARG_UNUSED (flags
))
31693 struct gcc_options func_options
;
31694 tree cur_tree
, new_optimize
;
31695 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
31697 /* Get the optimization options of the current function. */
31698 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
31700 /* If the function changed the optimization levels as well as setting target
31701 options, start with the optimizations specified. */
31702 if (!func_optimize
)
31703 func_optimize
= optimization_default_node
;
31705 /* Init func_options. */
31706 memset (&func_options
, 0, sizeof (func_options
));
31707 init_options_struct (&func_options
, NULL
);
31708 lang_hooks
.init_options_struct (&func_options
);
31710 /* Initialize func_options to the defaults. */
31711 cl_optimization_restore (&func_options
,
31712 TREE_OPTIMIZATION (func_optimize
));
31714 cl_target_option_restore (&func_options
,
31715 TREE_TARGET_OPTION (target_option_default_node
));
31717 /* Set func_options flags with new target mode. */
31718 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
31719 &global_options_set
);
31721 if (cur_tree
== NULL_TREE
)
31724 new_optimize
= build_optimization_node (&func_options
);
31726 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
31728 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
31730 finalize_options_struct (&func_options
);
31735 /* Match an ISA feature bitmap to a named FPU. We always use the
31736 first entry that exactly matches the feature set, so that we
31737 effectively canonicalize the FPU name for the assembler. */
31739 arm_identify_fpu_from_isa (sbitmap isa
)
31741 auto_sbitmap
fpubits (isa_num_bits
);
31742 auto_sbitmap
cand_fpubits (isa_num_bits
);
31744 bitmap_and (fpubits
, isa
, isa_all_fpubits
);
31746 /* If there are no ISA feature bits relating to the FPU, we must be
31747 doing soft-float. */
31748 if (bitmap_empty_p (fpubits
))
31751 for (unsigned int i
= 0; i
< TARGET_FPU_auto
; i
++)
31753 arm_initialize_isa (cand_fpubits
, all_fpus
[i
].isa_bits
);
31754 if (bitmap_equal_p (fpubits
, cand_fpubits
))
31755 return all_fpus
[i
].name
;
31757 /* We must find an entry, or things have gone wrong. */
31758 gcc_unreachable ();
31761 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
31762 by the function fndecl. */
31764 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
31766 tree target_parts
= DECL_FUNCTION_SPECIFIC_TARGET (decl
);
31768 struct cl_target_option
*targ_options
;
31770 targ_options
= TREE_TARGET_OPTION (target_parts
);
31772 targ_options
= TREE_TARGET_OPTION (target_option_current_node
);
31773 gcc_assert (targ_options
);
31775 /* Only update the assembler .arch string if it is distinct from the last
31776 such string we printed. arch_to_print is set conditionally in case
31777 targ_options->x_arm_arch_string is NULL which can be the case
31778 when cc1 is invoked directly without passing -march option. */
31779 std::string arch_to_print
;
31780 if (targ_options
->x_arm_arch_string
)
31781 arch_to_print
= targ_options
->x_arm_arch_string
;
31783 if (arch_to_print
!= arm_last_printed_arch_string
)
31785 std::string arch_name
31786 = arch_to_print
.substr (0, arch_to_print
.find ("+"));
31787 asm_fprintf (asm_out_file
, "\t.arch %s\n", arch_name
.c_str ());
31788 const arch_option
*arch
31789 = arm_parse_arch_option_name (all_architectures
, "-march",
31790 targ_options
->x_arm_arch_string
);
31791 auto_sbitmap
opt_bits (isa_num_bits
);
31794 if (arch
->common
.extensions
)
31796 for (const struct cpu_arch_extension
*opt
= arch
->common
.extensions
;
31802 arm_initialize_isa (opt_bits
, opt
->isa_bits
);
31803 if (bitmap_subset_p (opt_bits
, arm_active_target
.isa
)
31804 && !bitmap_subset_p (opt_bits
, isa_all_fpubits
))
31805 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n",
31811 arm_last_printed_arch_string
= arch_to_print
;
31814 fprintf (stream
, "\t.syntax unified\n");
31818 if (is_called_in_ARM_mode (decl
)
31819 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
31820 && cfun
->is_thunk
))
31821 fprintf (stream
, "\t.code 32\n");
31822 else if (TARGET_THUMB1
)
31823 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
31825 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
31828 fprintf (stream
, "\t.arm\n");
31830 std::string fpu_to_print
31831 = TARGET_SOFT_FLOAT
31832 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target
.isa
);
31834 if (fpu_to_print
!= arm_last_printed_arch_string
)
31836 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_to_print
.c_str ());
31837 arm_last_printed_fpu_string
= fpu_to_print
;
31840 if (TARGET_POKE_FUNCTION_NAME
)
31841 arm_poke_function_name (stream
, (const char *) name
);
31844 /* If MEM is in the form of [base+offset], extract the two parts
31845 of address and set to BASE and OFFSET, otherwise return false
31846 after clearing BASE and OFFSET. */
31849 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
31853 gcc_assert (MEM_P (mem
));
31855 addr
= XEXP (mem
, 0);
31857 /* Strip off const from addresses like (const (addr)). */
31858 if (GET_CODE (addr
) == CONST
)
31859 addr
= XEXP (addr
, 0);
31861 if (GET_CODE (addr
) == REG
)
31864 *offset
= const0_rtx
;
31868 if (GET_CODE (addr
) == PLUS
31869 && GET_CODE (XEXP (addr
, 0)) == REG
31870 && CONST_INT_P (XEXP (addr
, 1)))
31872 *base
= XEXP (addr
, 0);
31873 *offset
= XEXP (addr
, 1);
31878 *offset
= NULL_RTX
;
31883 /* If INSN is a load or store of address in the form of [base+offset],
31884 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
31885 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
31886 otherwise return FALSE. */
31889 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
31893 gcc_assert (INSN_P (insn
));
31894 x
= PATTERN (insn
);
31895 if (GET_CODE (x
) != SET
)
31899 dest
= SET_DEST (x
);
31900 if (GET_CODE (src
) == REG
&& GET_CODE (dest
) == MEM
)
31903 extract_base_offset_in_addr (dest
, base
, offset
);
31905 else if (GET_CODE (src
) == MEM
&& GET_CODE (dest
) == REG
)
31908 extract_base_offset_in_addr (src
, base
, offset
);
31913 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
31916 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31918 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
31919 and PRI are only calculated for these instructions. For other instruction,
31920 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
31921 instruction fusion can be supported by returning different priorities.
31923 It's important that irrelevant instructions get the largest FUSION_PRI. */
31926 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
31927 int *fusion_pri
, int *pri
)
31933 gcc_assert (INSN_P (insn
));
31936 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
31943 /* Load goes first. */
31945 *fusion_pri
= tmp
- 1;
31947 *fusion_pri
= tmp
- 2;
31951 /* INSN with smaller base register goes first. */
31952 tmp
-= ((REGNO (base
) & 0xff) << 20);
31954 /* INSN with smaller offset goes first. */
31955 off_val
= (int)(INTVAL (offset
));
31957 tmp
-= (off_val
& 0xfffff);
31959 tmp
+= ((- off_val
) & 0xfffff);
31966 /* Construct and return a PARALLEL RTX vector with elements numbering the
31967 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31968 the vector - from the perspective of the architecture. This does not
31969 line up with GCC's perspective on lane numbers, so we end up with
31970 different masks depending on our target endian-ness. The diagram
31971 below may help. We must draw the distinction when building masks
31972 which select one half of the vector. An instruction selecting
31973 architectural low-lanes for a big-endian target, must be described using
31974 a mask selecting GCC high-lanes.
31976 Big-Endian Little-Endian
31978 GCC 0 1 2 3 3 2 1 0
31979 | x | x | x | x | | x | x | x | x |
31980 Architecture 3 2 1 0 3 2 1 0
31982 Low Mask: { 2, 3 } { 0, 1 }
31983 High Mask: { 0, 1 } { 2, 3 }
31987 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
31989 int nunits
= GET_MODE_NUNITS (mode
);
31990 rtvec v
= rtvec_alloc (nunits
/ 2);
31991 int high_base
= nunits
/ 2;
31997 if (BYTES_BIG_ENDIAN
)
31998 base
= high
? low_base
: high_base
;
32000 base
= high
? high_base
: low_base
;
32002 for (i
= 0; i
< nunits
/ 2; i
++)
32003 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
32005 t1
= gen_rtx_PARALLEL (mode
, v
);
32009 /* Check OP for validity as a PARALLEL RTX vector with elements
32010 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
32011 from the perspective of the architecture. See the diagram above
32012 arm_simd_vect_par_cnst_half_p for more details. */
32015 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
32018 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
32019 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
32020 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
32023 if (!VECTOR_MODE_P (mode
))
32026 if (count_op
!= count_ideal
)
32029 for (i
= 0; i
< count_ideal
; i
++)
32031 rtx elt_op
= XVECEXP (op
, 0, i
);
32032 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
32034 if (!CONST_INT_P (elt_op
)
32035 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
32041 /* Can output mi_thunk for all cases except for non-zero vcall_offset
32044 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
32047 /* For now, we punt and not handle this for TARGET_THUMB1. */
32048 if (vcall_offset
&& TARGET_THUMB1
)
32051 /* Otherwise ok. */
32055 /* Generate RTL for a conditional branch with rtx comparison CODE in
32056 mode CC_MODE. The destination of the unlikely conditional branch
32060 arm_gen_unlikely_cbranch (enum rtx_code code
, machine_mode cc_mode
,
32064 x
= gen_rtx_fmt_ee (code
, VOIDmode
,
32065 gen_rtx_REG (cc_mode
, CC_REGNUM
),
32068 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
32069 gen_rtx_LABEL_REF (VOIDmode
, label_ref
),
32071 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
32074 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
32076 For pure-code sections there is no letter code for this attribute, so
32077 output all the section flags numerically when this is needed. */
32080 arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
)
32083 if (flags
& SECTION_ARM_PURECODE
)
32087 if (!(flags
& SECTION_DEBUG
))
32089 if (flags
& SECTION_EXCLUDE
)
32090 *num
|= 0x80000000;
32091 if (flags
& SECTION_WRITE
)
32093 if (flags
& SECTION_CODE
)
32095 if (flags
& SECTION_MERGE
)
32097 if (flags
& SECTION_STRINGS
)
32099 if (flags
& SECTION_TLS
)
32101 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
32110 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
32112 If pure-code is passed as an option, make sure all functions are in
32113 sections that have the SHF_ARM_PURECODE attribute. */
32116 arm_function_section (tree decl
, enum node_frequency freq
,
32117 bool startup
, bool exit
)
32119 const char * section_name
;
32122 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
32123 return default_function_section (decl
, freq
, startup
, exit
);
32125 if (!target_pure_code
)
32126 return default_function_section (decl
, freq
, startup
, exit
);
32129 section_name
= DECL_SECTION_NAME (decl
);
32131 /* If a function is not in a named section then it falls under the 'default'
32132 text section, also known as '.text'. We can preserve previous behavior as
32133 the default text section already has the SHF_ARM_PURECODE section
32137 section
*default_sec
= default_function_section (decl
, freq
, startup
,
32140 /* If default_sec is not null, then it must be a special section like for
32141 example .text.startup. We set the pure-code attribute and return the
32142 same section to preserve existing behavior. */
32144 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
32145 return default_sec
;
32148 /* Otherwise look whether a section has already been created with
32150 sec
= get_named_section (decl
, section_name
, 0);
32152 /* If that is not the case passing NULL as the section's name to
32153 'get_named_section' will create a section with the declaration's
32155 sec
= get_named_section (decl
, NULL
, 0);
32157 /* Set the SHF_ARM_PURECODE attribute. */
32158 sec
->common
.flags
|= SECTION_ARM_PURECODE
;
32163 /* Implements the TARGET_SECTION_FLAGS hook.
32165 If DECL is a function declaration and pure-code is passed as an option
32166 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
32167 section's name and RELOC indicates whether the declarations initializer may
32168 contain runtime relocations. */
32170 static unsigned int
32171 arm_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
32173 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
32175 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
&& target_pure_code
)
32176 flags
|= SECTION_ARM_PURECODE
;
32181 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
32184 arm_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
,
32186 rtx
*quot_p
, rtx
*rem_p
)
32188 if (mode
== SImode
)
32189 gcc_assert (!TARGET_IDIV
);
32191 scalar_int_mode libval_mode
32192 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode
));
32194 rtx libval
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
32196 op0
, GET_MODE (op0
),
32197 op1
, GET_MODE (op1
));
32199 rtx quotient
= simplify_gen_subreg (mode
, libval
, libval_mode
, 0);
32200 rtx remainder
= simplify_gen_subreg (mode
, libval
, libval_mode
,
32201 GET_MODE_SIZE (mode
));
32203 gcc_assert (quotient
);
32204 gcc_assert (remainder
);
32206 *quot_p
= quotient
;
32207 *rem_p
= remainder
;
32210 /* This function checks for the availability of the coprocessor builtin passed
32211 in BUILTIN for the current target. Returns true if it is available and
32212 false otherwise. If a BUILTIN is passed for which this function has not
32213 been implemented it will cause an exception. */
32216 arm_coproc_builtin_available (enum unspecv builtin
)
32218 /* None of these builtins are available in Thumb mode if the target only
32219 supports Thumb-1. */
32237 case VUNSPEC_LDC2L
:
32239 case VUNSPEC_STC2L
:
32242 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
32249 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
32251 if (arm_arch6
|| arm_arch5te
)
32254 case VUNSPEC_MCRR2
:
32255 case VUNSPEC_MRRC2
:
32260 gcc_unreachable ();
32265 /* This function returns true if OP is a valid memory operand for the ldc and
32266 stc coprocessor instructions and false otherwise. */
32269 arm_coproc_ldc_stc_legitimate_address (rtx op
)
32271 HOST_WIDE_INT range
;
32272 /* Has to be a memory operand. */
32278 /* We accept registers. */
32282 switch GET_CODE (op
)
32286 /* Or registers with an offset. */
32287 if (!REG_P (XEXP (op
, 0)))
32292 /* The offset must be an immediate though. */
32293 if (!CONST_INT_P (op
))
32296 range
= INTVAL (op
);
32298 /* Within the range of [-1020,1020]. */
32299 if (!IN_RANGE (range
, -1020, 1020))
32302 /* And a multiple of 4. */
32303 return (range
% 4) == 0;
32309 return REG_P (XEXP (op
, 0));
32311 gcc_unreachable ();
32316 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
32318 In VFPv1, VFP registers could only be accessed in the mode they were
32319 set, so subregs would be invalid there. However, we don't support
32320 VFPv1 at the moment, and the restriction was lifted in VFPv2.
32322 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
32323 VFP registers in little-endian order. We can't describe that accurately to
32324 GCC, so avoid taking subregs of such values.
32326 The only exception is going from a 128-bit to a 64-bit type. In that
32327 case the data layout happens to be consistent for big-endian, so we
32328 explicitly allow that case. */
32331 arm_can_change_mode_class (machine_mode from
, machine_mode to
,
32332 reg_class_t rclass
)
32335 && !(GET_MODE_SIZE (from
) == 16 && GET_MODE_SIZE (to
) == 8)
32336 && (GET_MODE_SIZE (from
) > UNITS_PER_WORD
32337 || GET_MODE_SIZE (to
) > UNITS_PER_WORD
)
32338 && reg_classes_intersect_p (VFP_REGS
, rclass
))
32343 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
32344 strcpy from constants will be faster. */
32346 static HOST_WIDE_INT
32347 arm_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
32349 unsigned int factor
= (TARGET_THUMB
|| ! arm_tune_xscale
? 1 : 2);
32350 if (TREE_CODE (exp
) == STRING_CST
&& !optimize_size
)
32351 return MAX (align
, BITS_PER_WORD
* factor
);
32355 /* Emit a speculation barrier on target architectures that do not have
32356 DSB/ISB directly. Such systems probably don't need a barrier
32357 themselves, but if the code is ever run on a later architecture, it
32358 might become a problem. */
32360 arm_emit_speculation_barrier_function ()
32362 emit_library_call (speculation_barrier_libfunc
, LCT_NORMAL
, VOIDmode
);
32366 namespace selftest
{
32368 /* Scan the static data tables generated by parsecpu.awk looking for
32369 potential issues with the data. We primarily check for
32370 inconsistencies in the option extensions at present (extensions
32371 that duplicate others but aren't marked as aliases). Furthermore,
32372 for correct canonicalization later options must never be a subset
32373 of an earlier option. Any extension should also only specify other
32374 feature bits and never an architecture bit. The architecture is inferred
32375 from the declaration of the extension. */
32377 arm_test_cpu_arch_data (void)
32379 const arch_option
*arch
;
32380 const cpu_option
*cpu
;
32381 auto_sbitmap
target_isa (isa_num_bits
);
32382 auto_sbitmap
isa1 (isa_num_bits
);
32383 auto_sbitmap
isa2 (isa_num_bits
);
32385 for (arch
= all_architectures
; arch
->common
.name
!= NULL
; ++arch
)
32387 const cpu_arch_extension
*ext1
, *ext2
;
32389 if (arch
->common
.extensions
== NULL
)
32392 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
32394 for (ext1
= arch
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
32399 arm_initialize_isa (isa1
, ext1
->isa_bits
);
32400 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
32402 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
32405 arm_initialize_isa (isa2
, ext2
->isa_bits
);
32406 /* If the option is a subset of the parent option, it doesn't
32407 add anything and so isn't useful. */
32408 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
32410 /* If the extension specifies any architectural bits then
32411 disallow it. Extensions should only specify feature bits. */
32412 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
32417 for (cpu
= all_cores
; cpu
->common
.name
!= NULL
; ++cpu
)
32419 const cpu_arch_extension
*ext1
, *ext2
;
32421 if (cpu
->common
.extensions
== NULL
)
32424 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
32426 for (ext1
= cpu
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
32431 arm_initialize_isa (isa1
, ext1
->isa_bits
);
32432 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
32434 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
32437 arm_initialize_isa (isa2
, ext2
->isa_bits
);
32438 /* If the option is a subset of the parent option, it doesn't
32439 add anything and so isn't useful. */
32440 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
32442 /* If the extension specifies any architectural bits then
32443 disallow it. Extensions should only specify feature bits. */
32444 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
32450 /* Scan the static data tables generated by parsecpu.awk looking for
32451 potential issues with the data. Here we check for consistency between the
32452 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
32453 a feature bit that is not defined by any FPU flag. */
32455 arm_test_fpu_data (void)
32457 auto_sbitmap
isa_all_fpubits (isa_num_bits
);
32458 auto_sbitmap
fpubits (isa_num_bits
);
32459 auto_sbitmap
tmpset (isa_num_bits
);
32461 static const enum isa_feature fpu_bitlist
[]
32462 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
32463 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
32465 for (unsigned int i
= 0; i
< TARGET_FPU_auto
; i
++)
32467 arm_initialize_isa (fpubits
, all_fpus
[i
].isa_bits
);
32468 bitmap_and_compl (tmpset
, isa_all_fpubits
, fpubits
);
32469 bitmap_clear (isa_all_fpubits
);
32470 bitmap_copy (isa_all_fpubits
, tmpset
);
32473 if (!bitmap_empty_p (isa_all_fpubits
))
32475 fprintf (stderr
, "Error: found feature bits in the ALL_FPU_INTERAL"
32476 " group that are not defined by any FPU.\n"
32477 " Check your arm-cpus.in.\n");
32478 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits
));
32483 arm_run_selftests (void)
32485 arm_test_cpu_arch_data ();
32486 arm_test_fpu_data ();
32488 } /* Namespace selftest. */
32490 #undef TARGET_RUN_TARGET_SELFTESTS
32491 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
32492 #endif /* CHECKING_P */
/* The vector of target hook implementations for the ARM back end: every
   TARGET_* macro redefined earlier in this file overrides the
   corresponding default supplied by TARGET_INITIALIZER.  */
struct gcc_target targetm = TARGET_INITIALIZER;
32496 #include "gt-arm.h"