/* Output routines for GCC for ARM.
   Copyright (C) 1991-2019 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#define INCLUDE_STRING
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"

/* This file should be included last.  */
#include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* The last .arch and .fpu assembly strings that we printed.  */
static std::string arm_last_printed_arch_string;
static std::string arm_last_printed_fpu_string;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t,
				  const function_arg_info &);
static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
static void arm_function_arg_advance (cumulative_args_t,
				      const function_arg_info &);
static pad_direction arm_function_arg_padding (machine_mode, const_tree);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);

static void arm_setup_incoming_varargs (cumulative_args_t,
					const function_arg_info &, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   const function_arg_info &);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (const function_arg_info &);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
#endif
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_save (struct cl_target_option *, struct gcc_options *);
static void arm_option_restore (struct gcc_options *,
				struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (scalar_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static void arm_autovectorize_vector_sizes (vector_sizes *, bool);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
					  const vec_perm_indices &);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
				     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
						int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
    NULL },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "naked",        0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
  { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
  { "interfacearm", 0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "notshared",    0, 0, false, true,  false, false,
    arm_handle_notshared_attribute, NULL },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
    arm_handle_cmse_nonsecure_entry, NULL },
  { "cmse_nonsecure_call", 0, 0, true, false, false, true,
    arm_handle_cmse_nonsecure_call, NULL },
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
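
/* Example usage (hypothetical user code, not part of this file): the
   "long_call" attribute applies to function types, e.g.

     void far_handler (void) __attribute__ ((long_call));

   while "cmse_nonsecure_entry" applies to declarations and, assuming an
   ARMv8-M target built with -mcmse, marks a secure-state entry point:

     void __attribute__ ((cmse_nonsecure_entry)) entry_fn (void);  */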
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE arm_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
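
/* Worked out: an anchor then covers offsets [-4088, 4095], a block of
   4095 - (-4088) + 1 = 8184 bytes, and 8184 == 1023 * 8.  */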
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
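
/* ARMv8-A deprecates IT blocks containing more than one instruction, so
   with -mrestrict-it an IT block covers a single insn; otherwise up to
   the architectural maximum of four conditional insns may be packed.  */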
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
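
/* Each #undef/#define pair above overrides the default value of one hook
   slot declared in target.def; the pairs take effect when the
   TARGET_INITIALIZER macro from "target-def.h" is expanded to build the
   targetm vector near the end of this file.  */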
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* Active target architecture and tuning.  */

struct arm_build_target arm_active_target;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5T extensions.  */
int arm_arch5t = 0;

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
int arm_arch5te = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
int arm_arch8_3 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
int arm_arch8_4 = 0;
/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;
/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn. */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
int arm_arch_dotprod = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

struct cpu_tune
{
  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  {						\
    num_slots,					\
    l1_size,					\
    l1_line_size				\
  }
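
/* The three fields are the number of simultaneous prefetch slots, the
   L1 cache size in kilobytes and the L1 line size in bytes; the
   "not beneficial" triple { 0, -1, -1 } encodes no slots and unknown
   cache geometry, disabling prefetch-related tuning.  */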
/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_unalign_store_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
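
/* The table entries below are scaled by COSTS_N_INSNS (rtl.h defines it
   as (N) * 4), so each value is the approximate extra cost, in whole
   instructions, on top of the baseline cost of the insn itself.  */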
const struct cpu_cost_table cortexa9_extra_costs =
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (2),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (3),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (4),	/* extend.  */
  COSTS_N_INSNS (4),	/* extend_add.  */
  COSTS_N_INSNS (2),	/* load.  */
  COSTS_N_INSNS (2),	/* load_sign_extend.  */
  COSTS_N_INSNS (2),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (5),	/* loadf.  */
  COSTS_N_INSNS (5),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (2),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (14),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (24),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table cortexa8_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  0,			/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  0,			/* log_shift_reg.  */
  0,			/* extend_arith.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (1),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* loadf.  */
  COSTS_N_INSNS (1),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (1),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (36),	/* div.  */
  COSTS_N_INSNS (11),	/* mult.  */
  COSTS_N_INSNS (20),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */
  COSTS_N_INSNS (64),	/* div.  */
  COSTS_N_INSNS (16),	/* mult.  */
  COSTS_N_INSNS (25),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (6),	/* widen.  */
  COSTS_N_INSNS (6),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table cortexa5_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (6),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table cortexa7_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (2),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table cortexa12_extra_costs =
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (3),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (3),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  0,			/* load_unaligned.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  0,			/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table cortexa15_extra_costs =
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (4),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (4),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  0,			/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  0,			/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (5),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
const struct cpu_cost_table v7m_extra_costs =
  0,			/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* non_exec.  */
  false			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (8)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (2),	/* load.  */
  0,			/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  1,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  1,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (3),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (7),	/* div.  */
  COSTS_N_INSNS (2),	/* mult.  */
  COSTS_N_INSNS (5),	/* mult_addsub.  */
  COSTS_N_INSNS (3),	/* fma.  */
  COSTS_N_INSNS (1),	/* addsub.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1)	/* alu.  */
const struct addr_mode_cost_table generic_addr_mode_costs =
{
  /* int.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  },
  /* float.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  },
  /* vector.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  }
};
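
/* The rows are indexed by addressing-mode kind, roughly: the default
   form, an offset form without write-back and a form with write-back.
   All-zero rows make the addressing-mode choice cost-neutral for the
   generic tuning.  */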
const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
=
1820 &generic_extra_costs
, /* Insn extra costs. */
1821 &generic_addr_mode_costs
, /* Addressing mode costs. */
1822 NULL
, /* Sched adj cost. */
1823 arm_default_branch_cost
,
1824 &arm_default_vec_cost
,
1825 1, /* Constant limit. */
1826 5, /* Max cond insns. */
1827 8, /* Memset max inline. */
1828 1, /* Issue rate. */
1829 ARM_PREFETCH_NOT_BENEFICIAL
,
1830 tune_params::PREF_CONST_POOL_TRUE
,
1831 tune_params::PREF_LDRD_FALSE
,
1832 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1833 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1834 tune_params::DISPARAGE_FLAGS_NEITHER
,
1835 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1836 tune_params::FUSE_NOTHING
,
1837 tune_params::SCHED_AUTOPREF_OFF

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xscale_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_9e_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  32,					/* Memset max inline.  */
  4,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
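
/* ARM_PREFETCH_BENEFICIAL bundles the tune_params prefetch settings as
   (num_slots, l1_cache_size, l1_cache_line_size) -- so (4,32,32) above
   models four simultaneous prefetch slots and a 32K L1 cache with
   32-byte lines, values that arm_option_override later feeds into
   PARAM_SIMULTANEOUS_PREFETCHES and the L1 cache params.  (Argument
   order assumed from the macro's definition alongside tune_params in
   arm-protos.h.)  */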

const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};

/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
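
/* Making the trade-off above concrete: materialising a 32-bit constant
   as MOVW+MOVT is two 4-byte insns at one cycle each, while an
   "ldr rN, =const" is a single (often 16-bit) insn plus a 4-byte
   literal-pool entry, also totalling two cycles, with back-to-back
   literal loads able to pipeline and save a cycle.  Code size is
   roughly a wash; the pipelining and icache effects are what tip
   arm_v7m_tune to PREF_CONST_POOL_TRUE.  (Cycle counts as stated in
   the comment above; encoding sizes are illustrative.)  */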

/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m1.small-multiply.  */
const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Auto-generated CPU, FPU and architecture tables.  */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (e.g. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";

/* Supported TLS relocations.  */
enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
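
/* Worked example of the loop above (Kernighan's method): for
   VALUE = 0b101100 the loop runs three times, clearing bit 2, then
   bit 3, then bit 5, and returns 3.  Each "value &= value - 1" step
   removes exactly one set bit, so the iteration count equals the
   population count.  */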

/* Return the number of bits set in BMAP.  */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;

  return count;
}

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;

/* A small helper for setting fixed-point libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
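
/* For example (names derived from the sprintf formats above),
   arm_set_fixed_optab_libfunc (add_optab, mode, "add", "sa", 3)
   registers "__gnu_addsa3" as the addition libcall for the signed
   accumulator mode, while a NUM_SUFFIX of 0 drops the trailing
   digit entirely.  */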

static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
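
/* Thus a conversion between two signed fractional modes, say "hq" to
   "sq", yields "__gnu_fracthqsq2" (the "2" suffix applies because both
   modes are fixed-point of the same signedness and class), whereas a
   fixed-point-to-float conversion such as "sa" to "df" gets no suffix:
   "__gnu_fractsadf".  */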

static GTY(()) rtx speculation_barrier_libfunc;

/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
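
  /* The net effect of the tables above: on a soft-float AEABI target a
     source-level "a / b" on 32-bit ints expands to a call such as

	 bl  __aeabi_idiv	@ quotient returned in r0

     and "a % b" reuses __aeabi_idivmod, whose remainder comes back in
     r1.  (Register mapping per the Run-Time ABI; the asm line is only
     illustrative.)  */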

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
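
  /* With the NULL entries registered above, an expression such as
     "__fp16 c = a + b" is therefore lowered to h2f conversions of both
     operands, an SFmode addition, and an f2h conversion of the result,
     rather than a direct HFmode libcall.  */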

  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" },
	{ E_QImode, "qi" },
	{ E_HImode, "hi" },
	{ E_SImode, "si" },
	{ E_DImode, "di" },
	{ E_SFmode, "sf" },
	{ E_DFmode, "df" }
      };
    unsigned int i, j;

    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }

    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");

  speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S 4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
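
/* A concrete consequence of the ABI note above: since the AAPCS places
   __va_list in namespace std for C++, a function "void f (va_list)"
   mangles as "_Z1fSt9__va_list", so renaming the tag would silently
   break link compatibility.  (Illustrative mangled name.)  */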

/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when "
	     "compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful "
	     "when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
	     "debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  if (target_pure_code || target_slow_flash_data)
    {
      const char *flag = (target_pure_code ? "-mpure-code" :
			  "-mslow-flash-data");

      /* We only support -mpure-code and -mslow-flash-data on M-profile
	 targets with MOVT.  */
      if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
	error ("%s only supports non-pic code on M-profile targets with the "
	       "MOVT instruction", flag);

      /* Cannot load addresses: -mslow-flash-data forbids literal pool and
	 -mword-relocations forbids relocation of MOVT/MOVW.  */
      if (target_word_relocations)
	error ("%s incompatible with %<-mword-relocations%>", flag);
    }
}

/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  /* Increase the number of conditional instructions with -Os.  */
  max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;

  /* For THUMB2, we limit the conditional sequence to one IT block.  */
  if (TARGET_THUMB2)
    max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
}
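
/* For reference on the Thumb-1 ranges above: the word-load form encodes
   a 5-bit immediate scaled by 4, so byte offsets 0..124 are reachable
   in one insn; halfword loads reach 0..62 and byte loads 0..31.  An
   anchor window of [0, 127] therefore keeps the whole window reachable
   for byte accesses while covering nearly all of it for word accesses,
   which is the empirical sweet spot the comment refers to.  */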

/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;

static void
arm_override_options_after_change_1 (struct gcc_options *opts)
{
  /* -falign-functions without argument: supply one.  */
  if (opts->x_flag_align_functions && !opts->x_str_align_functions)
    opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
				  && opts->x_optimize_size ? "2" : "4";
}

/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_configure_build_target (&arm_active_target,
			      TREE_TARGET_OPTION (target_option_default_node),
			      &global_options_set, false);

  arm_override_options_after_change_1 (&global_options);
}

/* Implement TARGET_OPTION_SAVE.  */
static void
arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
{
  ptr->x_arm_arch_string = opts->x_arm_arch_string;
  ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
  ptr->x_arm_tune_string = opts->x_arm_tune_string;
}

/* Implement TARGET_OPTION_RESTORE.  */
static void
arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
{
  opts->x_arm_arch_string = ptr->x_arm_arch_string;
  opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
  opts->x_arm_tune_string = ptr->x_arm_tune_string;
  arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
			      false);
}

/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts);

  if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with
	 e.g., -march=armv4.  */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* We need to remember the initial values so that combinations of options
     like -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT.  */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA,
       i.e. Thumb2 and ARM state only;
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors;
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors;
     - ARMv6-M architecture-based processors;
     - ARMv8-M Baseline processors.  */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
				  && arm_arch6
				  && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  if (TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_inline_asm_unified = true;

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}

static sbitmap isa_all_fpubits;
static sbitmap isa_quirkbits;

/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical.  */
void
arm_configure_build_target (struct arm_build_target *target,
			    struct cl_target_option *opts,
			    struct gcc_options *opts_set,
			    bool warn_compatible)
{
  const cpu_option *arm_selected_tune = NULL;
  const arch_option *arm_selected_arch = NULL;
  const cpu_option *arm_selected_cpu = NULL;
  const arm_fpu_desc *arm_selected_fpu = NULL;
  const char *tune_opts = NULL;
  const char *arch_opts = NULL;
  const char *cpu_opts = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;

  if (opts_set->x_arm_arch_string)
    {
      arm_selected_arch = arm_parse_arch_option_name (all_architectures,
						      "-march",
						      opts->x_arm_arch_string);
      arch_opts = strchr (opts->x_arm_arch_string, '+');
    }

  if (opts_set->x_arm_cpu_string)
    {
      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
						    opts->x_arm_cpu_string);
      cpu_opts = strchr (opts->x_arm_cpu_string, '+');
      arm_selected_tune = arm_selected_cpu;
      /* If taking the tuning from -mcpu, we don't need to rescan the
	 options for tuning.  */
    }

  if (opts_set->x_arm_tune_string)
    {
      arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
						     opts->x_arm_tune_string);
      tune_opts = strchr (opts->x_arm_tune_string, '+');
    }

  if (arm_selected_arch)
    {
      arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_arch->common,
				 arch_opts);

      if (arm_selected_cpu)
	{
	  auto_sbitmap cpu_isa (isa_num_bits);
	  auto_sbitmap isa_delta (isa_num_bits);

	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
				     cpu_opts);
	  bitmap_xor (isa_delta, cpu_isa, target->isa);
	  /* Ignore any bits that are quirk bits.  */
	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
	  /* Ignore (for now) any bits that might be set by -mfpu.  */
	  bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);

	  if (!bitmap_empty_p (isa_delta))
	    {
	      if (warn_compatible)
		warning (0, "switch %<-mcpu=%s%> conflicts "
			 "with %<-march=%s%> switch",
			 arm_selected_cpu->common.name,
			 arm_selected_arch->common.name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	      target->arch_name = arm_selected_arch->common.name;
	    }
	  else
	    {
	      /* Architecture and CPU are essentially the same.
		 Prefer the CPU setting.  */
	      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
	      target->core_name = arm_selected_cpu->common.name;
	      /* Copy the CPU's capabilities, so that we inherit the
		 appropriate extensions and quirks.  */
	      bitmap_copy (target->isa, cpu_isa);
	    }
	}
      else
	{
	  /* Pick a CPU based on the architecture.  */
	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	  target->arch_name = arm_selected_arch->common.name;
	  /* Note: target->core_name is left unset in this path.  */
	}
    }
  else if (arm_selected_cpu)
    {
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }
  /* If the user did not specify a processor or architecture, choose
     one for them.  */
  else
    {
      const cpu_option *sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
						    TARGET_CPU_DEFAULT);
      cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
      gcc_assert (arm_selected_cpu->common.name);

      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed.  */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->common.isa_bits);
      arm_parse_option_features (default_isa, &arm_selected_cpu->common,
				 cpu_opts);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	bitmap_set_bit (sought_isa, isa_bit_thumb);

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory.  */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features.  */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->common.name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
	      /* An exact match?  */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->common.name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const cpu_option *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match.  */
	      for (sel = all_cores; sel->common.name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}

      /* Now we know the CPU, we can finally initialize the target
	 structure.  */
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }

  gcc_assert (arm_selected_cpu);
  gcc_assert (arm_selected_arch);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }

  if (!arm_selected_tune)
    arm_selected_tune = arm_selected_cpu;
  else /* Validate the features passed to -mtune.  */
    arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);

  const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];

  /* Finish initializing the target structure.  */
  target->arch_pp_name = arm_selected_arch->arch;
  target->base_arch = arm_selected_arch->base_arch;
  target->profile = arm_selected_arch->profile;

  target->tune_flags = tune_data->tune_flags;
  target->tune = tune_data->tune;
  target->tune_core = tune_data->scheduler;
  arm_option_reconfigure_globals ();
}
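
/* Behaviour sketch of the resolution rules above (not an additional
   code path): with "-march=armv7-a -mcpu=cortex-m4" the two ISA sets
   differ beyond the quirk/FPU bits, so the conflict warning fires,
   code is generated for armv7-a, and the tuning defaults to the -mcpu
   choice; with "-mcpu=cortex-a8 -march=armv7-a" the two agree apart
   from quirk/FPU bits and no diagnostic is emitted.  */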

/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  static const enum isa_feature fpu_bitlist[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit };
  cl_target_option opts;

  isa_quirkbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_quirkbits, quirk_bitlist);

  isa_all_fpubits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_all_fpubits, fpu_bitlist);

  arm_active_target.isa = sbitmap_alloc (isa_num_bits);

  if (!global_options_set.x_arm_fpu_index)
    {
      bool ok;
      int fpu_index;

      ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
      arm_fpu_index = (enum fpu_type) fpu_index;
    }

  cl_target_option_save (&opts, &global_options);
  arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
			      true);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file and for enabling feature flags.  */
  arm_option_reconfigure_globals ();

  arm_tune = arm_active_target.tune_core;
  tune_flags = arm_active_target.tune_flags;
  current_tune = arm_active_target.tune;

  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "%<-mapcs-stack-check%> incompatible with "
	       "%<-mno-apcs-frame%>");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* Set up some tuning parameters.  */
  arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
  arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
  arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
  arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
  arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      warning (0, "option %<-mstructure-size-boundary%> is deprecated");

      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }

  if (TARGET_VXWORKS_RTP)
    {
      if (!global_options_set.x_arm_pic_data_is_text_relative)
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  /* If in FDPIC mode then force arm_pic_register to be r9.  */
  if (TARGET_FDPIC)
    {
      arm_pic_register = FDPIC_REGNUM;
      if (TARGET_THUMB1)
	sorry ("FDPIC mode is not supported in Thumb-1 mode");
    }

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use %qs for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }
  target_word_relocations = 1;
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }
  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> not supported "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;
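
/* Illustrative example: with strict volatile bitfields in effect, a
   volatile bit-field is accessed with the width of its declared container
   type.  Given the hypothetical declaration

       struct regs { volatile unsigned int mode : 8; };

   a read of the MODE field uses a full 32-bit load, as memory-mapped
   device registers typically require, rather than a narrower access.  */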
  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;
  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   current_tune->prefetch.num_slots,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   current_tune->prefetch.l1_cache_line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   current_tune->prefetch.l1_cache_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int param_sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      param_sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      param_sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      param_sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
			 param_sched_autopref_queue_depth,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;
  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options);

  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Create the default target_options structure.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}
/* Reconfigure global status flags from the active_target.isa.  */
void
arm_option_reconfigure_globals (void)
{
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
  arm_base_arch = arm_active_target.base_arch;

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa,
					  isa_bit_thumb);
  arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
  arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
  arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
  arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
  if (arm_fp16_inst)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
	error ("selected fp16 options are incompatible");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }

  /* And finally, set up some quirks.  */
  arm_arch_no_volatile_ce
    = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
  arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
					    isa_bit_quirk_armv6kz);

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }
}
/* Perform some validation between the desired architecture and the rest of the
   options.  */
void
arm_options_perform_arch_sanity_checks (void)
{
  /* V5T code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5t)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !TARGET_BPABI
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support %<-mcaller-super-interworking%>");
      if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support %<-mcallee-super-interworking%>");
    }

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");

  /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
     and ARMv8-M Baseline and Mainline do not allow such configuration.  */
  if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
    error ("ARMv8-M Security Extensions incompatible with selected FPU");

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (TARGET_HARD_FLOAT_ABI)
	{
	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
	    error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
	}
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
	sorry ("%<-mfloat-abi=hard%> and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
}
/* Test whether a local function descriptor is canonical, i.e.,
   whether we can use GOTOFFFUNCDESC to compute the address of the
   function.  */
static bool
arm_fdpic_local_funcdesc_p (rtx fnx)
{
  tree fn;
  enum symbol_visibility vis;
  bool ret;

  if (!TARGET_FDPIC)
    return true;

  if (! SYMBOL_REF_LOCAL_P (fnx))
    return false;

  fn = SYMBOL_REF_DECL (fnx);

  if (! fn)
    return false;

  vis = DECL_VISIBILITY (fn);

  if (vis == VISIBILITY_PROTECTED)
    /* Private function descriptors for protected functions are not
       canonical.  Temporarily change the visibility to global so that
       we can ensure uniqueness of funcdesc pointers.  */
    DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;

  ret = default_binds_local_p_1 (fn, flag_pic);

  DECL_VISIBILITY (fn) = vis;

  return ret;
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
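
/* Usage sketch (illustrative): the table above serves declarations such as

       void handler (void) __attribute__ ((interrupt ("IRQ")));

   where the string argument is matched against isr_attribute_args,
   yielding ARM_FT_ISR in this example.  */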
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
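
/* Illustrative example: a naked function supplies its own prologue,
   epilogue and return sequence in inline assembly, so the missing-return
   warning would be spurious:

       void __attribute__ ((naked)) reset_handler (void)
       {
	 __asm__ volatile ("b start_main");
       }

   (reset_handler and start_main are hypothetical names.)  */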
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.

   In FDPIC mode, the trampoline looks like:
	   .word	trampoline address
	   .word	trampoline GOT address
	   ldr		r12, [pc, #8] ; #4 for Arm mode
	   ldr		r9,  [pc, #8] ; #4 for Arm mode
	   ldr		pc,  [pc, #8] ; #4 for Arm mode
	   .word	static chain value
	   .word	GOT address
	   .word	function's address
*/
static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_FDPIC)
    {
      /* The first two words are a function descriptor pointing to the
	 trampoline code just below.  */
      if (TARGET_ARM)
	fprintf (f, "\t.arm\n");
      else if (TARGET_THUMB2)
	fprintf (f, "\t.thumb\n");
      else
	/* Only ARM and Thumb-2 are supported.  */
	gcc_unreachable ();

      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      /* Trampoline code which sets the static chain register but also
	 PIC register before jumping into real code.  */
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PC_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
    }
  else if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
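
/* Worked layout (illustrative) for the plain ARM case above: the template
   is four words, and because a pc read observes the current insn's address
   plus 8, each load with offset #0 reaches the literal two words below it:

       0:  ldr  ip, [pc, #0]    @ loads the word at 0 + 8: static chain
       4:  ldr  pc, [pc, #0]    @ loads the word at 4 + 8: function address
       8:  .word 0              @ patched with the static chain value
      12:  .word 0              @ patched with the function's address

   (ip, i.e. r12, stands in for STATIC_CHAIN_REGNUM here.)  */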
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  if (TARGET_FDPIC)
    {
      rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
      rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
      rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
      /* The function start address is at offset 8, but in Thumb mode
	 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
	 below.  */
      rtx trampoline_code_start
	= plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);

      /* Write initial funcdesc which points to the trampoline.  */
      mem = adjust_address (m_tramp, SImode, 0);
      emit_move_insn (mem, trampoline_code_start);
      mem = adjust_address (m_tramp, SImode, 4);
      emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
      /* Setup static chain.  */
      mem = adjust_address (m_tramp, SImode, 20);
      emit_move_insn (mem, chain_value);
      /* GOT + real function entry point.  */
      mem = adjust_address (m_tramp, SImode, 24);
      emit_move_insn (mem, gotaddr);
      mem = adjust_address (m_tramp, SImode, 28);
      emit_move_insn (mem, fnaddr);
    }
  else
    {
      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
      emit_move_insn (mem, chain_value);

      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
      fnaddr = XEXP (DECL_RTL (fndecl), 0);
      emit_move_insn (mem, fnaddr);
    }

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  /* For FDPIC don't fix trampoline address since it's a function
     descriptor and not a function address.  */
  if (TARGET_THUMB && !TARGET_FDPIC)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
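
/* Illustrative example: for a Thumb trampoline placed at 0x20000, the
   adjusted entry address is 0x20001.  The set bottom bit makes a BX/BLX
   transfer enter in Thumb state, while the byte address actually fetched
   from is still 0x20000.  */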
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes() != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5t here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_or_fixed_reg_p (3))
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
     several instructions if anything needs to be popped.  */
  if (saved_int_regs && IS_CMSE_ENTRY (func_type))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
	return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  /* Note this function can be called before or after reload.  */
  if (!reload_completed)
    arm_compute_frame_layout ();

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }
  else if (TARGET_HAVE_MOVT)
    {
      /* Thumb-1 Targets with MOVT.  */
      if (i > 0xffff)
	return FALSE;
      else
	return TRUE;
    }

  return FALSE;
}
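
/* Worked examples (illustrative): an ARM-mode immediate is an 8-bit value
   rotated right by an even amount, so:

       0x000000ff   valid (no rotation)
       0x00ff0000   valid (0xff rotated right by 16)
       0xf000000f   valid (0xff rotated right by 4)
       0x000001fe   invalid in ARM mode (would need an odd rotation),
		    but valid in Thumb-2, which allows arbitrary shifts
       0x00ff00ff   invalid in ARM mode, but valid in Thumb-2 as the
		    replicated pattern 0x00XY00XY  */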
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* Fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
	     || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));
	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
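
/* Worked example (illustrative): without MOVW/MOVT, a SET of 0x12345678
   is synthesized from four valid rotated immediates:

       mov  r0, #0x12000000
       orr  r0, r0, #0x00340000
       orr  r0, r0, #0x00005600
       orr  r0, r0, #0x00000078

   arm_gen_constant below searches for shorter sequences before falling
   back to this worst case.  */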
/* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
   ARM/Thumb-2 immediates and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 make it worse.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (i > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (i == 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it will
		 leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
		  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode, GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good, way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET
		       (target,
			gen_rtx_IOR (mode,
				     gen_rtx_ASHIFT (mode, source,
						     GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source, sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /* Convert.
	 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
			    and the remainder 0s for e.g. 0xfff00000)
	 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	 This can be done in 2 instructions by using shifts with mov or mvn.
	 e.g. for
	 x = x | 0xfff00000;
	 we generate.
	 mvn	r0, r0, asl #12
	 mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	 x = y | constant (which has set_zero_bit_copies number of trailing ones).
	  to
	 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	 For eg. r0 = r0 | 0xfff
	      mvn	r0, r0, lsr #12
	      mvn	r0, r0, asl #12
      */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction.  This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).
      */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }
		  break;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0)
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }

	  /* If that did not work, reverse the condition.  */
	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int)swap_condition ((enum rtx_code)*code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      gcc_unreachable ();
    }
}
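
/* Worked example (illustrative): the comparison  x > 0xfff  cannot encode
   0xfff directly (twelve set bits do not fit an 8-bit rotated immediate),
   but rewriting it as  x >= 0x1000  needs only the valid immediate 0x1000;
   i.e. GT is adjusted to GE with op1 incremented, as done above.  */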
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value(const_tree type, const_tree func,
		   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers, but we will never be using the
	 hard-float ABI on a CPU which doesn't support single-precision
	 operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER(1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */

int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */

static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type)
      && (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed.  */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
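
/* Worked example of the AAPCS branch above (illustrative, not from the
   original source): a 4-byte struct { char c[4]; } has size <=
   UNITS_PER_WORD and is returned in r0; a 12-byte struct of three ints is
   larger than a word and is not accepted by any co-processor, so it goes
   in memory; but struct { double x, y; } can be accepted as a homogeneous
   aggregate by the VFP co-processor under the hard-float variant and then
   stays out of memory.  */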
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };
static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */

static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  if (type)
    {
      attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
	  user_convention = true;
	}
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
	  if (i && i->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		    const_tree fntype ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */

static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != HFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
/* Return true if PCS_VARIANT should use VFP registers.  */

static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 hard-float VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT &&
	  (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */

static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       machine_mode mode, const_tree type,
				       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
	*count = ag_count;
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;

  if (TARGET_GENERAL_REGS_ONLY)
    error ("argument of type %qT not permitted with -mgeneral-regs-only",
	   type);

  return true;
}
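
/* For example (illustrative, not from the original source): a
   _Complex double argument has MODE_COMPLEX_FLOAT class, so *count
   becomes 2 and *base_mode DFmode; it is then accepted only if
   use_vfp_abi allows double precision (TARGET_VFP_DOUBLE) for the
   selected variant.  */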
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}
static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}
/* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
   for the behaviour of this function.  */

static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
		    const_tree type ATTRIBUTE_UNUSED)
{
  int rmode_size
    = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
  int shift = rmode_size / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! TARGET_NEON)
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;
	    if (!TARGET_NEON)
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
					 GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}
/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.  */

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode
      || (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
	  && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!TARGET_NEON)
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}
      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
     be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a call.  This
     routine must not fail and will only be called if is_return_candidate
     returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}
static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
			   const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
							type))
	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
							     mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */

static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if (ncrn & 1)
    {
      int res = arm_needs_doubleword_align (mode, type);
      /* Only warn during RTL expansion of call stmts, otherwise we would
	 warn e.g. during gimplification even on functions that will be
	 always inlined, and we'd warn multiple times.  Don't warn when
	 called in expand_function_start either, as we warn instead in
	 arm_function_arg_boundary in that case.  */
      if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", type);
      else if (res > 0)
	ncrn++;
    }

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
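
/* Worked example of rules C3-C5 (illustrative, not from the original
   source): for a call f (int, long long), the int takes r0 and leaves
   ncrn == 1; the long long needs doubleword alignment, so C3 rounds ncrn
   up to 2 and C4 assigns r2/r3.  For f (int, int, int, long long)
   instead, ncrn is 3 after the ints, C3 rounds it to 4, C4 and C5 both
   fail because no core registers remain, and the long long goes on the
   stack.  */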
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */

void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* Legacy ABIs.  */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}
/* Return 2 if double word alignment is required for argument passing,
   but wasn't required before the fix for PR88469.
   Return 1 if double word alignment is required for argument passing.
   Return -1 if double word alignment used to be required for argument
   passing before PR77728 ABI fix, but is not required anymore.
   Return 0 if double word alignment is not required and wasn't required
   before either.  */

static int
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  int ret = 0;
  int ret2 = 0;
  /* Record/aggregate types: Use greatest member alignment of any member.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      {
	if (TREE_CODE (field) == FIELD_DECL)
	  return 1;
	else
	  /* Before PR77728 fix, we were incorrectly considering also
	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
	     Make sure we can warn about that with -Wpsabi.  */
	  ret = -1;
      }
    else if (TREE_CODE (field) == FIELD_DECL
	     && DECL_BIT_FIELD_TYPE (field)
	     && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
      ret2 = 2;

  if (ret2)
    return 2;

  return ret;
}
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   ARG is a description of the argument.

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (arg.end_marker_p ())
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (arg.mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (arg.mode,
			    pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);

      pcum->can_split = false;
      return NULL_RTX;
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
    {
      int res = arm_needs_doubleword_align (arg.mode, arg.type);
      if (res < 0 && warn_psabi)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", arg.type);
      else if (res > 0)
	{
	  pcum->nregs++;
	  if (res > 1 && warn_psabi)
	    inform (input_location, "parameter passing for argument of type "
		    "%qT changed in GCC 9.1", arg.type);
	}
    }

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (arg.mode, arg.type);

  if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (arg.mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (!ARM_DOUBLEWORD_ALIGN)
    return PARM_BOUNDARY;

  int res = arm_needs_doubleword_align (mode, type);
  if (res < 0 && warn_psabi)
    inform (input_location, "parameter passing for argument of type %qT "
	    "changed in GCC 7.1", type);
  if (res > 1 && warn_psabi)
    inform (input_location, "parameter passing for argument of type "
	    "%qT changed in GCC 9.1", type);

  return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
/* Update the data in PCUM to advance over argument ARG.  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v,
			  const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
							      arg.type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (arg.mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */

static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */

static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
	  || TREE_CODE (*node) == METHOD_TYPE)
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	    }
	}
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */

static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* This function returns true if a function with declaration FNDECL and type
   FNTYPE uses the stack to pass arguments or return variables and false
   otherwise.  This is used for functions with the attributes
   'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
   diagnostic messages if the stack is used.  NAME is the name of the attribute
   used.  */

static bool
cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
{
  function_args_iterator args_iter;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;
  bool first_param = true;
  tree arg_type, prev_arg_type = NULL_TREE, ret_type;

  /* Error out if any argument is passed on the stack.  */
  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
  args_so_far = pack_cumulative_args (&args_so_far_v);
  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
    {
      rtx arg_rtx;

      prev_arg_type = arg_type;
      if (VOID_TYPE_P (arg_type))
	continue;

      function_arg_info arg (arg_type, /*named=*/true);
      if (!first_param)
	/* ??? We should advance after processing the argument and pass
	   the argument we're advancing past.  */
	arm_function_arg_advance (args_so_far, arg);
      arg_rtx = arm_function_arg (args_so_far, arg);
      if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
	{
	  error ("%qE attribute not available to functions with arguments "
		 "passed on the stack", name);
	  return true;
	}
      first_param = false;
    }

  /* Error out for variadic functions since we cannot control how many
     arguments will be passed and thus stack could be used.  stdarg_p () is not
     used for the checking to avoid browsing arguments twice.  */
  if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
    {
      error ("%qE attribute not available to functions with variable number "
	     "of arguments", name);
      return true;
    }

  /* Error out if return value is passed on the stack.  */
  ret_type = TREE_TYPE (fntype);
  if (arm_return_in_memory (ret_type, fntype))
    {
      error ("%qE attribute not available to functions that return value on "
	     "the stack", name);
      return true;
    }
  return false;
}
/* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function declaration tree or otherwise issue a warning.  */

static tree
arm_handle_cmse_nonsecure_entry (tree *node, tree name,
				 tree /* args */,
				 int /* flags */,
				 bool *no_add_attrs)
{
  tree fndecl;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option.", name);
      return NULL_TREE;
    }

  /* Ignore attribute for function types.  */
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  fndecl = *node;

  /* Warn for static linkage functions.  */
  if (!TREE_PUBLIC (fndecl))
    {
      warning (OPT_Wattributes, "%qE attribute has no effect on functions "
	       "with static linkage", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
						      TREE_TYPE (fndecl));
  return NULL_TREE;
}
/* Called upon detection of the use of the cmse_nonsecure_call attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function type tree or otherwise issue a diagnostic.  The
   reason we check this at declaration time is to only allow the use of the
   attribute with declarations of function pointers and not function
   declarations.  This function checks NODE is of the expected type and issues
   diagnostics otherwise using NAME.  If it is not of the expected type
   *NO_ADD_ATTRS will be set to true.  */

static tree
arm_handle_cmse_nonsecure_call (tree *node, tree name,
				tree /* args */,
				int /* flags */,
				bool *no_add_attrs)
{
  tree decl = NULL_TREE, fntype = NULL_TREE;
  tree type;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option.", name);
      return NULL_TREE;
    }

  if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
    {
      decl = *node;
      fntype = TREE_TYPE (decl);
    }

  while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
    fntype = TREE_TYPE (fntype);

  if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
	       "function pointer", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);

  if (*no_add_attrs)
    return NULL_TREE;

  /* Prevent trees being shared among function types with and without
     cmse_nonsecure_call attribute.  */
  type = TREE_TYPE (decl);

  type = build_distinct_type_copy (type);
  TREE_TYPE (decl) = type;
  fntype = type;

  while (TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      type = fntype;
      fntype = TREE_TYPE (fntype);
      fntype = build_distinct_type_copy (fntype);
      TREE_TYPE (type) = fntype;
    }

  /* Construct a type attribute and add it to the function type.  */
  tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
			  TYPE_ATTRIBUTES (fntype));
  TYPE_ATTRIBUTES (fntype) = attrs;
  return NULL_TREE;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  l1 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type2)) != NULL;

  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */

static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

	a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

	d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
/* Return nonzero if it is ok to make a tail-call to DECL.  */

static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  if (TARGET_FDPIC)
    {
      /* In FDPIC, never tailcall something for which we have no decl:
	 the target function could be in a different module, requiring
	 a different FDPIC register value.  */
      if (decl == NULL)
	return false;
    }

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
     may be used both as target of the call and base register for restoring
     the VFP registers.  */
  if (TARGET_APCS_FRAME && TARGET_ARM
      && TARGET_HARD_FLOAT
      && decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  /* ARMv8-M non-secure entry functions need to return with bxns which is only
     generated for entry functions themselves.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    return false;

  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
     this would complicate matters for later code generation.  */
  if (TREE_CODE (exp) == CALL_EXPR)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
	return false;
    }

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* We cannot do a tailcall for an indirect call by descriptor if all the
     argument registers are used because the only register left to load the
     address is IP and it will already contain the static chain.  */
  if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      CUMULATIVE_ARGS cum;
      cumulative_args_t cum_v;

      arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
      cum_v = pack_cumulative_args (&cum);

      for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
	{
	  tree type = TREE_VALUE (t);
	  if (!VOID_TYPE_P (type))
	    {
	      function_arg_info arg (type, /*named=*/true);
	      arm_function_arg_advance (cum_v, arg);
	    }
	}

      function_arg_info arg (integer_type_node, /*named=*/true);
      if (!arm_function_arg (cum_v, arg))
	return false;
    }

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
/* Record that the current function needs a PIC register.  If PIC_REG is null,
   a new pseudo is allocated as PIC register, otherwise PIC_REG is used.  In
   both cases cfun->machine->pic_reg is initialized if we have not already
   done so.  COMPUTE_NOW decides whether and where to set the PIC register.
   If true, the PIC register is reloaded in the current position of the
   instruction stream regardless of whether it was loaded before.  Otherwise,
   it is only loaded if not already done so (crtl->uses_pic_offset_table is
   null).  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
   true and null PIC_REG is only supported iff COMPUTE_NOW is false.  */

static void
require_pic_register (rtx pic_reg, bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table || compute_now)
    {
      gcc_assert (can_create_pseudo_p ()
		  || (pic_reg != NULL_RTX
		      && REG_P (pic_reg)
		      && GET_MODE (pic_reg) == Pmode));
      if (arm_pic_register != INVALID_REGNUM
	  && !compute_now
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx_insn *seq, *insn;

	  if (pic_reg == NULL_RTX)
	    pic_reg = gen_reg_rtx (Pmode);
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = pic_reg;

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM
		  && !compute_now)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL, pic_reg);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      if (currently_expanding_to_rtl)
		insert_insn_on_edge (seq,
				     single_succ_edge
				     (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	      else
		emit_insn (seq);
	    }
	}
    }
}
/* Generate insns to calculate the address of ORIG in pic mode.  */

static rtx_insn *
calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
{
  rtx pat;
  rtx mem;

  pat = gen_calculate_pic_address (reg, pic_reg, orig);

  /* Make the MEM as close to a constant as possible.  */
  mem = SET_SRC (pat);
  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
  MEM_READONLY_P (mem) = 1;
  MEM_NOTRAP_P (mem) = 1;

  return emit_insn (pat);
}
/* Legitimize PIC load to ORIG into REG.  If REG is NULL, a new pseudo is
   created to hold the result of the load.  If not NULL, PIC_REG indicates
   which register to use as PIC register, otherwise it is decided by register
   allocator.  COMPUTE_NOW forces the PIC register to be loaded at the current
   location in the instruction stream, regardless of whether it was loaded
   previously.  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
   true and null PIC_REG is only supported iff COMPUTE_NOW is false.

   Returns the register REG into which the PIC load is performed.  */

rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
			bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* VxWorks does not impose a fixed gap between segments; the run-time
	 gap can be different from the object-file gap.  We therefore can't
	 use GOTOFF unless we are absolutely sure that the symbol is in the
	 same segment as the GOT.  Unfortunately, the flexibility of linker
	 scripts means that we can't be sure of that in general, so assume
	 that GOTOFF is never valid on VxWorks.  */
      /* References to weak symbols cannot be resolved locally: they
	 may be overridden by a non-weak definition at link time.  */
      rtx_insn *insn;
      if ((GET_CODE (orig) == LABEL_REF
	   || (GET_CODE (orig) == SYMBOL_REF
	       && SYMBOL_REF_LOCAL_P (orig)
	       && (SYMBOL_REF_DECL (orig)
		   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
	       && (!SYMBOL_REF_FUNCTION_P (orig)
		   || arm_fdpic_local_funcdesc_p (orig))))
	  && NEED_GOT_RELOC
	  && arm_pic_data_is_text_relative)
	insn = arm_pic_static_addr (orig, reg);
      else
	{
	  /* If this function doesn't have a pic register, create one now.  */
	  require_pic_register (pic_reg, compute_now);

	  if (pic_reg == NULL_RTX)
	    pic_reg = cfun->machine->pic_reg;

	  insn = calculate_pic_address_constant (reg, pic_reg, orig);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
	return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
	return orig;

      /* Handle the case where we have:
	 const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
	 CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
	{
	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
	  return orig;
	}

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
				     pic_reg, compute_now);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				       base == reg ? 0 : reg, pic_reg,
				       compute_now);

      if (CONST_INT_P (offset))
	{
	  /* The base register doesn't really matter, we only want to
	     test the index for the appropriate mode.  */
	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      offset = force_reg (Pmode, offset);
	    }

	  if (CONST_INT_P (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	}

      if (GET_MODE_SIZE (mode) > 4
	  && (GET_MODE_CLASS (mode) == MODE_INT
	      || TARGET_SOFT_FLOAT))
	{
	  emit_insn (gen_addsi3 (reg, base, offset));
	  return reg;
	}

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_or_fixed_reg_p (reg) \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))

/* Return a mask for the call-clobbered low registers that are unused
   at the end of the prologue.  */
static unsigned long
thumb1_prologue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}

/* Similarly for the start of the epilogue.  */
static unsigned long
thumb1_epilogue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}
/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  unsigned long unused_regs
    = thumb1_prologue_unused_call_clobbered_lo_regs ();

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
    if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
      return reg;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg--)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg++)
	if (pushed_regs_mask & (1 << reg))
	  return reg;
    }

  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
7802 static GTY(()) int pic_labelno
;
/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
{
  rtx l1, labelno, pic_tmp, pic_rtx;

  if (crtl->uses_pic_offset_table == 0
      || TARGET_SINGLE_PIC_BASE
      || TARGET_FDPIC)
    return;

  gcc_assert (flag_pic);

  if (pic_reg == NULL_RTX)
    pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
				UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
	emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
      else /* TARGET_THUMB1 */
	{
	  if (arm_pic_register != INVALID_REGNUM
	      && REGNO (pic_reg) > LAST_LO_REGNUM)
	    {
	      /* We will have pushed the pic register, so we should always be
		 able to find a work register.  */
	      pic_tmp = gen_rtx_REG (SImode,
				     thumb_find_work_register (saved_regs));
	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
	    }
	  else if (arm_pic_register != INVALID_REGNUM
		   && arm_pic_register > LAST_LO_REGNUM
		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
	    {
	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
	    }
	  else
	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
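/* Roughly, the non-VxWorks path above expands to a sequence of this shape
   in ARM state (register and label names illustrative only):

	ldr	rP, .LCx
   .LPIC0:
	add	rP, pc, rP	@ rP := GOT base
	...
   .LCx:	.word	_GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   The '+ 8' is the ARM-state PC read-ahead accounted for by the
   plus_constant above; Thumb uses '+ 4'.  */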
/* Try to determine whether an object, referenced via ORIG, will be
   placed in the text or data segment.  This is used in FDPIC mode, to
   decide which relocations to use when accessing ORIG.  *IS_READONLY
   is set to true if ORIG is a read-only location, false otherwise.
   Return true if we could determine the location of ORIG, false
   otherwise.  *IS_READONLY is valid only when we return true.  */
static bool
arm_is_segment_info_known (rtx orig, bool *is_readonly)
{
  *is_readonly = false;

  if (GET_CODE (orig) == LABEL_REF)
    {
      *is_readonly = true;
      return true;
    }

  if (SYMBOL_REF_P (orig))
    {
      if (CONSTANT_POOL_ADDRESS_P (orig))
	{
	  *is_readonly = true;
	  return true;
	}
      if (SYMBOL_REF_LOCAL_P (orig)
	  && !SYMBOL_REF_EXTERNAL_P (orig)
	  && SYMBOL_REF_DECL (orig)
	  && (!DECL_P (SYMBOL_REF_DECL (orig))
	      || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
	{
	  tree decl = SYMBOL_REF_DECL (orig);
	  tree init = (TREE_CODE (decl) == VAR_DECL)
	    ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
	    ? decl : 0;
	  int reloc = 0;
	  bool named_section, readonly;

	  if (init && init != error_mark_node)
	    reloc = compute_reloc_for_constant (init);

	  named_section = TREE_CODE (decl) == VAR_DECL
	    && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
	  readonly = decl_readonly_section (decl, reloc);

	  /* We don't know where the link script will put a named
	     section, so return false in such a case.  */
	  if (named_section)
	    return false;

	  *is_readonly = readonly;
	  return true;
	}

      /* We don't know.  */
      return false;
    }

  gcc_unreachable ();
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;
  rtx_insn *insn;

  gcc_assert (flag_pic);

  bool is_readonly = false;
  bool info_known = false;

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig))
    info_known = arm_is_segment_info_known (orig, &is_readonly);

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig)
      && !info_known)
    {
      /* We don't know where orig is stored, so we have to be
	 pessimistic and use a GOT relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      insn = calculate_pic_address_constant (reg, pic_reg, orig);
    }
  else if (TARGET_FDPIC
	   && SYMBOL_REF_P (orig)
	   && (SYMBOL_REF_FUNCTION_P (orig)
	       || !is_readonly))
    {
      /* We use the GOTOFF relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
      emit_insn (gen_movsi (reg, l1));
      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
    }
  else
    {
      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
	 PC-relative access.  */
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */
      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
				   UNSPEC_SYMBOL_OFFSET);
      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
						   labelno));
    }

  return insn;
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}
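/* For instance, (minus (symbol_ref "sym") (label_ref .L5)) satisfies this
   test: sym - .L5 resolves to a PC-relative constant at assembly time, so
   it need not be forced through the GOT even when generating PIC.  */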
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
				int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
	 to fixup invalid register choices.  */
      if (use_ldrd
	  && GET_CODE (x) == POST_MODIFY
	  && REG_P (addend))
	return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && ((CONST_INT_P (xop1)
		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now.  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return true if we can avoid creating a constant pool entry for x.  */
static bool
can_avoid_literal_pool_for_label_p (rtx x)
{
  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend an 8-bit value to 32-bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (x))
    return 1;

  return false;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (can_avoid_literal_pool_for_label_p (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
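/* E.g. (mem:DI (post_modify (reg r1) (plus (reg r1) (const_int 8)))) models
   the Thumb-2 write-back form  ldrd r2, r3, [r1], #8 ; per the POST_MODIFY
   case above only constant increments in the tested ranges are accepted.  */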
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}
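/* Worked example of the quad-mode limit above (it recurs in the Thumb-2
   variant below): with the bound of 1016 the largest accepted offset is
   1012, so the second double-mode half of a decomposed quad access uses
   1012 + 8 == 1020, still encodable.  A bound of 1024 would admit 1020,
   whose second half at 1028 does not encode.  */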
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand, i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for its core register
	       load/stores.  Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* Thumb-2 ldrd only has reg+const addressing modes.
	     Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
	  else
	    return IN_RANGE (val, -255, 4095 - 4);
	}

      return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}
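/* E.g. the Thumb-2 form  ldr r0, [r1, r2, lsl #2]  corresponds to the
   ASHIFT case above: shift counts of 1 to 3 (scale factors 2, 4, 8) are
   accepted, matching thumb2_index_mul_operand for the MULT form.  */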
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (GET_CODE (x) == LABEL_REF
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
		       && REGNO (XEXP (x, 0))
			  <= LAST_VIRTUAL_POINTER_REGISTER))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
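/* The ranges above mirror the Thumb-1 5-bit scaled immediate forms, e.g.:
     ldrb rD, [rN, #imm5]        @ byte:     0..31
     ldrh rD, [rN, #imm5*2]      @ halfword: 0..62, even
     ldr  rD, [rN, #imm5*4]      @ word:     0..124, multiple of 4
   (the default case also requires the last word of a multi-word access to
   stay below 128, hence "val + GET_MODE_SIZE (mode) <= 128").  */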
bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */
static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */

      rtx tmp;

      if (TARGET_FDPIC)
	{
	  rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
	  rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);

	  emit_insn (gen_load_tp_soft_fdpic ());

	  /* Restore the FDPIC register after the call.  */
	  emit_insn (gen_restore_pic_register_after_call (fdpic_reg,
							  initial_fdpic_reg));
	}
      else
	emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno = NULL_RTX, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  if (TARGET_FDPIC)
    {
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (reloc)),
			    UNSPEC_TLS);
    }
  else
    {
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);

      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (reloc), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
    }
  reg = load_tls_operand (sum, reg);

  if (TARGET_FDPIC)
    emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
  else if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme */
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
						     GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_FDPIC)
	{
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);
	  emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
	  emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
	}
      else
	{
	  labelno = GEN_INT (pic_labelno++);
	  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno),
				  UNSPEC_PIC_LABEL);
	  label = gen_rtx_CONST (VOIDmode, label);
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
					   GEN_INT (TARGET_ARM ? 8 : 4)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);

	  if (TARGET_ARM)
	    emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
	  else if (TARGET_THUMB2)
	    emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
	  else
	    {
	      emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	      emit_move_insn (reg, gen_const_mem (SImode, reg));
	    }
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (x, 0), 1);
	  x = XEXP (XEXP (x, 0), 0);
	}

      if (GET_CODE (x) != SYMBOL_REF)
	return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
	{
	  x = gen_rtx_PLUS (SImode, x, addend);
	  orig_x = x;
	}
      else
	return x;
    }

  if (!TARGET_ARM)
    return thumb_legitimize_address (x, orig_x, mode);

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode || mode == DFmode)
	    {
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
	 use an 8-bit index.  So let's use a 12-bit index for SImode only and
	 hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the base
	     with more bits set and use a negative index instead.  */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
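/* Worked example of the constant splitting above: for an SImode access at
   the absolute address 0x12345, bits == 12 gives mask == 0xfff, so
   base == 0x12000 and index == 0x345.  The base is loaded once into a
   register and the access becomes [base_reg, #0x345], letting adjacent
   absolute addresses share base_reg.  */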
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
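/* Worked example of the -Os rebasing above, for SImode
   (GET_MODE_SIZE == 4): an offset of 300 is within 256 + 31*4 == 380, and
   since 300 >= 256, delta = 300 - 252 = 48.  The base is biased by 252
   (one "add rT, rN, #252", a legal 8-bit immediate) and the load then
   uses the in-range offset #48.  */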
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	{
	  /* ARM currently does not provide relocations to encode TLS variables
	     into AArch32 instructions, only data, so there is no way to
	     currently implement these if a literal pool is disabled.  */
	  if (arm_disable_literal_pool)
	    sorry ("accessing thread-local storage is not currently supported "
		   "with %<-mpure-code%> or %<-mslow-flash-data%>");

	  return true;
	}

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}
static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
	  || flag_pic);
}
static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  split_const (x, &base, &offset);

  if (SYMBOL_REF_P (base))
    {
      /* Function symbols cannot have an offset due to the Thumb bit.  */
      if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
	  && INTVAL (offset) != 0)
	return true;

      if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
	return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  int cycles = 0;
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  while (i)
	    {
	      i >>= 2;
	      cycles++;
	    }
	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256
	      /* 16-bit constant.  */
	      || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
	    return 0;
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
	return total;

      if (arm_arch6)
	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words, cost;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* Fall through.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);
	  else
	    return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
	  || satisfies_constraint_K (SET_SRC (x))
	     /* Too big an immediate for a 2-byte mov, using MOVT.  */
	  || (CONST_INT_P (SET_SRC (x))
	      && UINTVAL (SET_SRC (x)) >= 256
	      && TARGET_HAVE_MOVT
	      && satisfies_constraint_j (SET_SRC (x)))
	     /* thumb1_movdi_insn.  */
	  || ((words > 1) && MEM_P (SET_SRC (x))))
	cost += COSTS_N_INSNS (1);
      return cost;

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256)
	    return COSTS_N_INSNS (1);
	  /* movw is 4 bytes long.  */
	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	case E_QImode:
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case E_HImode:
	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case E_SImode:
	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	default:
	  return 99;
	}

    default:
      return 99;
    }
}
/* Helper function for arm_rtx_costs.  If one operand of the OP, a
   PLUS, adds the carry flag, then return the other operand.  If
   neither is a carry, return OP unchanged.  */
static rtx
strip_carry_operation (rtx op)
{
  gcc_assert (GET_CODE (op) == PLUS);
  if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
    return XEXP (op, 1);
  else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
    return XEXP (op, 0);
  return op;
}
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
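/* For example, given (mult (reg A) (const_int 4)), shifter_op_p returns
   (reg A): multiplication by a power of two is costed as the shift in
   "add rD, rB, rA, lsl #2".  */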
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

      return true;
    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))

#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0)
/* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
   considering the costs of the addressing mode and memory access
   separately.  */
static bool
arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
	       int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);
  if (flag_pic
      && GET_CODE (XEXP (x, 0)) == PLUS
      && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
    /* This will be split into two instructions.  Add the cost of the
       additional instruction here.  The cost of the memory access is computed
       below.  See arm.md:calculate_pic_address.  */
    *cost += COSTS_N_INSNS (1);

  /* Calculate cost of the addressing mode.  */
  if (speed_p)
    {
      arm_addr_mode_op op_type;
      switch (GET_CODE (XEXP (x, 0)))
	{
	default:
	case REG:
	  op_type = AMO_DEFAULT;
	  break;
	case MINUS:
	  /* MINUS does not appear in RTL, but the architecture supports it,
	     so handle this case defensively.  */
	  /* fall through */
	case PLUS:
	  op_type = AMO_NO_WB;
	  break;
	case PRE_INC:
	case PRE_DEC:
	case POST_INC:
	case POST_DEC:
	case PRE_MODIFY:
	case POST_MODIFY:
	  op_type = AMO_WB;
	  break;
	}

      if (VECTOR_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->vector[op_type];
      else if (FLOAT_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->fp[op_type];
      else
	*cost += current_tune->addr_mode_costs->integer[op_type];
    }

  /* Calculate cost of memory access.  */
  if (speed_p)
    {
      if (FLOAT_MODE_P (mode))
	{
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.loadd;
	  else
	    *cost += extra_cost->ldst.loadf;
	}
      else if (VECTOR_MODE_P (mode))
	*cost += extra_cost->ldst.loadv;
      else
	{
	  /* Integer modes */
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.ldrd;
	  else
	    *cost += extra_cost->ldst.load;
	}
    }

  return true;
}
9709 /* RTX costs. Make an estimate of the cost of executing the operation
9710 X, which is contained within an operation with code OUTER_CODE.
9711 SPEED_P indicates whether the cost desired is the performance cost,
9712 or the size cost. The estimate is stored in COST and the return
9713 value is TRUE if the cost calculation is final, or FALSE if the
9714 caller should recurse through the operands of X to add additional
9717 We currently make no attempt to model the size savings of Thumb-2
9718 16-bit instructions. At the normal points in compilation where
9719 this code is called we have no measure of whether the condition
9720 flags are live or not, and thus no realistic way to determine what
9721 the size will eventually be. */
9723 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9724 const struct cpu_cost_table
*extra_cost
,
9725 int *cost
, bool speed_p
)
9727 machine_mode mode
= GET_MODE (x
);
9729 *cost
= COSTS_N_INSNS (1);
9734 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9736 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9744 /* SET RTXs don't have a mode so we get it from the destination. */
9745 mode
= GET_MODE (SET_DEST (x
));
9747 if (REG_P (SET_SRC (x
))
9748 && REG_P (SET_DEST (x
)))
9750 /* Assume that most copies can be done with a single insn,
9751 unless we don't have HW FP, in which case everything
9752 larger than word mode will require two insns. */
9753 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9754 && GET_MODE_SIZE (mode
) > 4)
9757 /* Conditional register moves can be encoded
9758 in 16 bits in Thumb mode. */
9759 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9765 if (CONST_INT_P (SET_SRC (x
)))
9767 /* Handle CONST_INT here, since the value doesn't have a mode
9768 and we would otherwise be unable to work out the true cost. */
9769 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9772 /* Slightly lower the cost of setting a core reg to a constant.
9773 This helps break up chains and allows for better scheduling. */
9774 if (REG_P (SET_DEST (x
))
9775 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9778 /* Immediate moves with an immediate in the range [0, 255] can be
9779 encoded in 16 bits in Thumb mode. */
9780 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9781 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9783 goto const_int_cost
;
9789 return arm_mem_costs (x
, extra_cost
, cost
, speed_p
);
9793 /* Calculations of LDM costs are complex. We assume an initial cost
9794 (ldm_1st) which will load the number of registers mentioned in
9795 ldm_regs_per_insn_1st registers; then each additional
9796 ldm_regs_per_insn_subsequent registers cost one more insn. The
9797 formula for N regs is thus:
9799 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9800 + ldm_regs_per_insn_subsequent - 1)
9801 / ldm_regs_per_insn_subsequent).
9803 Additional costs may also be added for addressing. A similar
9804 formula is used for STM. */
9806 bool is_ldm
= load_multiple_operation (x
, SImode
);
9807 bool is_stm
= store_multiple_operation (x
, SImode
);
9809 if (is_ldm
|| is_stm
)
9813 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9814 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9815 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9816 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9817 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9818 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9819 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9821 *cost
+= regs_per_insn_1st
9822 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9823 + regs_per_insn_sub
- 1)
9824 / regs_per_insn_sub
);
9833 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9834 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9835 *cost
+= COSTS_N_INSNS (speed_p
9836 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9837 else if (mode
== SImode
&& TARGET_IDIV
)
9838 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9840 *cost
= LIBCALL_COST (2);
9842 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9843 possible udiv is prefered. */
9844 *cost
+= (code
== DIV
? COSTS_N_INSNS (1) : 0);
9845 return false; /* All arguments must be in registers. */
9848 /* MOD by a power of 2 can be expanded as:
9850 and r0, r0, #(n - 1)
9851 and r1, r1, #(n - 1)
9852 rsbpl r0, r1, #0. */
9853 if (CONST_INT_P (XEXP (x
, 1))
9854 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9857 *cost
+= COSTS_N_INSNS (3);
9860 *cost
+= 2 * extra_cost
->alu
.logical
9861 + extra_cost
->alu
.arith
;
9867 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9868 possible udiv is prefered. */
9869 *cost
= LIBCALL_COST (2) + (code
== MOD
? COSTS_N_INSNS (1) : 0);
9870 return false; /* All arguments must be in registers. */
9873 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9875 *cost
+= (COSTS_N_INSNS (1)
9876 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9878 *cost
+= extra_cost
->alu
.shift_reg
;
9886 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9888 *cost
+= (COSTS_N_INSNS (2)
9889 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9891 *cost
+= 2 * extra_cost
->alu
.shift
;
9892 /* Slightly disparage left shift by 1 at so we prefer adddi3. */
9893 if (code
== ASHIFT
&& XEXP (x
, 1) == CONST1_RTX (SImode
))
9897 else if (mode
== SImode
)
9899 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9900 /* Slightly disparage register shifts at -Os, but not by much. */
9901 if (!CONST_INT_P (XEXP (x
, 1)))
9902 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9903 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9906 else if (GET_MODE_CLASS (mode
) == MODE_INT
9907 && GET_MODE_SIZE (mode
) < 4)
9911 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9912 /* Slightly disparage register shifts at -Os, but not by
9914 if (!CONST_INT_P (XEXP (x
, 1)))
9915 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9916 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9918 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9920 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9922 /* Can use SBFX/UBFX. */
9924 *cost
+= extra_cost
->alu
.bfx
;
9925 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9929 *cost
+= COSTS_N_INSNS (1);
9930 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9933 if (CONST_INT_P (XEXP (x
, 1)))
9934 *cost
+= 2 * extra_cost
->alu
.shift
;
9936 *cost
+= (extra_cost
->alu
.shift
9937 + extra_cost
->alu
.shift_reg
);
9940 /* Slightly disparage register shifts. */
9941 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9946 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9947 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9950 if (CONST_INT_P (XEXP (x
, 1)))
9951 *cost
+= (2 * extra_cost
->alu
.shift
9952 + extra_cost
->alu
.log_shift
);
9954 *cost
+= (extra_cost
->alu
.shift
9955 + extra_cost
->alu
.shift_reg
9956 + extra_cost
->alu
.log_shift_reg
);
9962 *cost
= LIBCALL_COST (2);
9971 *cost
+= extra_cost
->alu
.rev
;
9978 /* No rev instruction available. Look at arm_legacy_rev
9979 and thumb_legacy_rev for the form of RTL used then. */
9982 *cost
+= COSTS_N_INSNS (9);
9986 *cost
+= 6 * extra_cost
->alu
.shift
;
9987 *cost
+= 3 * extra_cost
->alu
.logical
;
9992 *cost
+= COSTS_N_INSNS (4);
9996 *cost
+= 2 * extra_cost
->alu
.shift
;
9997 *cost
+= extra_cost
->alu
.arith_shift
;
9998 *cost
+= 2 * extra_cost
->alu
.logical
;
10006 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10007 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10009 if (GET_CODE (XEXP (x
, 0)) == MULT
10010 || GET_CODE (XEXP (x
, 1)) == MULT
)
10012 rtx mul_op0
, mul_op1
, sub_op
;
10015 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
10017 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10019 mul_op0
= XEXP (XEXP (x
, 0), 0);
10020 mul_op1
= XEXP (XEXP (x
, 0), 1);
10021 sub_op
= XEXP (x
, 1);
10025 mul_op0
= XEXP (XEXP (x
, 1), 0);
10026 mul_op1
= XEXP (XEXP (x
, 1), 1);
10027 sub_op
= XEXP (x
, 0);
10030 /* The first operand of the multiply may be optionally
10032 if (GET_CODE (mul_op0
) == NEG
)
10033 mul_op0
= XEXP (mul_op0
, 0);
10035 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
10036 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
10037 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
10043 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
10047 if (mode
== SImode
)
10049 rtx shift_by_reg
= NULL
;
10053 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
10054 if (shift_op
== NULL
)
10056 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
10057 non_shift_op
= XEXP (x
, 0);
10060 non_shift_op
= XEXP (x
, 1);
10062 if (shift_op
!= NULL
)
10064 if (shift_by_reg
!= NULL
)
10067 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10068 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
10071 *cost
+= extra_cost
->alu
.arith_shift
;
10073 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
10074 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
10078 if (arm_arch_thumb2
10079 && GET_CODE (XEXP (x
, 1)) == MULT
)
10083 *cost
+= extra_cost
->mult
[0].add
;
10084 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
10085 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
10086 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
10090 if (CONST_INT_P (XEXP (x
, 0)))
10092 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
10093 INTVAL (XEXP (x
, 0)), NULL_RTX
,
10095 *cost
= COSTS_N_INSNS (insns
);
10097 *cost
+= insns
* extra_cost
->alu
.arith
;
10098 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
10102 *cost
+= extra_cost
->alu
.arith
;
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }

	  return false;
	}
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
				   0, speed_p);
	      else
		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op0, mul_op1, add_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      mul_op0 = XEXP (XEXP (x, 0), 0);
	      mul_op1 = XEXP (XEXP (x, 0), 1);
	      add_op = XEXP (x, 1);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (add_op, mode, code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}
      /* Narrow modes can be synthesized in SImode, but the range
	 of useful sub-operations is limited.  Check for shift operations
	 on one of the operands.  Only left shifts can be used in the
	 narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      /* Slightly penalize a narrow operation as the result may
		 need widening.  */
	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
	      return true;
	    }

	  /* Slightly penalize a narrow operation as the result may
	     need widening.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}
      if (mode == SImode)
	{
	  rtx shift_op, shift_reg;

	  if (TARGET_INT_SIMD
	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	    {
	      /* UXTA[BH] or SXTA[BH].  */
	      if (speed_p)
		*cost += extra_cost->alu.extend_arith;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
	      return true;
	    }
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);

	  /* Handle a side effect of adding in the carry to an addition.  */
	  if (GET_CODE (op0) == PLUS
	      && arm_carry_operation (op1, mode))
	    {
	      op1 = XEXP (op0, 1);
	      op0 = XEXP (op0, 0);
	    }
	  else if (GET_CODE (op1) == PLUS
		   && arm_carry_operation (op0, mode))
	    {
	      op0 = XEXP (op1, 0);
	      op1 = XEXP (op1, 1);
	    }
	  else if (GET_CODE (op0) == PLUS)
	    {
	      op0 = strip_carry_operation (op0);
	      if (swap_commutative_operands_p (op0, op1))
		std::swap (op0, op1);
	    }

	  if (arm_carry_operation (op0, mode))
	    {
	      /* Adding the carry to a register is a canonicalization of
		 adding 0 to the register plus the carry.  */
	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
	      return true;
	    }

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
	      return true;
	    }
	  if (GET_CODE (op0) == MULT)
	    {
	      rtx mul_op = op0;

	      if (TARGET_DSP_MULTIPLY
		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
				      == 16))))))
		{
		  /* SMLA[BT][BT].  */
		  if (speed_p)
		    *cost += extra_cost->mult[0].extend_add;
		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
				      SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
					SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (op1, mode, PLUS, 1, speed_p));
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
	      return true;
	    }
	  if (CONST_INT_P (op1))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (op1), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  /* Don't recurse here because we want to test the operands
	     without any carry operation.  */
	  *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
	  *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
	  return true;
	}
      if (mode == DImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend_add;
	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
				    ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case IOR:
      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.rev;
	  return true;
	}
      /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
	  rtx op0 = XEXP (x, 0);
	  rtx shift_op, shift_reg;

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);

	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.logical;
	      *cost += rtx_cost (op0, mode, code, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  *cost += (rtx_cost (op0, mode, code, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}
      if (mode == DImode)
	{
	  rtx op0 = XEXP (x, 0);
	  enum rtx_code subcode = GET_CODE (op0);

	  *cost += COSTS_N_INSNS (1);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  if (GET_CODE (op0) == ZERO_EXTEND)
	    {
	      if (speed_p)
		*cost += 2 * extra_cost->alu.logical;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }
	  else if (GET_CODE (op0) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical;

	  return true;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  rtx op0 = XEXP (x, 0);

	  if (GET_CODE (op0) == NEG && !flag_rounding_math)
	    op0 = XEXP (op0, 0);

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].mult;

	  *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}
      if (mode == SImode)
	{
	  if (TARGET_DSP_MULTIPLY
	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			      && (INTVAL (XEXP (XEXP (x, 1), 1))
				  == 16))))))
	    {
	      /* SMUL[TB][TB].  */
	      if (speed_p)
		*cost += extra_cost->mult[0].extend;
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				 SIGN_EXTEND, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
				 SIGN_EXTEND, 1, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->mult[0].simple;
	  return false;
	}
      if (mode == DImode)
	{
	  if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	       && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
	      || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		  && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    ZERO_EXTEND, 0, speed_p));
	      return true;
	    }

	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      /* VNMUL.  */
	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}
      if (mode == SImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == ABS)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* Assume the non-flag-changing variant.  */
	      if (speed_p)
		*cost += (extra_cost->alu.log_shift
			  + extra_cost->alu.arith_shift);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
	      return true;
	    }

	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* No extra cost for MOV imm and MVN imm.  */
	      /* If the comparison op is using the flags, there's no further
		 cost, otherwise we need to add the cost of the comparison.  */
	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
		{
		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
		  *cost += (COSTS_N_INSNS (1)
			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
					0, speed_p)
			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
					1, speed_p));
		  if (speed_p)
		    *cost += extra_cost->alu.arith;
		}
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  /* Slightly disparage, as we might need an extend operation.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case NOT:
      if (mode == SImode)
	{
	  rtx shift_op;
	  rtx shift_reg = NULL;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

	  if (shift_op)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;
	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  return false;
	}

      /* Vector mode?  */
      *cost += LIBCALL_COST (1);
      return false;
    case IF_THEN_ELSE:
      {
	if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	  {
	    *cost += COSTS_N_INSNS (3);
	    return true;
	  }

	int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
	int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);

	*cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
	/* Assume that if one arm of the if_then_else is a register,
	   that it will be tied with the result and eliminate the
	   conditional insn.  */
	if (REG_P (XEXP (x, 1)))
	  *cost += op2cost;
	else if (REG_P (XEXP (x, 2)))
	  *cost += op1cost;
	else
	  {
	    if (speed_p)
	      {
		if (extra_cost->alu.non_exec_costs_exec)
		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
		else
		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
	      }
	    else
	      *cost += op1cost + op2cost;
	  }
      }
      return true;
    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
	*cost = 0;
      else
	{
	  machine_mode op0mode;
	  /* We'll mostly assume that the cost of a compare is the cost of the
	     LHS.  However, there are some notable exceptions.  */

	  /* Floating point compares are never done as side-effects.  */
	  op0mode = GET_MODE (XEXP (x, 0));
	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[op0mode != SFmode].compare;

	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
	    {
	      *cost = LIBCALL_COST (2);
	      return false;
	    }
	  /* DImode compares normally take two insns.  */
	  if (op0mode == DImode)
	    {
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;
	      return false;
	    }

	  if (op0mode == SImode)
	    {
	      rtx shift_op, shift_reg;

	      if (XEXP (x, 1) == const0_rtx
		  && !(REG_P (XEXP (x, 0))
		       || (GET_CODE (XEXP (x, 0)) == SUBREG
			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
		{
		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);

		  /* Multiply operations that set the flags are often
		     significantly more expensive.  */
		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;

		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
							    0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;
		  return true;
		}

	      shift_reg = NULL;
	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	      if (shift_op != NULL)
		{
		  if (shift_reg != NULL)
		    {
		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
					 1, speed_p);
		      if (speed_p)
			*cost += extra_cost->alu.arith_shift_reg;
		    }
		  else if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
		  return true;
		}
	      return false;
	    }

	  /* Vector mode?  */

	  *cost = LIBCALL_COST (2);
	  return false;
	}
      return true;
      if (outer_code == SET)
	{
	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)
	    {
	      /* Thumb also needs an IT insn.  */
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
	      return true;
	    }
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      switch (code)
		{
		case LT:
		  /* LSR Rd, Rn, #31.  */
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		case EQ:
		case NE:
		  *cost += COSTS_N_INSNS (1);
		  break;

		case LE:
		  /* RSBS T1, Rn, Rn, LSR #31
		     ADC  Rd, Rn, T1.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  break;

		case GT:
		  /* RSB  Rd, Rn, Rn, ASR #1
		     LSR  Rd, Rd, #31.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += (extra_cost->alu.arith_shift
			      + extra_cost->alu.shift);
		  break;

		case GE:
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		default:
		  /* Remaining cases are either meaningless or would take
		     three insns anyway.  */
		  *cost = COSTS_N_INSNS (3);
		  break;
		}
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      return true;
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	}
      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)
	return true;
    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
	  return false;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	  if (!speed_p)
	    return true;

	  if (GET_MODE (XEXP (x, 0)) == SImode)
	    *cost += extra_cost->ldst.load;
	  else
	    *cost += extra_cost->ldst.load_sign_extend;

	  if (mode == DImode)
	    *cost += extra_cost->alu.shift;

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have SXTB/SXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  */
	  *cost += COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.shift;
	}
    case ZERO_EXTEND:
      if ((arm_arch4
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
	{
	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have UXTB/UXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	*cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
    case CONST_INT:
      /* CONST_INT has no mode, so we cannot tell for sure how many
	 insns are really going to be needed.  The best we can do is
	 look at the value passed.  If it fits in SImode, then assume
	 that's the mode it will be used for.  Otherwise assume it
	 will be used in DImode.  */
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
	mode = SImode;
      else
	mode = DImode;

      /* Avoid blowing up in arm_gen_constant ().  */
      if (!(outer_code == PLUS
	    || outer_code == AND
	    || outer_code == IOR
	    || outer_code == XOR
	    || outer_code == MINUS))
	outer_code = SET;

      if (mode == SImode)
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
						    INTVAL (x), NULL, NULL,
						    0, 0));
	}
      else
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant
				  (outer_code, SImode, NULL,
				   trunc_int_for_mode (INTVAL (x), SImode),
				   NULL, NULL, 0, 0)
				  + arm_gen_constant (outer_code, SImode, NULL,
						      INTVAL (x) >> 32, NULL,
						      NULL, 0, 0));
	}
      return true;
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
	{
	  if (arm_arch_thumb2 && !flag_pic)
	    *cost += COSTS_N_INSNS (1);
	  else
	    *cost += extra_cost->ldst.load;
	}
      else
	*cost += COSTS_N_INSNS (1);

      if (flag_pic)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	}
      return true;

    case CONST_FIXED:
      *cost = COSTS_N_INSNS (4);
      return true;
    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (vfp3_const_double_rtx (x))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].fpconst;
	      return true;
	    }

	  if (speed_p)
	    {
	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;
	      else
		*cost += extra_cost->ldst.loadf;
	    }
	  else
	    *cost += COSTS_N_INSNS (1 + (mode == DFmode));

	  return true;
	}
      *cost = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*cost = COSTS_N_INSNS (1);
      else
	*cost = COSTS_N_INSNS (4);
      return true;
    case HIGH:
    case LO_SUM:
      /* When optimizing for size, we prefer constant pool entries to
	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
      if (!speed_p)
	*cost += 1;
      return true;

    case CLZ:
      if (speed_p)
	*cost += extra_cost->alu.clz;
      return false;

    case SMIN:
      if (XEXP (x, 1) == const0_rtx)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Fall through.  */
    case SMAX:
    case UMIN:
    case UMAX:
      *cost += COSTS_N_INSNS (1);
      return false;

    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
		      == ZERO_EXTEND))))
	{
	  if (speed_p)
	    *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
			      ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
				ZERO_EXTEND, 0, speed_p));
	  return true;
	}
      *cost = LIBCALL_COST (1);
      return false;
    case UNSPEC_VOLATILE:
    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);

    case PC:
      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;

    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
	  && mode == SImode
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost += COSTS_N_INSNS (1);
      if (speed_p)
	*cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      return true;
    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].widen;
	  if (!TARGET_VFP5
	      && GET_MODE (XEXP (x, 0)) == HFmode)
	    {
	      /* Pre v8, widening HF->DF is a two-step process, first
		 widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[0].widen;
	    }
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	}

      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	  /* Vector modes?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FMA:
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  rtx op2 = XEXP (x, 2);

	  /* vfms or vfnma.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  /* vfnms or vfnma.  */
	  if (GET_CODE (op2) == NEG)
	    op2 = XEXP (op2, 0);

	  *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
	  *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
	  *cost += rtx_cost (op2, mode, FMA, 2, speed_p);

	  if (speed_p)
	    *cost += extra_cost->fp[mode ==DFmode].fma;

	  return true;
	}

      *cost = LIBCALL_COST (3);
      return false;
    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
	{
	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
	     a vcvt fixed-point conversion.  */
	  if (code == FIX && mode == SImode
	      && GET_CODE (XEXP (x, 0)) == FIX
	      && GET_MODE (XEXP (x, 0)) == SFmode
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0),
						   1)) > 0)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[0].toint;

	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				 code, 0, speed_p);
	      return true;
	    }

	  if (GET_MODE_CLASS (mode) == MODE_INT)
	    {
	      mode = GET_MODE (XEXP (x, 0));
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].toint;
	      /* Strip off the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
				   0, speed_p);
	      else
		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* ??? Increase the cost to deal with transferring from
		 FP -> CORE registers?  */
	      return true;
	    }
	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
		   && TARGET_VFP5)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].roundint;
	      return false;
	    }
	  /* Vector costs?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT:
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
	{
	  /* ??? Increase the cost to deal with transferring from CORE
	     -> FP registers?  */
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fromint;
	  return false;
	}
      *cost = LIBCALL_COST (1);
      return false;
    case ASM_OPERANDS:
      {
	/* Just a guess.  Guess number of instructions in the asm
	   plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
	   though (see PR60663).  */
	int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
	int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);

	*cost = COSTS_N_INSNS (asm_length + num_operands);
	return true;
      }
    default:
      if (mode != VOIDmode)
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*cost = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}

#undef HANDLE_NARROW_SHIFT_ARITH
/* RTX costs entry point.  */

static bool
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  bool result;
  int code = GET_CODE (x);
  gcc_assert (current_tune->insn_extra_cost);

  result = arm_rtx_costs_internal (x, (enum rtx_code) code,
				   (enum rtx_code) outer_code,
				   current_tune->insn_extra_cost,
				   total, speed);

  if (dump_file && arm_verbose_cost)
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
    }

  return result;
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
	return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
	return 3;

      return 4;
    }

  return 6;
}
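
/* Worked example (illustrative, using the weights above): candidate
   addresses for a load would be ordered

     (post_inc r1)            -> 0   auto-increment, most preferred
     (plus r1 (mult r2 4))    -> 3   reg + scaled index
     (plus r1 (const_int 4))  -> 2   reg + immediate offset
     (plus r1 r2)             -> 4   plain register sum
     (reg r1)                 -> 6   bare register
     (symbol_ref "x")         -> 10  needs a constant load

   Only the relative order matters to the RTL optimizers.  */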
static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			  int * cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (dep_type == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))
	{
	  rtx shifted_operand;
	  int opno;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints (dep);
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	    {
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)
		continue;

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
					   shifted_operand))
		{
		  *cost = 2;
		  return false;
		}
	    }
	}
    }
  return true;
}
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			     int * cost)
{
  switch (dep_type)
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
	  && recog_memoized (dep) >= 0)
	{
	  if (GET_CODE (PATTERN (insn)) == SET)
	    {
	      if (GET_MODE_CLASS
		  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
		  || GET_MODE_CLASS
		  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
		{
		  enum attr_type attr_type_insn = get_attr_type (insn);
		  enum attr_type attr_type_dep = get_attr_type (dep);

		  /* By default all dependencies of the form
		     s0 = s0 <op> s1
		     s0 = s0 <op> s2
		     have an extra latency of 1 cycle because
		     of the input and output dependency in this
		     case.  However this gets modeled as a true
		     dependency and hence all these checks.  */
		  if (REG_P (SET_DEST (PATTERN (insn)))
		      && reg_set_p (SET_DEST (PATTERN (insn)), dep))
		    {
		      /* FMACS is a special case where the dependent
			 instruction can be issued 3 cycles before
			 the normal latency in case of an output
			 dependency.  */
		      if ((attr_type_insn == TYPE_FMACS
			   || attr_type_insn == TYPE_FMACD)
			  && (attr_type_dep == TYPE_FMACS
			      || attr_type_dep == TYPE_FMACD))
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) - 3;
			  else
			    *cost = insn_default_latency (dep);
			  return false;
			}
		      else
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) + 1;
			  else
			    *cost = insn_default_latency (dep);
			  return false;
			}
		    }
		}
	    }
	}
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			   int * cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (dep_type == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)
	{
	  *cost = 3;
	  return false;
	}

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
	{
	  *cost = 3;
	  return false;
	}
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
	return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
	return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
	return 20;
      else
	return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
	return 4;
      else
	return 2;
    }
}
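
/* Worked example (illustrative): with the values above, an SImode move
   between a VFP register and a core register costs 15, deliberately less
   than a spill and reload (2 * 10 by arm_memory_move_cost below), so a
   float-to-int conversion is not pushed through memory.  */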
/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
	return 8;
      else
	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
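
/* Worked example (illustrative): on Thumb-1, an SImode move involving
   LO_REGS costs (2 * 4) * 1 = 8, while the same move involving any other
   class costs (2 * 4) * 2 = 16.  */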
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
    case vector_gather_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
    case vector_scatter_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
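
/* Worked example (illustrative): constructing a V4SI vector from scalars
   has TYPE_VECTOR_SUBPARTS = 4, so vec_construct costs 4 / 2 + 1 = 3
   units, approximating the element-insert sequence needed to build it.  */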
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
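
/* Worked example (illustrative): a statement with stmt_cost 1 located in
   an inner loop relative to the loop being vectorized is recorded as
   1 * 50 = 50 units in cost[vect_body] because of the weighting above.  */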
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADCS_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_FFARITHS:
    case TYPE_FFARITHD:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}
/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
	fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
      return true;
    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
	     ";; sched_reorder for cycle %d with %d insns in ready list\n",
	     clock, *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), and looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
	{
	  first_older_only = i;
	  if (verbose > 5)
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
	  break;
	}
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
	first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID(ready [first_older_only]),
	     INSN_UID(ready [first_younger]));
  rtx_insn *first_older_only_insn = ready [first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    {
      ready[i] = ready[i+1];
    }

  ready[i] = first_older_only_insn;
}
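
/* Worked example (illustrative): for a ready list indexed 0..2 with the
   head (next to issue) at index 2,

     { A, O, Y }   O = older-only, Y = younger, A = neither,

   the scan sets first_younger = 2 and first_older_only = 1, and the
   rotation yields { A, Y, O }, so the older-only insn O issues first.  */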
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
		   int clock)
{
  switch (arm_tune)
    {
    case TARGET_CPU_cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
		 unsigned int)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && dep_type == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
	return cost;
    }

  /* XXX Is this strictly true?  */
  if (dep_type == REG_DEP_ANTI
      || dep_type == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (dep_type == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */

      if ((GET_CODE (src_mem) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
	return 1;
    }

  return cost;
}
int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}

static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!fp_consts_inited)
    init_fp_table ();

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))
    return 0;

  if (real_equal (r, &value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     -1^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

   - A (most-significant) is the sign bit.
   - BCD are the exponent (encoded as r XOR 3).
   - EFGH are the mantissa (encoded as n - 16).
*/
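
/* Illustrative sketch, not part of GCC: decode an ABCDEFGH index back to
   its value, assuming the formula above.  For example +1.0 = 16 * 2^-4
   (s = 0, n = 16, r = 4) encodes as (0 << 7) | ((4 ^ 3) << 4) | (16 - 16)
   = 0x70, the inverse of what vfp3_const_double_index below computes.  */
static double
vfp3_decode_index_sketch (unsigned char abcdefgh)
{
  int s = (abcdefgh >> 7) & 1;		/* A: sign bit.  */
  int r = ((abcdefgh >> 4) & 7) ^ 3;	/* BCD: exponent, stored as r XOR 3.  */
  int n = (abcdefgh & 15) + 16;		/* EFGH: mantissa, stored as n - 16.  */
  return (s ? -1.0 : 1.0) * n / (double) (1 << r);
}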
/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2). (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
		      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.
*/
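
/* Illustrative sketch, not part of GCC: the per-variant test that the
   CHECK macro below expands for variant 4 above (vmov.i16, constant
   00000000 abcdefgh) amounts to this, run over the splatted little-endian
   byte image of the constant with a stride of 2.  */
static int
neon_vmov_i16_matches_sketch (const unsigned char *bytes, unsigned int idx)
{
  for (unsigned int i = 0; i < idx; i += 2)
    if (!(bytes[i] == bytes[0] && bytes[i + 1] == 0))
      return 0;
  return 1;
}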
static int
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    {						\
      if (!(TEST))				\
	{					\
	  matches = 0;				\
	  break;				\
	}					\
    }						\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16] = {};
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    n_elts = CONST_VECTOR_NUNITS (op);
  else
    {
      n_elts = 1;
      gcc_assert (mode != VOIDmode);
    }

  innersize = GET_MODE_UNIT_SIZE (mode);
  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
	return -1;

      /* FP16 vectors cannot be represented.  */
      if (GET_MODE_INNER (mode) == HFmode)
	return -1;

      /* All elements in the vector must be the same.  Note that 0.0 and -0.0
	 are distinct in this context.  */
      if (!const_vec_duplicate_p (op))
	return -1;

      if (modconst)
	*modconst = CONST_VECTOR_ELT (op, 0);

      if (el0 == CONST0_RTX (GET_MODE (el0)))
	return 19;
      else
	return 18;
    }
  /* The tricks done in the code below apply for little-endian vector layout.
     For big-endian vectors only allow vectors of the form { a, a, a..., a }.
     FIXME: Implement logic for big-endian vectors.  */
  if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
    return -1;

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (unsigned int byte = 0; byte < innersize; byte++)
	{
	  bytes[idx++] = (elpart & 0xff) ^ invmask;
	  elpart >>= BITS_PER_UNIT;
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));
  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);
  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
      else
	{
	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
    }

  return immtype;
#undef CHECK
}
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */
int
neon_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */
int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT is for determining left or right shift,
   because they have different limitations.  */
int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
{
  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
	return 0;
      else
	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
	return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
	return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
	return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
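
/* Worked example (illustrative): for a V8QI constant with all elements
   equal to 3, maxshift is 8; 3 is accepted both as a left shift (valid
   range 0..7) and as a right shift (valid range 1..8), so the function
   returns nonzero with *elementwidth set to 8.  */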
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */
char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}
/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width,
					     isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
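
/* For instance, with MODE V2SI, PARTS is 2 and the loop runs once; if
   REDUC generates VPADD the emitted reduction is simply

	vpadd.i32	d0, d1, d1	@ both lanes become d1[0] + d1[1]

   and, as noted above, only lane 0 of the result is actually used.  */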
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx x;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, x);
  return gen_vec_duplicate (mode, x);
}
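
/* A sketch of the effect: for a V4SI constant such as
   {0x12345678, 0x12345678, 0x12345678, 0x12345678}, which is not a
   valid VMOV immediate, the RTL produced here assembles to roughly

	movw	r0, #22136	@ low half of 0x12345678
	movt	r0, #4660	@ high half
	vdup.32	q0, r0

   (register choice is arbitrary), which is cheaper than a PC-relative
   load from the literal pool.  */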
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
    return NULL_RTX;
}
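
/* Summarizing the three strategies with examples: an all-zero vector is
   a valid VMOV immediate and is returned as a CONST_VECTOR ("vmov.i32
   q0, #0"); a duplicated arbitrary constant {C, C, C, C} falls back to
   VDUP from a core register; and a genuinely mixed constant such as
   {1, 2, 3, 4} comes back as a CONST_VECTOR that the move patterns
   place in the literal pool and load PC-relative.  */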
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx merge_mask = GEN_INT (1 << one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
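
/* Worked example for the one-variable case above: initializing V4SI
   {x, 1, 2, 3} first recurses on the constant vector {1, 1, 2, 3} (the
   neighbouring element 1 substituted at index 0) and then inserts x
   into lane 0 through the vec_set pattern, avoiding the stack
   fallback entirely.  */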
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   an error mentioning DESC if it doesn't.  EXP indicates the source location,
   which includes the inlining history for intrinsics.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const_tree exp, const char *desc)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error ("%K%s %wd out of range %wd - %wd",
	       exp, desc, lane, low, high - 1);
      else
	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
    }
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
		  const_tree exp)
{
  bounds_check (operand, low, high, exp, "lane");
}

/* Bounds-check constants.  */

void
arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, NULL_TREE, "constant");
}

HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  return GET_MODE_UNIT_BITSIZE (mode);
}
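
/* As an illustration of the bounds-check helpers above: for an
   intrinsic such as vgetq_lane_s32, whose lane operand must lie in
   [0, 4), a call like neon_lane_bounds (operand, 0, 4, exp) rejects a
   lane index of 4 with the diagnostic "lane 4 out of range 0 - 3",
   attributed to the intrinsic's use site via EXP.  */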
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).  */
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))
    return true;

  /* Match:
     (plus (reg)
	   (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op)) ? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Prepares the operands for the VCMLA by lane instruction such that the right
   register number is selected.  This instruction is special in that it always
   requires a D register, however there is a choice to be made between Dn[0],
   Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.

   The VCMLA by lane function always selects two values.  For instance given D0
   and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
   used by the instruction.  However given V4SF then index 0 and 1 are valid as
   D0[0] or D1[0] are both valid.

   This function centralizes that information based on OPERANDS, OPERANDS[3]
   will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
   updated to contain the right index.  */

void
neon_vcmla_lane_prepare_operands (rtx *operands)
{
  int lane = INTVAL (operands[4]);
  machine_mode constmode = SImode;
  machine_mode mode = GET_MODE (operands[3]);
  int regno = REGNO (operands[3]);
  regno = ((regno - FIRST_VFP_REGNUM) >> 1);
  if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
    {
      operands[3] = gen_int_mode (regno + 1, constmode);
      operands[4]
	= gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
    }
  else
    {
      operands[3] = gen_int_mode (regno, constmode);
      operands[4] = gen_int_mode (lane, constmode);
    }
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;

    /* Fall through.  */
    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_PIC_BASE
	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
	return true;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
	src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
	  && (XINT (src, 1) == VUNSPEC_LL
	      || XINT (src, 1) == VUNSPEC_LAX))
	return true;
    }

  return false;
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */

bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
	*mask = log;
      if (signed_sat)
	*signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
	*mask = log + 1;
      if (signed_sat)
	*signed_sat = true;

      return true;
    }

  return false;
}
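
/* Example of the match: the unsigned clamp

	x = MIN (MAX (x, 0), 255)

   has a low bound of 0 and a high bound of 255 = 2^8 - 1, so *MASK
   becomes 8 and the operation maps onto "usat r0, #8, r1"; the signed
   clamp to [-128, 127] instead sets *SIGNED_SAT and *MASK = 8, giving
   "ssat r0, #8, r1".  */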
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
   where
   1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
   2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
   3.  If consecutive is TRUE, then for kth register being loaded,
       REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  if (regno == REGNO (addr))
    addr_reg_in_reglist = true;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.
	     It guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt
	     stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || !MEM_P (mem)
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
	return false;

      /* For Thumb-1, address register is always modified - either by
	 write-back or by explicit load.  If the pattern does not describe
	 an update, then the address register must be in the list of loaded
	 registers.  */
      if (TARGET_THUMB1)
	return update || addr_reg_in_reglist;
    }

  return true;
}
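
/* For reference, the Thumb-1 instruction "ldmia r0!, {r1, r2}" is
   described by a PARALLEL of roughly this shape:

     [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
      (set (reg:SI r1) (mem:SI (reg:SI r0)))
      (set (reg:SI r2) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))]

   where the leading SET is the write-back element checked above and the
   offset adjustment 8 equals the two loaded registers times four.  */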
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	ldr	rd1, [rbase + offset]
	ldr	rd2, [rbase + offset + 4]

     to

	add	rd1, rbase, offset
	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

	NREGS		CYCLES
	  1		  3
	  2		  4
	  3		  5
	  4		  6

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

	NREGS		CYCLES
	  1		 1-3
	  2		 2-6
	  3		 3-9
	  4		4-12

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
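
/* For example, with UNSORTED_OFFSETS {4, 12, 8} and ORDER[0] = 0 (offset
   4 being the lowest), the loop fills ORDER in as {0, 2, 1}: 8 follows
   4 and 12 follows 8.  Offsets {0, 4, 12} fail instead, because no
   element is exactly 4 above 4.  */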
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (REG_P (operands[i])
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  /* Don't allow SP to be loaded unless it is also the base
	     register.  It guarantees that SP is reset correctly when
	     an LDM instruction is interrupted.  Otherwise, we might
	     end up with a corrupt stack.  */
	  if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
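
/* Putting it together: the peephole input

	ldr	r1, [r0]
	ldr	r2, [r0, #4]

   yields ldm_case 1 with *LOAD_OFFSET 0 and can be rewritten as
   "ldmia r0, {r1, r2}"; a lowest offset of 4 on ARM would give
   ldm_case 2 and an "ldmib" instead.  */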
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				     write_back ? 4 * count : 0);
}
rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])
	  {
	    int t = regs[i];
	    regs[i] = regs[j];
	    regs[j] = t;
	  }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (ldm_case == 1 || ldm_case == 5);

      /* Thumb-1 ldm uses writeback except if the base is loaded.  */
      write_back = TRUE;
      for (i = 0; i < nops; i++)
	if (base_reg == regs[i])
	  write_back = false;

      /* Ensure the base is dead if it is updated.  */
      if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
	return false;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
	base_reg_rtx = newbase;
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
				      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
	if (regs[i] == regs[j])
	  {
	    rtx t = peep2_find_free_register (0, nops * 2,
					      TARGET_THUMB1 ? "l" : "r",
					      SImode, &allocated);
	    if (t == NULL_RTX)
	      return false;
	    reg_rtxs[i] = t;
	    regs[i] = REGNO (t);
	  }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))
	  this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
	return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
				   HOST_WIDE_INT length,
				   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
					    TRUE, srcbase, &srcoffset));
	  src_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
						 - src_autoinc));
	      mem = adjust_automodify_address (srcbase, SImode, addr,
					       srcoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
	    }
	  srcoffset += block_size_bytes;
	}

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
					     TRUE, dstbase, &dstoffset));
	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
						 - dst_autoinc));
	      mem = adjust_automodify_address (dstbase, SImode, addr,
					       dstoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
	    }
	  dstoffset += block_size_bytes;
	}

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
					&srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, src,
				srcoffset + j * UNITS_PER_WORD - src_autoinc);
	  mem = adjust_automodify_address (srcbase, SImode, addr,
					   srcoffset + j * UNITS_PER_WORD);
	  if (src_aligned)
	    emit_move_insn (regs[j], mem);
	  else
	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
	}
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
					 &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, dst,
				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, SImode, addr,
					   dstoffset + j * UNITS_PER_WORD);
	  if (dst_aligned)
	    emit_move_insn (mem, regs[j]);
	  else
	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
	}
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
	 byte, depending on interleave factor.  */
      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
	  emit_insn (gen_unaligned_storehi (mem,
					    gen_lowpart (HImode,
							 halfword_tmp)));
	  halfword_tmp = NULL;
	  dstoffset += 2;
	}

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
	  byte_tmp = NULL;
	  dstoffset++;
	}

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
					gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
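
/* To illustrate the structure above: copying 11 bytes with an
   interleave factor of 1 and both operands unaligned emits two
   unaligned word copies, one halfword copy, then one byte copy,
   schematically (register allocation is arbitrary):

	ldr	r3, [r1]		@ unaligned
	str	r3, [r0]
	ldr	r3, [r1, #4]
	str	r3, [r0, #4]
	ldrh	r3, [r1, #8]
	strh	r3, [r0, #8]
	ldrb	r3, [r1, #10]
	strb	r3, [r0, #10]  */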
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
		      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
			       unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
				     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
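
/* Worked example (illustrative only): for length == 41 and
   bytes_per_iter == 16, leftover = 41 % 16 = 9, so the loop body runs
   twice (copying 32 bytes) and the final 9 bytes are handled by the
   straight-line copy after the loop.  */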
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_cpymemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
	 or dst_aligned though: allow more interleaving in those cases since the
	 resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
	arm_block_move_unaligned_loop (operands[0], operands[1], length,
				       interleave_factor, bytes_per_iter);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length,
					   interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 harder.  */
      if (length > 32)
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_cpymemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_cpymemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  while (in_words_to_go >= 2)
    {
      if (in_words_to_go > 4)
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));
      else
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,
					  &srcoffset));

      if (out_words_to_go)
	{
	  if (out_words_to_go > 4)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					       TRUE, dstbase, &dstoffset));
	  else if (out_words_to_go != 1)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,
					       (last_bytes == 0
						? FALSE : TRUE),
					       dstbase, &dstoffset));
	  else
	    {
	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
	      if (last_bytes != 0)
		{
		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
		  dstoffset += 4;
		}
	    }
	}

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
			      GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (Pmode, dst,
							  last_bytes - 1),
					   dstoffset + last_bytes - 1);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

	  if (--last_bytes)
	    {
	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
	      part_bytes_reg = tmp;
	    }
	}
    }
  else
    {
      if (last_bytes > 1)
	{
	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
	  last_bytes -= 2;
	  if (last_bytes)
	    {
	      rtx tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;
	      dstoffset += 2;
	    }
	}

      if (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	}
    }

  return 1;
}
/* Helper for gen_cpymem_ldrd_strd.  Increase the address of memory rtx
   by its mode size.  */
static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
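
/* Usage sketch (illustrative): if MEM is an SImode reference at
   address (reg:SI r0), next_consecutive_mem returns an SImode
   reference at (plus:SI (reg:SI r0) (const_int 4)), i.e. the
   adjoining word.  */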
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success. */
bool
gen_cpymem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_cpymemqi (operands);

  /* If the either src or dst is unaligned we'll be accessing it as pairs
     of unaligned SImode accesses.  Otherwise we can generate DImode
     ldrd/strd instructions.  */
  src = adjust_address (src, src_aligned ? DImode : SImode, 0);
  dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);

  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      rtx low_reg = NULL_RTX;
      rtx hi_reg = NULL_RTX;

      if (!src_aligned || !dst_aligned)
	{
	  low_reg = gen_lowpart (SImode, reg0);
	  hi_reg = gen_highpart_mode (SImode, DImode, reg0);
	}
      if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
	emit_move_insn (reg0, src);
      else if (src_aligned)
	emit_insn (gen_unaligned_loaddi (reg0, src));
      else
	{
	  emit_insn (gen_unaligned_loadsi (low_reg, src));
	  src = next_consecutive_mem (src);
	  emit_insn (gen_unaligned_loadsi (hi_reg, src));
	}

      if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
	emit_move_insn (dst, reg0);
      else if (dst_aligned)
	emit_insn (gen_unaligned_storedi (dst, reg0));
      else
	{
	  emit_insn (gen_unaligned_storesi (dst, low_reg));
	  dst = next_consecutive_mem (dst);
	  emit_insn (gen_unaligned_storesi (dst, hi_reg));
	}

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  gcc_assert (len < 4);

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
	emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
	emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
	emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
      else
	emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 2;
    }

  gcc_assert (len < 2);
  if (len)
    {
      dst = adjust_address (dst, QImode, 0);
      src = adjust_address (src, QImode, 0);
      reg0 = gen_reg_rtx (QImode);
      emit_move_insn (reg0, src);
      emit_move_insn (dst, reg0);
    }

  return true;
}
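
/* Worked example (illustrative only): a 14-byte copy with both
   buffers sufficiently aligned decomposes as 14 = 8 + 4 + 2, i.e.
   one LDRD/STRD pair, one LDR/STR, and one LDRH/STRH, with no byte
   tail left over.  */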
/* Decompose operands for a 64-bit binary operation in OP1 and OP2
   into its component 32-bit subregs.  OP2 may be an immediate
   constant and we want to simplify it in that case.  */
void
arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
			rtx *lo_op2, rtx *hi_op2)
{
  *lo_op1 = gen_lowpart (SImode, op1);
  *hi_op1 = gen_highpart (SImode, op1);
  *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
				 subreg_lowpart_offset (SImode, DImode));
  *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
				 subreg_highpart_offset (SImode, DImode));
}
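
/* Illustrative example (assumes a little-endian target): decomposing
   op2 == (const_int 0x100000003) yields *lo_op2 == (const_int 3) and
   *hi_op2 == (const_int 1), ready for a 32-bit add/adc style
   expansion.  */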
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	  != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DEQmode;

      switch (cond2)
	{
	case EQ: return CC_DEQmode;
	case LE: return CC_DLEmode;
	case LEU: return CC_DLEUmode;
	case GE: return CC_DGEmode;
	case GEU: return CC_DGEUmode;
	default: gcc_unreachable ();
	}

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTmode;

      switch (cond2)
	{
	case LT:
	  return CC_DLTmode;
	case LE:
	  return CC_DLEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTmode;

      switch (cond2)
	{
	case GT:
	  return CC_DGTmode;
	case GE:
	  return CC_DGEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

      switch (cond2)
	{
	case LTU:
	  return CC_DLTUmode;
	case LEU:
	  return CC_DLEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

      switch (cond2)
	{
	case GTU:
	  return CC_DGTUmode;
	case GEU:
	  return CC_DGEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
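
/* Illustrative example: for (ior (eq x y) (le x y)) tested against
   zero, cond1 == EQ and cond2 == LE; comparison_dominates_p (EQ, LE)
   holds, so the EQ/LE pair above yields CC_DLEmode and a single
   dominated comparison sequence can be emitted.  */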
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
	  || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
	  || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
	  || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == ROTATERT
	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  /* An unsigned comparison of ~reg with a const is really a special
     canonicalization of compare (~const, reg), which is a reverse
     subtract operation.  We may not get here if CONST is 0, but that
     doesn't matter because ~0 isn't a valid immediate for RSB.  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == NOT
      && CONST_INT_P (y)
      && (op == EQ || op == NE
	  || op == LTU || op == LEU || op == GEU || op == GTU))
    return CC_RSBmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
	{
	case EQ:
	case NE:
	  /* A DImode comparison against zero can be implemented by
	     or'ing the two halves together.  */
	  if (y == const0_rtx)
	    return CC_Zmode;

	  /* We can do an equality test in three Thumb instructions.  */
	  if (!TARGET_32BIT)
	    return CC_Zmode;

	  /* FALLTHROUGH */

	case LTU:
	case LEU:
	case GTU:
	case GEU:
	  /* DImode unsigned comparisons can be implemented by cmp +
	     cmpeq without a scratch register.  Not worth doing in
	     Thumb-2.  */
	  if (TARGET_32BIT)
	    return CC_CZmode;

	  /* FALLTHROUGH */

	case LT:
	case LE:
	case GT:
	case GE:
	  /* DImode signed and unsigned comparisons can be implemented
	     by cmp + sbcs with a scratch register, but that does not
	     set the Z flag - we must reverse GT/LE/GTU/LEU.  */
	  gcc_assert (op != EQ && op != NE);
	  return CC_NCVmode;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
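
/* Illustrative example: for the overflow idiom "a + b < a", combine
   presents (ltu (plus a b) a); the PLUS case above selects CC_Cmode,
   so only the carry flag of the addition needs to be tested.  */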
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
	 then compare against zero.  Not used for ARM mode; there
	 CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
	{
	  gcc_assert (!reload_completed);
	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
	  y = const0_rtx;
	}

      /* A scratch register is required.  */
      if (reload_completed)
	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
	scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62554: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (operands[0]));
	  emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
				gen_rtx_SUBREG (SImode, ref, 0)));
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT
				(SImode,
				 gen_rtx_SUBREG (SImode, operands[0], 0),
				 GEN_INT (8)),
				scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
						GEN_INT (8)),
				gen_rtx_SUBREG (SImode, operands[0], 0)));
}
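
/* Worked example of the hi/lo split above (illustrative only): for
   offset == 0x1234, lo = 0x234 and hi = 0x1000, so hi + lo == offset.
   For the corner case offset == 4095, lo is clipped to 0x7ff (2047)
   and hi becomes 0x800 (2048), keeping "offset + 1" addressable in
   the second byte load.  */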
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62254: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (outval) || SUBREG_P (outval));

	  if (REG_P (outval))
	    {
	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				    gen_rtx_SUBREG (SImode, outval, 0)));
	    }
	  else /* SUBREG_P (outval)  */
	    {
	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				      SUBREG_REG (outval)));
	      else
		/* FIXME: Handle other cases ?  */
		gcc_unreachable ();
	    }
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	{
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);
	  else
	    {
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;
	    }
	}

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     outval.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */

static bool
arm_must_pass_in_stack (const function_arg_info &arg)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (arg);
  else
    return must_pass_in_stack_var_size_or_pad (arg);
}
/* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
   byte of a stack argument has useful data.  For legacy APCS ABIs we use
   the default.  For AAPCS based ABIs small aggregate types are placed
   in the lowest memory address.  */

static pad_direction
arm_function_arg_padding (machine_mode mode, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return default_function_arg_padding (mode, type);

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return PAD_DOWNWARD;

  return PAD_UPWARD;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
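
/* Illustrative values (assuming the ranges filled in above): in ARM
   state any offset in -255..255 is accepted, so 248 is valid; in
   Thumb-2 state 1020 is valid but 1022 is rejected because it is not
   a multiple of 4.  */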
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register,
   the offsets are constants within the range, and the gap between the offsets is 4.
   If preload complete then check that registers are legal.  WBACK indicates whether
   address is updated.  LOAD indicates whether memory access is load or store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)   /* First destination register is not even.  */
	  || (t2 != t + 1)
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
    return false;

  return true;
}
/* Return true if a 64-bit access with alignment ALIGN and with a
   constant offset OFFSET from the base pointer is permitted on this
   target.  */
static bool
align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
{
  return (unaligned_access
	  ? (align >= BITS_PER_WORD && (offset & 3) == 0)
	  : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
}
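
/* Illustrative example: with unaligned_access enabled, a word-aligned
   access at offset 4 is OK (4 & 3 == 0); without it, the access must
   be doubleword aligned and offset 4 is rejected (4 & 7 != 0).  */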
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE,
   OFFSET and ALIGN accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;
  *align = MEM_ALIGN (mem);

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset, align[2];
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
				 &align[i]))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	{
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
	  operands[i] = tmp;
	}
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	mov r0, 1
	strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.
     For ARM mode do the same but only if the starting register
     can be made to be even.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
	{
	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
	  operands[0] = tmp;
	}
      else if (TARGET_ARM)
	{
	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	    {
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the second
		 register.  */
	      if (regno % 2 == 0)
		return false;

	      /* Is regno-1 free? */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT(regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)
		return false;

	      operands[0] = tmp;
	    }
	  else
	    {
	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		{
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
		}
	      else
		{
		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)
		    return false;
		  operands[regno % 2 == 1 ? 0 : 1] = tmp;
		}
	    }

	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
	}
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      std::swap (align[0], align[1]);
      if (const_store)
	std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != GET_MODE_SIZE (SImode))
    return false;

  if (!align_ok_ldrd_strd (align[0], offset))
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
			     false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				 false, load))
	return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))
	return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
	 can be transformed into
	   mov r1, 0
	   mov r0, 1
	   strd r0, [r2]
      */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
				 false, false))
	{
	  std::swap (operands[0], operands[1]);
	  return true;
	}

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));

      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
      if (tmp == NULL_RTX)
	return false;

      /* DREG must be an even-numbered register in DImode.
	 Split it into SI registers.  */
      operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
      operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
      gcc_assert (operands[0] != NULL_RTX);
      gcc_assert (operands[1] != NULL_RTX);
      gcc_assert (REGNO (operands[0]) % 2 == 0);
      gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

      return (operands_ok_ldrd_strd (operands[0], operands[1],
				     base, offset,
				     false, load));
    }

  return false;
}
/* Return true if parallel execution of the two word-size accesses provided
   could be satisfied with a single LDRD/STRD instruction.  Two word-size
   accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
   register operands and OPERANDS[2,3] are the corresponding memory operands.
   LOAD indicates whether the access is load or store.  */
bool
valid_operands_ldrd_strd (rtx *operands, bool load)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset, align[2];
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset;
  int i, gap;

  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
				 &align[i]))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	return false;
    }

  if (offsets[0] > offsets[1])
    return false;

  gap = offsets[1] - offsets[0];
  offset = offsets[0];

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != GET_MODE_SIZE (SImode))
    return false;

  if (!align_ok_ldrd_strd (align[0], offset))
    return false;

  return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				false, load);
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      {
	char fpstr[20];
	real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			 sizeof (fpstr), 0, 1);
	fprintf (f, "%s", fpstr);
      }
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX,
		     INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *	    next;
  rtx_insn *	    insn;
  HOST_WIDE_INT	    address;
  rtx *		    loc;
  machine_mode	    mode;
  int		    fix_size;
  rtx		    value;
  Mnode *	    minipool;
  HOST_WIDE_INT	    forwards;
  HOST_WIDE_INT	    backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
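
/* Illustrative example: MINIPOOL_FIX_SIZE (HImode) is 4 (a 2-byte
   value is padded out to a word), while MINIPOOL_FIX_SIZE (DFmode)
   is 8 (already a word multiple, so no padding is needed).  */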
static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx_code_label	*minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *		minipool_fix_head;
Mfix *		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~HOST_WIDE_INT_1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
/* Emit insns to load the function address from FUNCDESC (an FDPIC
   function descriptor) into a register and the GOT address into the
   FDPIC register, returning an rtx for the register holding the
   function address.  */

rtx
arm_load_function_descriptor (rtx funcdesc)
{
  rtx fnaddr_reg = gen_reg_rtx (Pmode);
  rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
  rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
  rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));

  emit_move_insn (fnaddr_reg, fnaddr);

  /* The ABI requires the entry point address to be loaded first, but
     since we cannot support lazy binding for lack of atomic load of
     two 32-bits values, we do not need to bother to prevent the
     previous load from being moved after that of the GOT address.  */
  emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));

  return fnaddr_reg;
}
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label).levels[0].log;
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
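
/* Worked example (illustrative): for a label aligned to 8 bytes on
   Thumb, align == 8 and min_insn_size == 2, so up to 6 bytes of
   padding may be inserted before the label.  */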
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT  min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT  min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
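
/* An illustrative, self-contained model of the walk above (not part of
   the build): offsets are a running sum over the live entries only.
   With three entries of fix_size 4, 8 and 4 the assigned offsets are
   0, 4 and 12; an entry whose refcount has dropped to zero inherits
   the current offset but does not advance it.

     struct node { int offset, refcount, fix_size; struct node *next; };

     static void
     assign_offsets (struct node *head)
     {
       int offset = 0;
       for (struct node *p = head; p != NULL; p = p->next)
	 {
	   p->offset = offset;
	   if (p->refcount > 0)
	     offset += p->fix_size;
	 }
     }
*/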
/* Output the literal table.  */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";; Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  rtx val = copy_rtx (mp->value);

	  switch (GET_MODE_SIZE (mp->mode))
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (val), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
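
/* For illustration only (label names hypothetical), the stream emitted
   above for a pool holding two live 4-byte entries looks roughly like:

	.align	2		@ or .align 3 when align64 is set
   .Lpool:			@ minipool_vector_label
	.word	<constant 0>	@ consttable_4 entry, offset 0
	.word	<constant 4>	@ consttable_4 entry, offset 4
				@ consttable_end, then a barrier

   Entries whose refcount has dropped to zero are freed without
   emitting anything.  */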
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
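
/* Worked example of the heuristic above: an unconditional jump that is
   immediately followed by a label scores 50 - 20 - 10 = 20, the lowest
   cost this function returns, which is why forced barriers gravitate to
   spots just after branches where the pool needs no jump around it.  */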
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)
{
  gcc_assert (!arm_disable_literal_pool);
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value.  */
int
arm_max_const_double_inline_cost ()
{
  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
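
/* Worked example: the DImode constant 0x0000000100000001 splits into two
   SImode halves that each cost one insn to build, for a total of 2, so
   it stays inline under either threshold (3 when optimizing for size or
   on cores with load delay slots, 4 otherwise).  */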
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
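
/* Worked example (illustrative): for 0x00000001FFFFFF00 the low part
   0xFFFFFF00 is one MVN (cost 1) and the high part 0x00000001 is one
   MOV (cost 1), so the function returns 2 and the constant is cheaper
   to synthesize than to load from the pool.  */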
/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
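
/* The const_ok_for_arm tests used by the helpers above accept exactly
   the ARM data-processing immediates: an 8-bit value rotated right by
   an even amount.  A self-contained model of that check, for reference
   only (the real predicate lives elsewhere in this file); rotating left
   by ROT undoes a rotate-right-by-ROT encoding:

     static int
     is_arm_immediate (unsigned int x)
     {
       for (int rot = 0; rot < 32; rot += 2)
	 {
	   unsigned int v = (x << rot) | (x >> ((32 - rot) & 31));
	   if (v <= 0xff)
	     return 1;
	 }
       return 0;
     }

   So 0xFF000000 (0xFF ror 8) is encodable while 0x00000101 is not,
   which is why both helpers also try the bitwise complement before
   giving up.  */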
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	    }
	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Lets just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}
	    }
	}
    }

  return;
}
/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
   and unions in the context of ARMv8-M Security Extensions.  It is used as a
   helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
   or four masks, depending on whether it is being computed for a
   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
   respectively.  The tree for the type of the argument or a field within an
   argument is passed in ARG_TYPE, the current register this argument or field
   starts in is kept in the pointer REGNO and updated accordingly, the bit this
   argument or field starts at is passed in STARTING_BIT and the last used bit
   is kept in LAST_USED_BIT which is also updated accordingly.  */

static unsigned HOST_WIDE_INT
comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
			       uint32_t * padding_bits_to_clear,
			       unsigned starting_bit, int * last_used_bit)

{
  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;

  if (TREE_CODE (arg_type) == RECORD_TYPE)
    {
      unsigned current_bit = starting_bit;
      tree field;
      long int offset, size;

      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  /* The offset within a structure is always an offset from
	     the start of that structure.  Make sure we take that into the
	     calculation of the register based offset that we use here.  */
	  offset = starting_bit;
	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
	  offset %= 32;

	  /* This is the actual size of the field, for bitfields this is the
	     bitfield width and not the container size.  */
	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);

	  if (*last_used_bit != offset)
	    {
	      if (offset < *last_used_bit)
		{
		  /* This field's offset is before the 'last_used_bit', that
		     means this field goes on the next register.  So we need to
		     pad the rest of the current register and increase the
		     register number.  */
		  uint32_t mask;
		  mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
		  mask++;

		  padding_bits_to_clear[*regno] |= mask;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      else
		{
		  /* Otherwise we pad the bits between the last field's end and
		     the start of the new field.  */
		  uint32_t mask;

		  mask = ((uint32_t)-1) >> (32 - offset);
		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
		  padding_bits_to_clear[*regno] |= mask;
		}
	      current_bit = offset;
	    }

	  /* Calculate further padding bits for inner structs/unions too.  */
	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
	    {
	      *last_used_bit = current_bit;
	      not_to_clear_reg_mask
		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
						  padding_bits_to_clear, offset,
						  last_used_bit);
	    }
	  else
	    {
	      /* Update 'current_bit' with this field's size.  If the
		 'current_bit' lies in a subsequent register, update 'regno' and
		 reset 'current_bit' to point to the current bit in that new
		 register.  */
	      current_bit += size;
	      while (current_bit >= 32)
		{
		  current_bit -= 32;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      *last_used_bit = current_bit;
	    }

	  field = TREE_CHAIN (field);
	}
      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
    }
  else if (TREE_CODE (arg_type) == UNION_TYPE)
    {
      tree field, field_t;
      int i, regno_t, field_size;
      int max_reg = -1;
      int max_bit = -1;
      uint32_t mask;
      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
	= {-1, -1, -1, -1};

      /* To compute the padding bits in a union we only consider bits as
	 padding bits if they are always either a padding bit or fall outside a
	 fields size for all fields in the union.  */
      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
	    = {0U, 0U, 0U, 0U};
	  int last_used_bit_t = *last_used_bit;

	  regno_t = *regno;
	  field_t = TREE_TYPE (field);

	  /* If the field's type is either a record or a union make sure to
	     compute their padding bits too.  */
	  if (RECORD_OR_UNION_TYPE_P (field_t))
	    not_to_clear_reg_mask
	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
						&padding_bits_to_clear_t[0],
						starting_bit, &last_used_bit_t);
	  else
	    {
	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
	      regno_t = (field_size / 32) + *regno;
	      last_used_bit_t = (starting_bit + field_size) % 32;
	    }

	  for (i = *regno; i < regno_t; i++)
	    {
	      /* For all but the last register used by this field only keep the
		 padding bits that were padding bits in this field.  */
	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
	    }

	  /* For the last register, keep all padding bits that were padding
	     bits in this field and any padding bits that are still valid
	     as padding bits but fall outside of this field's size.  */
	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
	  padding_bits_to_clear_res[regno_t]
	    &= padding_bits_to_clear_t[regno_t] | mask;

	  /* Update the maximum size of the fields in terms of registers used
	     ('max_reg') and the 'last_used_bit' in said register.  */
	  if (max_reg < regno_t)
	    {
	      max_reg = regno_t;
	      max_bit = last_used_bit_t;
	    }
	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
	    max_bit = last_used_bit_t;

	  field = TREE_CHAIN (field);
	}

      /* Update the current padding_bits_to_clear using the intersection of the
	 padding bits of all the fields.  */
      for (i = *regno; i < max_reg; i++)
	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];

      /* Do not keep trailing padding bits, we do not know yet whether this
	 is the end of the argument.  */
      mask = ((uint32_t) 1 << max_bit) - 1;
      padding_bits_to_clear[max_reg]
	|= padding_bits_to_clear_res[max_reg] & mask;

      *regno = max_reg;
      *last_used_bit = max_bit;
    }
  else
    /* This function should only be used for structs and unions.  */
    gcc_unreachable ();

  return not_to_clear_reg_mask;
}
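
/* Worked example of the mask arithmetic above (values illustrative):
   with *last_used_bit == 4 and a new field starting at offset == 12 in
   the same register,

     mask  = ((uint32_t)-1) >> (32 - 12);	   mask == 0x00000FFF
     mask -= ((uint32_t) 1 << 4) - 1;		   mask == 0x00000FF0

   marks bits 4..11 as padding to clear while leaving bits 0..3 (the
   previous field) and bits 12..31 (the new field) untouched.  */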
/* In the context of ARMv8-M Security Extensions, this function is used for both
   'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
   registers are used when returning or passing arguments, which is then
   returned as a mask.  It will also compute a mask to indicate padding/unused
   bits for each of these registers, and passes this through the
   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
   the starting register used to pass this argument or return value is passed
   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
   for struct and union types.  */

static unsigned HOST_WIDE_INT
compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
			   uint32_t * padding_bits_to_clear)

{
  int last_used_bit = 0;
  unsigned HOST_WIDE_INT not_to_clear_mask;

  if (RECORD_OR_UNION_TYPE_P (arg_type))
    {
      not_to_clear_mask
	= comp_not_to_clear_mask_str_un (arg_type, &regno,
					 padding_bits_to_clear, 0,
					 &last_used_bit);

      /* If the 'last_used_bit' is not zero, that means we are still using a
	 part of the last 'regno'.  In such cases we must clear the trailing
	 bits.  Otherwise we are not using regno and we should mark it as to
	 clear.  */
      if (last_used_bit != 0)
	padding_bits_to_clear[regno]
	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
      else
	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
    }
  else
    {
      not_to_clear_mask = 0;
      /* We are not dealing with structs nor unions.  So these arguments may be
	 passed in floating point registers too.  In some cases a BLKmode is
	 used when returning or passing arguments in multiple VFP registers.  */
      if (GET_MODE (arg_rtx) == BLKmode)
	{
	  int i, arg_regs;
	  rtx reg;

	  /* This should really only occur when dealing with the hard-float
	     ABI.  */
	  gcc_assert (TARGET_HARD_FLOAT_ABI);

	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
	    {
	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
	      gcc_assert (REG_P (reg));

	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);

	      /* If we are dealing with DF mode, make sure we don't
		 clear either of the registers it addresses.  */
	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
	      if (arg_regs > 1)
		{
		  unsigned HOST_WIDE_INT mask;
		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
		  not_to_clear_mask |= mask;
		}
	    }
	}
      else
	{
	  /* Otherwise we can rely on the MODE to determine how many registers
	     are being used by this argument.  */
	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	  if (arg_regs > 1)
	    {
	      unsigned HOST_WIDE_INT
	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	      not_to_clear_mask |= mask;
	    }
	}
    }

  return not_to_clear_mask;
}
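
/* Worked example (illustrative): a DFmode argument in d0 is seen here
   as a register pair starting at FIRST_VFP_REGNUM with ARM_NUM_REGS
   returning 2, so

     mask  = HOST_WIDE_INT_1U << (regno + 2);
     mask -= HOST_WIDE_INT_1U << regno;

   sets exactly the two bits covering s0 and s1, keeping both halves of
   the double out of the set of registers to clear.  */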
/* Clear registers secret before doing a cmse_nonsecure_call or returning from
   a cmse_nonsecure_entry function.  TO_CLEAR_BITMAP indicates which registers
   are to be fully cleared, using the value in register CLEARING_REG if more
   efficient.  The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
   the bits that need to be cleared in caller-saved core registers, with
   SCRATCH_REG used as a scratch register for that clearing.

   NOTE: one of three following assertions must hold:
   - SCRATCH_REG is a low register
   - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
     in TO_CLEAR_BITMAP)
   - CLEARING_REG is a low register.  */

static void
cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
		      int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
{
  bool saved_clearing = false;
  rtx saved_clearing_reg = NULL_RTX;
  int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;

  gcc_assert (arm_arch_cmse);

  if (!bitmap_empty_p (to_clear_bitmap))
    {
      minregno = bitmap_first_set_bit (to_clear_bitmap);
      maxregno = bitmap_last_set_bit (to_clear_bitmap);
    }
  clearing_regno = REGNO (clearing_reg);

  /* Clear padding bits.  */
  gcc_assert (padding_bits_len <= NUM_ARG_REGS);
  for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
    {
      uint32_t mask;
      rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);

      if (padding_bits_to_clear[i] == 0)
	continue;

      /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
	 CLEARING_REG as scratch.  */
      if (TARGET_THUMB1
	  && REGNO (scratch_reg) > LAST_LO_REGNUM)
	{
	  /* clearing_reg is not to be cleared, copy its value into scratch_reg
	     such that we can use clearing_reg to clear the unused bits in the
	     arguments.  */
	  if ((clearing_regno > maxregno
	       || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
	      && !saved_clearing)
	    {
	      gcc_assert (clearing_regno <= LAST_LO_REGNUM);
	      emit_move_insn (scratch_reg, clearing_reg);
	      saved_clearing = true;
	      saved_clearing_reg = scratch_reg;
	    }
	  scratch_reg = clearing_reg;
	}

      /* Fill the lower half of the negated padding_bits_to_clear[i].  */
      mask = (~padding_bits_to_clear[i]) & 0xFFFF;
      emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));

      /* Fill the top half of the negated padding_bits_to_clear[i].  */
      mask = (~padding_bits_to_clear[i]) >> 16;
      rtx16 = gen_int_mode (16, SImode);
      dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
      if (mask)
	emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));

      emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
    }
  if (saved_clearing)
    emit_move_insn (clearing_reg, saved_clearing_reg);

  /* Clear full registers.  */

  /* If not marked for clearing, clearing_reg already does not contain
     any secret.  */
  if (clearing_regno <= maxregno
      && bitmap_bit_p (to_clear_bitmap, clearing_regno))
    {
      emit_move_insn (clearing_reg, const0_rtx);
      emit_use (clearing_reg);
      bitmap_clear_bit (to_clear_bitmap, clearing_regno);
    }

  for (regno = minregno; regno <= maxregno; regno++)
    {
      if (!bitmap_bit_p (to_clear_bitmap, regno))
	continue;

      if (IS_VFP_REGNUM (regno))
	{
	  /* If regno is an even vfp register and its successor is also to
	     be cleared, use vmov.  */
	  if (TARGET_VFP_DOUBLE
	      && VFP_REGNO_OK_FOR_DOUBLE (regno)
	      && bitmap_bit_p (to_clear_bitmap, regno + 1))
	    {
	      emit_move_insn (gen_rtx_REG (DFmode, regno),
			      CONST1_RTX (DFmode));
	      emit_use (gen_rtx_REG (DFmode, regno));
	      regno++;
	    }
	  else
	    {
	      emit_move_insn (gen_rtx_REG (SFmode, regno),
			      CONST1_RTX (SFmode));
	      emit_use (gen_rtx_REG (SFmode, regno));
	    }
	}
      else
	{
	  emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
	  emit_use (gen_rtx_REG (SImode, regno));
	}
    }
}
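
/* For illustration, a plausible sequence emitted by the code above when
   clearing r1-r3 and d1-d3 before a non-secure call through r4 (register
   choice hypothetical; CLEARING_REG here holds the sanitized call
   address, which carries no secret):

	mov	r1, r4
	mov	r2, r4
	mov	r3, r4
	vmov.f64	d1, #1.0	@ paired VFP regs cleared with one vmov
	vmov.f64	d2, #1.0
	vmov.f64	d3, #1.0

   Padding bits are handled separately by ANDing the argument register
   with an inverted mask built in SCRATCH_REG.  */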
/* Clears caller saved registers not used to pass arguments before a
   cmse_nonsecure_call.  Saving, clearing and restoring of callee saved
   registers is done in __gnu_cmse_nonsecure_call libcall.
   See libgcc/config/arm/cmse_nonsecure_call.S.  */

static void
cmse_nonsecure_call_clear_caller_saved (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  unsigned address_regnum, regno, maxregno =
	    TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
	  auto_sbitmap to_clear_bitmap (maxregno + 1);
	  rtx_insn *seq;
	  rtx pat, call, unspec, clearing_reg, ip_reg, shift;
	  rtx address;
	  CUMULATIVE_ARGS args_so_far_v;
	  cumulative_args_t args_so_far;
	  tree arg_type, fntype;
	  bool first_param = true;
	  function_args_iterator args_iter;
	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};

	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  if (!CALL_P (insn))
	    continue;

	  pat = PATTERN (insn);
	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
	  call = XVECEXP (pat, 0, 0);

	  /* Get the real call RTX if the insn sets a value, ie. returns.  */
	  if (GET_CODE (call) == SET)
	    call = SET_SRC (call);

	  /* Check if it is a cmse_nonsecure_call.  */
	  unspec = XEXP (call, 0);
	  if (GET_CODE (unspec) != UNSPEC
	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
	    continue;

	  /* Determine the caller-saved registers we need to clear.  */
	  bitmap_clear (to_clear_bitmap);
	  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);

	  /* Only look at the caller-saved floating point registers in case of
	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
	     lazy store and loads which clear both caller- and callee-saved
	     registers.  */
	  if (TARGET_HARD_FLOAT_ABI)
	    {
	      auto_sbitmap float_bitmap (maxregno + 1);

	      bitmap_clear (float_bitmap);
	      bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
				D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
	      bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
	    }

	  /* Make sure the register used to hold the function address is not
	     cleared.  */
	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
	  gcc_assert (MEM_P (address));
	  gcc_assert (REG_P (XEXP (address, 0)));
	  address_regnum = REGNO (XEXP (address, 0));
	  if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
	    bitmap_clear_bit (to_clear_bitmap, address_regnum);

	  /* Set basic block of call insn so that df rescan is performed on
	     insns inserted here.  */
	  set_block_for_insn (insn, bb);
	  df_set_flags (DF_DEFER_INSN_RESCAN);
	  start_sequence ();

	  /* Make sure the scheduler doesn't schedule other insns beyond
	     here.  */
	  emit_insn (gen_blockage ());

	  /* Walk through all arguments and clear registers appropriately.
	  */
	  fntype = TREE_TYPE (MEM_EXPR (address));
	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
				    NULL_TREE);
	  args_so_far = pack_cumulative_args (&args_so_far_v);
	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
	    {
	      rtx arg_rtx;
	      uint64_t to_clear_args_mask;

	      if (VOID_TYPE_P (arg_type))
		continue;

	      function_arg_info arg (arg_type, /*named=*/true);
	      if (!first_param)
		/* ??? We should advance after processing the argument and pass
		   the argument we're advancing past.  */
		arm_function_arg_advance (args_so_far, arg);

	      arg_rtx = arm_function_arg (args_so_far, arg);
	      gcc_assert (REG_P (arg_rtx));
	      to_clear_args_mask
		= compute_not_to_clear_mask (arg_type, arg_rtx,
					     REGNO (arg_rtx),
					     &padding_bits_to_clear[0]);
	      if (to_clear_args_mask)
		{
		  for (regno = R0_REGNUM; regno <= maxregno; regno++)
		    {
		      if (to_clear_args_mask & (1ULL << regno))
			bitmap_clear_bit (to_clear_bitmap, regno);
		    }
		}

	      first_param = false;
	    }

	  /* We use right shift and left shift to clear the LSB of the address
	     we jump to instead of using bic, to avoid having to use an extra
	     register on Thumb-1.  */
	  clearing_reg = XEXP (address, 0);
	  shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (clearing_reg, shift));
	  shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (clearing_reg, shift));

	  /* Clear caller-saved registers that leak before doing a non-secure
	     call.  */
	  ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
	  cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
				NUM_ARG_REGS, ip_reg, clearing_reg);

	  seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}
    }
}
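
/* The two shifts emitted above compute, in effect,

     addr = (addr >> 1) << 1;	-- lsrs rN, rN, #1 ; lsls rN, rN, #1

   clearing bit 0 of the branch target without the BIC-immediate form,
   and therefore without needing an extra register on Thumb-1.  */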
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx cmp, op0, op1, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
      op0 = XEXP (cmp, 0);
      op1 = XEXP (cmp, 1);

      /* Check that comparison is against ZERO.  */
      if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
	continue;

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (cmp, 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}
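
/* Illustrative before/after for the rewrite above (registers
   hypothetical):

     before:	movs	r2, r1		@ move; flags unused
		cmp	r1, #0
		beq	.L3

     after:	subs	r2, r1, #0	@ sets Z exactly as the cmp would
		beq	.L3

   Once the move is re-expressed as SUBS, the separate compare becomes
   redundant and the cbranch pattern can emit the branch alone.  */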
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if ((current_tune->disparage_flag_setting_t16_encodings
	   == tune_params::DISPARAGE_FLAGS_ALL)
	  && optimize_bb_for_speed_p (bb))
	continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
	= ((current_tune->disparage_flag_setting_t16_encodings
	    != tune_params::DISPARAGE_FLAGS_NEITHER)
	   && optimize_bb_for_speed_p (bb))
	  ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)
	    {
	      action = SKIP;
	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (UNARY_P (src) || BINARY_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))
		{
		  switch (GET_CODE (src))
		    {
		    case PLUS:
		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
			 operation.  */
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))
			break;

		      if (low_register_operand (op0, SImode))
			{
			  /* ADDS <Rd>,<Rn>,<Rm> */
			  if (low_register_operand (op1, SImode))
			    action = CONV;
			  /* ADDS <Rdn>,#<imm8> */
			  /* SUBS <Rdn>,#<imm8> */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))
			    action = CONV;
			  /* ADDS <Rd>,<Rn>,#<imm3> */
			  /* SUBS <Rd>,<Rn>,#<imm3> */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))
			    action = CONV;
			}
		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
							SImode)
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)
			action = CONV;
		      break;

		    case MINUS:
		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 SUBS <Rdn>,#<imm8>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm>  */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case MULT:
		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)
			break;
		      /* Fall through.  */
		    case AND:
		    case IOR:
		    case XOR:
		      /* ANDS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting == SKIP
				 ? SKIP : SWAP_CONV;
		      break;

		    case ASHIFTRT:
		    case ASHIFT:
		    case LSHIFTRT:
		      /* ASRS <Rdn>,<Rm> */
		      /* LSRS <Rdn>,<Rm> */
		      /* LSLS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      /* ASRS <Rd>,<Rm>,#<imm5> */
		      /* LSRS <Rd>,<Rm>,#<imm5> */
		      /* LSLS <Rd>,<Rm>,#<imm5> */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = action_for_partial_flag_setting;
		      break;

		    case ROTATERT:
		      /* RORS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NOT:
		      /* MVNS <Rd>,<Rm>  */
		      if (low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NEG:
		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
		      if (low_register_operand (op0, SImode))
			action = CONV;
		      break;

		    case CONST_INT:
		      /* MOVS <Rd>,#<imm8>  */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = action_for_partial_flag_setting;
		      break;

		    case REG:
		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */
		      break;

		    default:
		      break;
		    }
		}

	      if (action != SKIP)
		{
		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		  rtvec vec;

		  if (action == SWAP_CONV)
		    {
		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (dst, src);
		      vec = gen_rtvec (2, pat, clobber);
		    }
		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;
		}
	    }

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
}
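
/* Illustrative effect of the conversion above: with the CC register
   dead at this point,

	add	r0, r1, r2	@ 32-bit T32 encoding, flags preserved

   can be re-emitted as

	adds	r0, r1, r2	@ 16-bit T1 encoding, flags clobbered

   The CLOBBER of CC_REGNUM added to the PARALLEL is what licenses the
   flag-setting form; SWAP_CONV additionally commutes the operands of a
   commutative source so the destination matches the first input, as
   the 16-bit encodings require.  */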
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (use_cmse)
    cmse_nonsecure_call_clear_caller_saved ();

  /* We cannot run the Thumb passes for thunks because there is no CFG.  */
  if (cfun->is_thunk)
    ;
  else if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  /* Make sure we do not attempt to create a literal pool even though it should
     no longer be necessary to create any.  */
  if (arm_disable_literal_pool)
    return ;

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx_jump_table_data *table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix *  last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is first fix that we can't fit into this pool and
	     there no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (real_equal (r, &value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);
  bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  /* Can't use POP if returning from an interrupt.  */
  if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
    sprintf (pattern, "pop%s\t{", conditional);
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldmfd%s\t", conditional);
      else if (update)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%s\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (interrupt_p && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		   ? XEXP (operands[0], 0)
		   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p =  REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }

  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 - (count * 8)))
		    ),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
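
/* Usage note (illustrative): a prologue pushing four register pairs
   calls vfp_emit_fstmd (base, 4) and gets 32 back as the stack
   adjustment.  On pre-v6 cores the ARM10 VFPr1 workaround above turns a
   two-pair push into a three-pair one, so callers must use the returned
   byte count (24, not 16) rather than assuming the original count * 8.  */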
/* Returns true if -mcmse has been passed and the function pointed to by 'addr'
   has the cmse_nonsecure_call attribute and returns false otherwise.  */

bool
detect_cmse_nonsecure_call (tree addr)
{
  if (!addr)
    return FALSE;

  tree fntype = TREE_TYPE (addr);
  if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
				    TYPE_ATTRIBUTES (fntype)))
    return TRUE;
  return FALSE;
}
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register (NULL_RTX, false /*compute_now*/);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_FDPIC)
    {
      rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
	 is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	{
	  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					       GEN_INT (16)),
			 GEN_INT ((val >> 16) & 0x0000ffff));
	  rtx_insn *insn = get_last_insn ();
	  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
	}
      return;
    }
  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
  rtx_insn *insn = get_last_insn ();
  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
}
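
/* Illustrative expansion of arm_emit_movpair for an immediate: loading
   0x12345678 becomes

	movw	r0, #0x5678	@ val & 0xffff, upper half zeroed
	movt	r0, #0x1234	@ the ZERO_EXTRACT of bits 16..31

   and the movt is omitted entirely when the top sixteen bits are zero,
   as for 0x00001234.  */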
18671 /* Output a move between double words. It must be REG<-MEM
18674 output_move_double (rtx
*operands
, bool emit
, int *count
)
18676 enum rtx_code code0
= GET_CODE (operands
[0]);
18677 enum rtx_code code1
= GET_CODE (operands
[1]);
18682 /* The only case when this might happen is when
18683 you are looking at the length of a DImode instruction
18684 that has an invalid constant in it. */
18685 if (code0
== REG
&& code1
!= MEM
)
18687 gcc_assert (!emit
);
18694 unsigned int reg0
= REGNO (operands
[0]);
18696 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
18698 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
18700 switch (GET_CODE (XEXP (operands
[1], 0)))
18707 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
18708 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
18710 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18715 gcc_assert (TARGET_LDRD
);
18717 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
18724 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
18726 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
18734 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
18736 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
18741 gcc_assert (TARGET_LDRD
);
18743 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
18748 /* Autoicrement addressing modes should never have overlapping
18749 base and destination registers, and overlapping index registers
18750 are already prohibited, so this doesn't need to worry about
18752 otherops
[0] = operands
[0];
18753 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
18754 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
18756 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
18758 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
18760 /* Registers overlap so split out the increment. */
18763 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18764 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
18771 /* Use a single insn if we can.
18772 FIXME: IWMMXT allows offsets larger than ldrd can
18773 handle, fix these up with a pair of ldr. */
18775 || !CONST_INT_P (otherops
[2])
18776 || (INTVAL (otherops
[2]) > -256
18777 && INTVAL (otherops
[2]) < 256))
18780 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
18786 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18787 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18797 /* Use a single insn if we can.
18798 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18799 fix these up with a pair of ldr. */
18801 || !CONST_INT_P (otherops
[2])
18802 || (INTVAL (otherops
[2]) > -256
18803 && INTVAL (otherops
[2]) < 256))
18806 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
18812 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18813 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
        case LABEL_REF:
        case CONST:
          /* We might be able to use ldrd %0, %1 here.  However the range is
             different to ldr/adr, and it is broken on some ARMv7-M
             implementations.  */
          /* Use the second register of the pair to avoid problematic
             conditionals.  */
          otherops[1] = operands[1];
          if (emit)
            output_asm_insn ("adr%?\t%0, %1", otherops);
          operands[1] = otherops[0];
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldrd%?\t%0, [%1]", operands);
              else
                output_asm_insn ("ldmia%?\t%1, %M0", operands);
            }

          if (count)
            *count = 2;
          break;

          /* ??? This needs checking for thumb2.  */
        default:
          if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
                               GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
            {
              otherops[0] = operands[0];
              otherops[1] = XEXP (XEXP (operands[1], 0), 0);
              otherops[2] = XEXP (XEXP (operands[1], 0), 1);

              if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
                {
                  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
                    {
                      switch ((int) INTVAL (otherops[2]))
                        {
                        case -8:
                          if (emit)
                            output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
                          return "";
                        case -4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldmda%?\t%1, %M0", otherops);
                          return "";
                        case 4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldmib%?\t%1, %M0", otherops);
                          return "";
                        }
                    }
                  otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
                  operands[1] = otherops[0];
                  if (TARGET_LDRD
                      && (REG_P (otherops[2])
                          || TARGET_THUMB2
                          || (CONST_INT_P (otherops[2])
                              && INTVAL (otherops[2]) > -256
                              && INTVAL (otherops[2]) < 256)))
                    {
                      if (reg_overlap_mentioned_p (operands[0], otherops[2]))
                        {
                          /* Swap base and index registers over to
                             avoid a conflict.  */
                          std::swap (otherops[1], otherops[2]);
                        }
                      /* If both registers conflict, it will usually
                         have been fixed by a splitter.  */
                      if (reg_overlap_mentioned_p (operands[0], otherops[2])
                          || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
                        {
                          if (emit)
                            {
                              output_asm_insn ("add%?\t%0, %1, %2", otherops);
                              output_asm_insn ("ldrd%?\t%0, [%1]", operands);
                            }
                          if (count)
                            *count = 2;
                        }
                      else
                        {
                          otherops[0] = operands[0];
                          if (emit)
                            output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
                        }
                      return "";
                    }

                  if (CONST_INT_P (otherops[2]))
                    {
                      if (emit)
                        {
                          if (!(const_ok_for_arm (INTVAL (otherops[2]))))
                            output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
                          else
                            output_asm_insn ("add%?\t%0, %1, %2", otherops);
                        }
                    }
                  else
                    {
                      if (emit)
                        output_asm_insn ("add%?\t%0, %1, %2", otherops);
                    }
                }
              else
                {
                  if (emit)
                    output_asm_insn ("sub%?\t%0, %1, %2", otherops);
                }

              if (count)
                *count = 2;

              if (TARGET_LDRD)
                return "ldrd%?\t%0, [%1]";

              return "ldmia%?\t%1, %M0";
            }
          else
            {
              otherops[1] = adjust_address (operands[1], SImode, 4);
              /* Take care of overlapping base/data reg.  */
              if (reg_mentioned_p (operands[0], operands[1]))
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
        }
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
                  || (TARGET_ARM && TARGET_LDRD));

      /* For TARGET_ARM the first source register of an STRD
         must be even.  This is usually the case for double-word
         values but user assembly constraints can force an odd
         starting register.  */
      bool allow_strd = TARGET_LDRD
                        && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          if (emit)
            {
              if (allow_strd)
                output_asm_insn ("strd%?\t%1, [%m0]", operands);
              else
                output_asm_insn ("stm%?\t%m0, %M1", operands);
            }
          break;

        case PRE_INC:
          gcc_assert (allow_strd);
          if (emit)
            output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
          break;

        case PRE_DEC:
          if (emit)
            {
              if (allow_strd)
                output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
              else
                output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
            }
          break;

        case POST_INC:
          if (emit)
            {
              if (allow_strd)
                output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
              else
                output_asm_insn ("stm%?\t%m0!, %M1", operands);
            }
          break;

        case POST_DEC:
          gcc_assert (allow_strd);
          if (emit)
            output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
          break;
        case PRE_MODIFY:
        case POST_MODIFY:
          otherops[0] = operands[1];
          otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

          /* IWMMXT allows offsets larger than strd can handle,
             fix these up with a pair of str.  */
          if (!TARGET_THUMB2
              && CONST_INT_P (otherops[2])
              && (INTVAL (otherops[2]) <= -256
                  || INTVAL (otherops[2]) >= 256))
            {
              if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
          else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
            {
              if (emit)
                output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
            }
          else
            {
              if (emit)
                output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
            }
          break;

        case PLUS:
          otherops[2] = XEXP (XEXP (operands[0], 0), 1);
          if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
            {
              switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
                {
                case -8:
                  if (emit)
                    output_asm_insn ("stmdb%?\t%m0, %M1", operands);
                  return "";

                case -4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stmda%?\t%m0, %M1", operands);
                  return "";

                case 4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stmib%?\t%m0, %M1", operands);
                  return "";
                }
            }
          if (allow_strd
              && (REG_P (otherops[2])
                  || TARGET_THUMB2
                  || (CONST_INT_P (otherops[2])
                      && INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256)))
            {
              otherops[0] = operands[1];
              otherops[1] = XEXP (XEXP (operands[0], 0), 0);
              if (emit)
                output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
              return "";
            }
          /* Fall through */

        default:
          otherops[0] = adjust_address (operands[0], SImode, 4);
          otherops[1] = operands[1];
          if (emit)
            {
              output_asm_insn ("str%?\t%1, %0", operands);
              output_asm_insn ("str%?\t%H1, %0", otherops);
            }
          if (count)
            *count = 2;
        }
    }

  return "";
}
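
/* Illustrative note (added commentary, not from the original sources):
   in the templates above, %? expands to the conditional suffix, %m1
   prints the base register of memory operand 1, %M0 prints operand 0
   as an LDM/STM register list and %H0 prints the second register of
   the pair.  So a DImode load into r0/r1 from [r4] in the REG case
   might emit either of the following, depending on TARGET_LDRD:

       ldrd    r0, [r4]
       ldmia   r4, {r0, r1}

   The exact registers depend on register allocation.  */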

/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */
      if (MEM_P (operands[1]))
        {
          switch (GET_CODE (XEXP (operands[1], 0)))
            {
            case REG:
              output_asm_insn ("ldmia%?\t%m1, %M0", operands);
              break;

            case LABEL_REF:
            case CONST:
              output_asm_insn ("adr%?\t%0, %1", operands);
              output_asm_insn ("ldmia%?\t%0, %M0", operands);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else
        {
          rtx ops[2];
          int dest, src, i;

          gcc_assert (REG_P (operands[1]));

          dest = REGNO (operands[0]);
          src = REGNO (operands[1]);

          /* This seems pretty dumb, but hopefully GCC won't try to do it
             very often.  */
          if (dest < src)
            for (i = 0; i < 4; i++)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
          else
            for (i = 3; i >= 0; i--)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
        }
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          output_asm_insn ("stm%?\t%m0, %M1", operands);
          break;

        default:
          gcc_unreachable ();
        }
    }

  return "";
}
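
/* Illustrative example (added commentary): a reg->reg move of a
   quad-word value from r4-r7 into r2-r5 overlaps in r4/r5.  Because
   dest < src, the loop above copies in ascending order, reading each
   source register before it is overwritten:

       mov r2, r4
       mov r3, r5
       mov r4, r6
       mov r5, r7

   A move in the opposite direction must copy in descending order for
   the same reason.  */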

/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int sp = (!TARGET_VFP_FP16INST
            || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
              || mode == SFmode
              || mode == DFmode
              || mode == HImode
              || mode == SImode
              || mode == DImode
              || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
           load ? "ld" : "st",
           dp ? "64" : sp ? "32" : "16",
           dp ? "P" : "",
           integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
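
/* Illustrative expansions (assumed, not from the original sources):
   for a DFmode load of d1 from [r0], the default case yields the
   template "vldr%?.64\t%P0, %1", i.e. "vldr.64 d1, [r0]"; a POST_INC
   store of an SFmode value in s2 yields "vstmia.32 r0!, {s2}".  */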

/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM instruction.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */
const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = REG_NREGS (reg) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
              || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
              || VALID_NEON_QREG_MODE (mode)
              || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
        {
          templ = "v%smia%%?\t%%0!, %%h1";
          ops[0] = XEXP (addr, 0);
        }
      else
        {
          templ = "v%s1.64\t%%h1, %%A0";
          ops[0] = mem;
        }
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
         pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
        {
          if (nregs > 4)
            templ = "v%smia%%?\t%%m0, %%h1";
          else
            templ = "v%s1.64\t%%h1, %%A0";

          ops[0] = mem;
          ops[1] = reg;
          break;
        }
      /* Fall through.  */
    case LABEL_REF:
    case PLUS:
      {
        int i;
        int overlap = -1;

        for (i = 0; i < nregs; i++)
          {
            /* We're only using DImode here because it's a convenient
               size.  */
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
            ops[1] = adjust_address (mem, DImode, 8 * i);
            if (reg_overlap_mentioned_p (ops[0], mem))
              {
                gcc_assert (overlap == -1);
                overlap = i;
              }
            else
              {
                sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
                output_asm_insn (buff, ops);
              }
          }
        if (overlap != -1)
          {
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
            ops[1] = adjust_address (mem, SImode, 8 * overlap);
            sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
            output_asm_insn (buff, ops);
          }

        return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
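
/* Illustrative example (assumed): a V4SImode load (one quad register,
   nregs == 2) from a plain REG address selects the vld1 template,
   giving e.g. "vld1.64 {d16-d17}, [r0:64]", while an XImode load
   (8 double-word registers, nregs > 4) must fall back to
   "vldmia r0, {d16-d23}" since vld1.64 handles at most 4 double-word
   registers.  */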

/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
        {
        case E_EImode:
        case E_OImode:
          return 8;
        case E_CImode:
          return 12;
        case E_XImode:
          return 16;
        default:
          gcc_unreachable ();
        }
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = REG_NREGS (reg) / 2;
      return insns * 4;
    }
  else
    return 4;
}

/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
          && REG_P (XEXP (addr, 0))
          && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}

/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
        output_multi_immediate (operands,
                                "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
                                -n);
      else
        output_multi_immediate (operands,
                                "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
                                n);
    }

  return "";
}

/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
                        int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
        {
          if (n & (3 << i))
            {
              operands[immed_op] = GEN_INT (n & (255 << i));
              output_asm_insn (instr, operands);
              instr = instr2;
              i += 6;
            }
        }
    }

  return "";
}
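
/* Worked example (added commentary): output_add_immediate with
   n = 0x12345 cannot use a single add, so output_multi_immediate
   splits it into 8-bit chunks starting at even bit positions, each a
   valid ARM immediate:

       add r0, r1, #69          @ 0x45
       add r0, r0, #8960        @ 0x2300
       add r0, r0, #65536       @ 0x10000

   The register numbers here are arbitrary.  */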

/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));

    default:
      gcc_unreachable ();
    }
}

/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      if (CONST_INT_P (XEXP (op, 1)))
        {
          *amountp = INTVAL (XEXP (op, 1));
        }
      else if (REG_P (XEXP (op, 1)))
        {
          *amountp = -1;
          return mnem;
        }
      else
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
         power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = exact_log2 (*amountp);
      gcc_assert (IN_RANGE (*amountp, 0, 31));
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
        mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
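
/* Illustrative note (added commentary): the MULT case means that an
   rtx such as (mult:SI (reg) (const_int 8)) is printed as an "lsl"
   with *AMOUNTP set to exact_log2 (8) == 3, so a caller can fold a
   multiply-by-8 into a shifter operand, e.g. "add r0, r1, r2, lsl #3".  */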

/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
        {
          fputs ("\"\n\t.ascii\t\"", stream);
          len_so_far = 0;
        }

      if (ISPRINT (c))
        {
          if (c == '\\' || c == '\"')
            {
              putc ('\\', stream);
              len_so_far++;
            }
          putc (c, stream);
          len_so_far++;
        }
      else
        {
          fprintf (stream, "\\%03o", c);
          len_so_far += 4;
        }
    }

  fputs ("\"\n", stream);
}
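
/* Illustrative output (assumed): for the bytes of "hi\n\"x" the routine
   above emits

       .ascii "hi\012\"x"

   printable characters are copied (escaping backslash and double
   quote) and everything else becomes a three-digit octal escape; a
   fresh .ascii directive is started every MAX_ASCII_LEN characters.  */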

/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
         even call clobbered ones.  If this is a leaf function
         we can just examine the registers used by the RTL, but
         otherwise we have to assume that whatever function is
         called might clobber anything, and so we have to save
         all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
        /* FIQ handlers have registers r8 - r12 banked, so
           we only need to check r0 - r7.  Normal ISRs only
           bank r14 and r15, so we must check up to r12.
           r13 is the stack pointer which is always preserved,
           so we do not need to consider it here.  */
        max_reg = 7;
      else
        max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
        if (df_regs_ever_live_p (reg)
            || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg)))
          save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
          && crtl->uses_pic_offset_table)
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE (func_type))
    {
      /* For noreturn functions we historically omitted register saves
         altogether.  However this really messes up debugging.  As a
         compromise save just the frame pointers.  Combined with the link
         register saved elsewhere this should be sufficient to get
         a backtrace.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
         which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
        if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
          save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
         don't stack it even though it may be live.  */
      if (PIC_REGISTER_MAY_NEED_SAVING
          && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
              || crtl->uses_pic_offset_table))
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
        save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
        {
          reg = EH_RETURN_DATA_REGNO (i);
          if (reg == INVALID_REGNUM)
            break;
          save_reg_mask |= 1 << reg;
        }
    }

  return save_reg_mask;
}
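
/* Illustrative note (added commentary): bit N of the returned mask
   corresponds to core register rN, so a normal function that uses r4
   and r6 and needs a frame pointer would yield
   (1 << 4) | (1 << 6) | (1 << HARD_FRAME_POINTER_REGNUM).  */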

/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}

/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* Once the value is updated from the init value of -1, do not
     re-compute.  */
  if (cfun->machine->static_chain_stack_bytes != -1)
    return cfun->machine->static_chain_stack_bytes;

  /* See the defining assertion in arm_expand_prologue.  */
  if (IS_NESTED (arm_current_func_type ())
      && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
          || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
               || flag_stack_clash_protection)
              && !df_regs_ever_live_p (LR_REGNUM)))
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}

/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.
   This is used by arm_compute_frame_layout, which may add extra registers.  */

static unsigned long
arm_compute_save_core_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask =
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
          && optimize_size
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && !crtl->tail_call_emit
          && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
           + ARM_NUM_INTS (crtl->args.pretend_args_size +
                           arm_compute_static_chain_stack_bytes ())) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
         onto the stack is odd.  We need to ensure that the stack
         is 64-bit aligned before we start to save iWMMXt registers,
         and also before we start to create locals.  (A local variable
         might be a double or long long which we will load/store using
         an iWMMXt instruction).  Therefore we need to push another
         ARM register, so that the stack will be 64-bit aligned.  We
         try to avoid using the arg registers (r0 - r3) as they might be
         used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
        if ((save_reg_mask & (1 << reg)) == 0)
          break;

      if (reg <= 12)
        save_reg_mask |= (1 << reg);
      else
        {
          cfun->machine->sibcall_blocked = 1;
          save_reg_mask |= (1 << 3);
        }
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_or_fixed_reg_p (reg))
        save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}

/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_core_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  /* Handle the frame pointer as a special case.  */
  if (frame_pointer_needed)
    mask |= 1 << HARD_FRAME_POINTER_REGNUM;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  bool call_clobbered_scratch
    = (thumb1_prologue_unused_call_clobbered_lo_regs ()
       && thumb1_epilogue_unused_call_clobbered_lo_regs ());

  /* Make sure we have a low work register if we need one.  We will
     need one if we are going to push a high register, but we are not
     currently intending to push a low register.  However if both the
     prologue and epilogue have a spare call-clobbered low register,
     then we won't need to find an additional work register.  It does
     not need to be the same register in the prologue and
     epilogue.  */
  if ((mask & 0xff) == 0
      && !call_clobbered_scratch
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
         we will use.  If the register is live then we will
         have to push it.  Use LAST_LO_REGNUM as our fallback
         choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
         not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
        reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
        mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
         determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
        if (mask & (1 << reg))
          break;

      if (reg > LAST_LO_REGNUM)
        {
          /* Make sure we have a register available for stack decrement.  */
          mask |= 1 << LAST_LO_REGNUM;
        }
    }

  return mask;
}

/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
           regno < LAST_VFP_REGNUM;
           regno += 2)
        {
          if ((!df_regs_ever_live_p (regno)
               || call_used_or_fixed_reg_p (regno))
              && (!df_regs_ever_live_p (regno + 1)
                  || call_used_or_fixed_reg_p (regno + 1)))
            {
              if (count > 0)
                {
                  /* Workaround ARM10 VFPr1 bug.  */
                  if (count == 2 && !arm_arch6)
                    count++;
                  saved += count * 8;
                }
              count = 0;
            }
          else
            count++;
        }
      if (count > 0)
        {
          if (count == 2 && !arm_arch6)
            count++;
          saved += count * 8;
        }
    }
  return saved;
}

/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If SIMPLE_RETURN is true,
   then do not output the epilogue, because it has already been emitted in RTL.

   Note: do not forget to update the length attribute of the corresponding
   insn pattern when changing assembly output (e.g. the length attribute of
   thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
   register clearing sequences).  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
                           bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
         found a tail call, then we have to trust that the called
         function won't return.  */
      if (really_return)
        {
          rtx ops[2];

          /* Otherwise, trap an attempted return by aborting.  */
          ops[0] = operand;
          ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
                                       : "abort");
          assemble_external_libcall (ops[1]);
          output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
        }

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
         (e.g. interworking) then we can load the return address
         directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
          && !IS_CMSE_ENTRY (func_type)
          && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
        return_reg = reg_names[PC_REGNUM];
      else
        return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
        {
          /* There are three possible reasons for the IP register
             being saved.  1) a stack frame was created, in which case
             IP contains the old stack pointer, or 2) an ISR routine
             corrupted it, or 3) it was saved to align the stack on
             iWMMXt.  In case 1, restore IP into SP, otherwise just
             restore IP.  */
          if (frame_pointer_needed)
            {
              live_regs_mask &= ~ (1 << IP_REGNUM);
              live_regs_mask |= (1 << SP_REGNUM);
            }
          else
            gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
        }

      /* On some ARM architectures it is faster to use LDR rather than
         LDM to load a single register.  On other architectures, the
         cost is the same.  In 26 bit mode, or for exception handlers,
         we have to use LDM to load the PC so that the CPSR is also
         restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
        if (live_regs_mask == (1U << reg))
          break;

      if (reg <= LAST_ARM_REGNUM
          && (reg != LR_REGNUM
              || ! really_return
              || ! IS_INTERRUPT (func_type)))
        {
          sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
                   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
        }
      else
        {
          char *p;
          int first = 1;

          /* Generate the load multiple instruction to restore the
             registers.  Note we can get here, even if
             frame_pointer_needed is true, but only if sp already
             points to the base of the saved core registers.  */
          if (live_regs_mask & (1 << SP_REGNUM))
            {
              unsigned HOST_WIDE_INT stack_adjust;

              stack_adjust = offsets->outgoing_args - offsets->saved_regs;
              gcc_assert (stack_adjust == 0 || stack_adjust == 4);

              if (stack_adjust && arm_arch5t && TARGET_ARM)
                sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
              else
                {
                  /* If we can't use ldmib (SA110 bug),
                     then try to pop r3 instead.  */
                  if (stack_adjust)
                    live_regs_mask |= 1 << 3;

                  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
                }
            }
          /* For interrupt returns we have to use an LDM rather than
             a POP so that we can use the exception return variant.  */
          else if (IS_INTERRUPT (func_type))
            sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
          else
            sprintf (instr, "pop%s\t{", conditional);

          p = instr + strlen (instr);

          for (reg = 0; reg <= SP_REGNUM; reg++)
            if (live_regs_mask & (1 << reg))
              {
                int l = strlen (reg_names[reg]);

                if (first)
                  first = 0;
                else
                  {
                    memcpy (p, ", ", 2);
                    p += 2;
                  }

                memcpy (p, "%|", 2);
                memcpy (p + 2, reg_names[reg], l);
                p += l + 2;
              }

          if (live_regs_mask & (1 << LR_REGNUM))
            {
              sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
              /* If returning from an interrupt, restore the CPSR.  */
              if (IS_INTERRUPT (func_type))
                strcat (p, "^");
            }
          else
            strcpy (p, "}");
        }

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
         perform the actual function return.  */
      if (really_return
          && func_type != ARM_FT_INTERWORKED
          && (live_regs_mask & (1 << LR_REGNUM)) != 0)
        {
          /* The return has already been handled
             by loading the LR into the PC.  */
          return "";
        }
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
        {
        case ARM_FT_ISR:
        case ARM_FT_FIQ:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
          break;

        case ARM_FT_INTERWORKED:
          gcc_assert (arm_arch5t || arm_arch4t);
          sprintf (instr, "bx%s\t%%|lr", conditional);
          break;

        case ARM_FT_EXCEPTION:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
          break;

        default:
          if (IS_CMSE_ENTRY (func_type))
            {
              /* Check if we have to clear the 'GE bits', which are only
                 used if parallel add and subtraction instructions are
                 available.  */
              if (TARGET_INT_SIMD)
                snprintf (instr, sizeof (instr),
                          "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
              else
                snprintf (instr, sizeof (instr),
                          "msr%s\tAPSR_nzcvq, %%|lr", conditional);

              output_asm_insn (instr, & operand);
              if (TARGET_HARD_FLOAT)
                {
                  /* Clear the cumulative exception-status bits (0-4,7) and the
                     condition code bits (28-31) of the FPSCR.  We need to
                     remember to clear the first scratch register used (IP) and
                     save and restore the second (r4).  */
                  snprintf (instr, sizeof (instr), "push\t{%%|r4}");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
                  output_asm_insn (instr, & operand);
                }
              snprintf (instr, sizeof (instr), "bxns\t%%|lr");
            }
          /* Use bx if it's available.  */
          else if (arm_arch5t || arm_arch4t)
            sprintf (instr, "bx%s\t%%|lr", conditional);
          else
            sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
          break;
        }

      output_asm_insn (instr, & operand);
    }

  return "";
}
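
/* Illustrative results (assumed; the exact output depends on the
   target options): a normal ARM-state function that saved {r4, r5, lr}
   and has no special exit requirements returns with a single

       pop     {r4, r5, pc}

   whereas an interworked return restores into lr and finishes with
   "bx lr", and an ISR/FIQ handler uses "subs pc, lr, #4" so that the
   CPSR is restored as well.  */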

/* Output in FILE asm statements needed to declare the NAME of the function
   defined by its DECL node.  */

void
arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
{
  size_t cmse_name_len;
  char *cmse_name = 0;
  char cmse_prefix[] = "__acle_se_";

  /* When compiling with ARMv8-M Security Extensions enabled, we should print
     an extra function label for each function with the
     'cmse_nonsecure_entry' attribute.  This extra function label should be
     prepended with '__acle_se_', telling the linker that it needs to create
     secure gateway veneers for this function.  */
  if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
                                    DECL_ATTRIBUTES (decl)))
    {
      cmse_name_len = sizeof (cmse_prefix) + strlen (name);
      cmse_name = XALLOCAVEC (char, cmse_name_len);
      snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
      targetm.asm_out.globalize_label (file, cmse_name);

      ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
      ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
    }

  ARM_DECLARE_FUNCTION_NAME (file, name, decl);
  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
  ASM_OUTPUT_LABEL (file, name);

  if (cmse_name)
    ASM_OUTPUT_LABEL (file, cmse_name);

  ARM_OUTPUT_FN_UNWIND (file, TRUE);
}

/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
         .ascii "arm_poke_function_name", 0
         .align
     t1
         .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
         mov     ip, sp
         stmfd   sp!, {fp, ip, lr, pc}
         sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location, with length ((pc[-3]) & ~0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */

void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
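
/* Worked example (added commentary): for NAME "main", LENGTH is 5
   (including the terminating NUL) and ALIGNLENGTH rounds up to 8, so
   the marker word emitted is 0xff000000 + 8 == 0xff000008; a
   backtracer recognizes the 0xff marker in the top byte and uses the
   low bits to locate the start of the string.  */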

/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f)
{
  unsigned long func_type;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  if (IS_CMSE_ENTRY (func_type))
    asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");

  asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
               (HOST_WIDE_INT) crtl->args.size,
               crtl->args.pretend_args_size,
               (HOST_WIDE_INT) get_frame_size ());
  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
               frame_pointer_needed,
               cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
         of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
        {
          rtx label = cfun->machine->call_via[regno];

          if (label != NULL)
            {
              switch_to_section (function_section (current_function_decl));
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (label));
              asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
            }
        }

      /* ??? Probably not safe to set this here, since it assumes that a
         function will be emitted as assembly immediately after we generate
         RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
                  || (cfun->machine->return_used_this_function != 0)
                  || offsets->saved_regs == offsets->outgoing_args
                  || frame_pointer_needed);
    }
}

/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i;
  unsigned regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    continue;

  i = 0;

  /* If there's an odd number of registers to push, start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
        mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
                                                     stack_pointer_rtx));
      else
        mem = gen_frame_mem (Pmode,
                             gen_rtx_PRE_MODIFY
                             (Pmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
        rtx reg1, reg2, mem1, mem2;
        rtx tmp0, tmp1, tmp2;
        int regno2;

        /* Find the register to pair with this one.  */
        for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
             regno2++)
          continue;

        reg1 = gen_rtx_REG (SImode, regno);
        reg2 = gen_rtx_REG (SImode, regno2);

        if (i == 0)
          {
            rtx insn;

            /* For the first register pair we have to use an STRD with
               pre-decrement.  */
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * num_regs));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * (num_regs - 1)));
            tmp0 = gen_rtx_SET (stack_pointer_rtx,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               -4 * num_regs));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp0) = 1;
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
            XVECEXP (par, 0, 0) = tmp0;
            XVECEXP (par, 0, 1) = tmp1;
            XVECEXP (par, 0, 2) = tmp2;
            insn = emit_insn (par);
            RTX_FRAME_RELATED_P (insn) = 1;
            add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          }
        else
          {
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * i));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * (i + 1)));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            XVECEXP (par, 0, 0) = tmp1;
            XVECEXP (par, 0, 1) = tmp2;
            emit_insn (par);
          }

        /* Create unwind information.  This is an approximation.  */
        tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * i)),
                            reg1);
        tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * (i + 1))),
                            reg2);

        RTX_FRAME_RELATED_P (tmp1) = 1;
        RTX_FRAME_RELATED_P (tmp2) = 1;
        XVECEXP (dwarf, 0, i + 1) = tmp1;
        XVECEXP (dwarf, 0, i + 2) = tmp2;
        i += 2;
        regno = regno2 + 1;
      }
    else
      regno++;
}
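
/* Illustrative sequence (assumed): pushing {r4, r5, r6} (an odd count)
   first emits a single store that also allocates the whole area,

       str     r4, [sp, #-12]!
       strd    r5, r6, [sp, #4]

   so that the following strd stays doubleword aligned.  */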

/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more freedom
   for scheduling and can be turned into an STM by peephole optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: More efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2 == 0)
            && (saved_regs_mask & (1 << (j + 1))))
          {
            /* The current register and the next register form a register
               pair for which STRD can be generated.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (DImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset + 4));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 8;
            j += 2;
          }
        else
          {
            /* Emit a single word store.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (SImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 4;
            j += 1;
          }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
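
/* Illustrative sequence (assumed): for {r4, r5, r7} the loop above
   emits

       strd    r4, r5, [sp, #-12]!
       str     r7, [sp, #8]

   r4/r5 form an even-based pair and use STRD with writeback; r7 has
   no partner and falls back to a single-word, offset-addressed store.  */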

/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        num_regs++;
      if (dwarf_regs_mask & (1 << i))
        num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                        (const_int:SI <num>)))
                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
           ...
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
           ...
        ])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, 0)
            = gen_rtx_SET (gen_frame_mem
                           (BLKmode,
                            gen_rtx_PRE_MODIFY (Pmode,
                                                stack_pointer_rtx,
                                                plus_constant
                                                (Pmode, stack_pointer_rtx,
                                                 -4 * num_regs))
                            ),
                           gen_rtx_UNSPEC (BLKmode,
                                           gen_rtvec (1, reg),
                                           UNSPEC_PUSH_MULT));

          if (dwarf_regs_mask & (1 << i))
            {
              tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
                                 reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          break;
        }
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

          if (dwarf_regs_mask & (1 << i))
            {
              tmp
                = gen_rtx_SET (gen_frame_mem
                               (SImode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               4 * j)),
                               reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          j++;
        }
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
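
/* Usage note (added commentary): a prologue that must save {r4, r5, lr}
   calls emit_multi_reg_push (0x4030, 0x4030); the emitted insn matches
   the push_multi pattern (one PRE_MODIFY store plus USEs), while the
   attached REG_FRAME_RELATED_EXPR note describes the same effect as a
   single SP decrement and three individual stores for the unwinder.  */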

/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}

/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
                          rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
         num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
        reg = gen_rtx_REG (SImode, i);
        if ((num_regs == 1) && emit_update && !return_in_pc)
          {
            /* Emit single load with writeback.  */
            tmp = gen_frame_mem (SImode,
                                 gen_rtx_POST_INC (Pmode,
                                                   stack_pointer_rtx));
            tmp = emit_insn (gen_rtx_SET (reg, tmp));
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
            return;
          }

        tmp = gen_rtx_SET (reg,
                           gen_frame_mem
                           (SImode,
                            plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
        RTX_FRAME_RELATED_P (tmp) = 1;
        XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

        /* We need to maintain a sequence for DWARF info too.  As dwarf info
           should not have PC, skip PC.  */
        if (i != PC_REGNUM)
          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
                                 stack_pointer_rtx, stack_pointer_rtx);
}
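
/* Usage note (added commentary): popping {r4, r5, pc} builds a PARALLEL
   holding a return, an SP update of +12 and three register loads, which
   the pop_multi pattern prints as "pop {r4, r5, pc}"; the
   REG_CFA_RESTORE notes cover only r4 and r5, since the PC is never
   recorded in the DWARF frame information.  */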
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
        first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
                         gen_frame_mem
                         (DFmode,
                          plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
                                 base_reg, base_reg);
}
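
/* For illustration (a sketch; the exact operand conventions follow the
   callers above): a pop of three D registers starting at d8 with
   BASE_REG == sp would be matched as something like

       vldm    sp!, {d8-d10}

   one SET updating the base register by 8 * num_regs == 24 bytes, plus
   one DFmode SET per restored register, each with a REG_CFA_RESTORE
   note for the unwinder.  */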
/* Generate and emit a pattern that will be recognized as an LDRD pattern.
   If an even number of registers is being popped, multiple LDRD patterns
   are created for all register pairs.  If an odd number of registers is
   popped, the last register is loaded by using an LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
        /* Create RTX for memory load.  */
        reg = gen_rtx_REG (SImode, j);
        tmp = gen_rtx_SET (reg,
                           gen_frame_mem (SImode,
                               plus_constant (Pmode,
                                              stack_pointer_rtx, 4 * i)));
        RTX_FRAME_RELATED_P (tmp) = 1;

        if (i % 2 == 0)
          {
            /* When saved-register index (i) is even, the RTX to be emitted is
               yet to be created.  Hence create it first.  The LDRD pattern we
               are generating is :
               [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
                 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
               where target registers need not be consecutive.  */
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            dwarf = NULL_RTX;
          }

        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
           added as 0th element and if i is odd, reg_i is added as 1st element
           of LDRD pattern shown above.  */
        XVECEXP (par, 0, (i % 2)) = tmp;
        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        if ((i % 2) == 1)
          {
            /* When saved-register index (i) is odd, RTXs for both the registers
               to be loaded are generated in above given LDRD pattern, and the
               pattern can be emitted now.  */
            par = emit_insn (par);
            REG_NOTES (par) = dwarf;
            RTX_FRAME_RELATED_P (par) = 1;
          }

        i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
                                 stack_pointer_rtx, stack_pointer_rtx);

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
         register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
                          gen_rtx_POST_INC (SImode,
                                            stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
        {
          /* If return_in_pc, j must be PC_REGNUM.  */
          gcc_assert (j == PC_REGNUM);
          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = tmp;
          par = emit_jump_insn (par);
        }
      else
        {
          par = emit_insn (tmp);
          REG_NOTES (par) = dwarf;
          arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
         pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This provides
   more scheduling freedom, compared to writeback on every load.  However,
   if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2) == 0
            && (saved_regs_mask & (1 << (j + 1)))
            && (j + 1) != PC_REGNUM)
          {
            /* Current register and next register form register pair for which
               LDRD can be generated.  PC is always the last register popped, and
               we handle it separately.  */
            if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j),
                                    NULL_RTX);
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j + 1),
                                    dwarf);
            REG_NOTES (tmp) = dwarf;

            offset += 8;
            j += 2;
          }
        else if (j != PC_REGNUM)
          {
            /* Emit a single word load.  */
            if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
                                              gen_rtx_REG (SImode, j),
                                              NULL_RTX);

            offset += 4;
            j += 1;
          }
        else /* j == PC_REGNUM */
          j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
                                   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
                         gen_frame_mem (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
                              gen_rtx_REG (SImode, PC_REGNUM),
                              NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }
}
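
/* For illustration (a sketch): ARM-mode LDRD requires an even/odd
   consecutive pair, so a mask of {r4, r5, r7} is emitted as

       ldrd    r4, r5, [sp]
       ldr     r7, [sp, #8]
       add     sp, sp, #12

   with the single deferred stack update giving the scheduler freedom to
   reorder the two loads, as the comment above explains.  */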
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
         && (!crtl->is_leaf
             || thumb_far_jump_used_p ()
             || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available because
   we have an indirect tailcall happening in this
   particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
        rtx_insn *call = BB_END (e->src);
        if (!CALL_P (call))
          call = prev_nonnote_nondebug_insn (call);
        gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
        if (find_regno_fusage (call, USE, 3)
            || is_indirect_tailcall_p (call))
          return true;
      }

  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */

/* Return cached stack offsets.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;

  offsets = &cfun->machine->stack_offsets;

  return offsets;
}
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static void
arm_compute_frame_layout (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* Initially this is the size of the local variables.  It will be translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
         preserve that condition at any subroutine call.  We also require the
         soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
        {
          /* Check for the call-saved iWMMXt registers.  */
          for (regno = FIRST_IWMMXT_REGNUM;
               regno <= LAST_IWMMXT_REGNUM;
               regno++)
            if (df_regs_ever_live_p (regno)
                && !call_used_or_fixed_reg_p (regno))
              saved += 8;
        }

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
          && TARGET_HARD_FLOAT)
        saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
        saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (crtl->is_leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
         block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
         when there is a stack frame as the alignment will be rolled into
         the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
        {
          int reg = -1;

          /* Register r3 is caller-saved.  Normally it does not need to be
             saved on entry by the prologue.  However if we choose to save
             it for padding then we may confuse the compiler into thinking
             a prologue sequence is required when in fact it is not.  This
             will occur when shrink-wrapping if r3 is used as a scratch
             register and there are no other callee-saved writes.

             This situation can be avoided when other callee-saved registers
             are available and r3 is not mandatory if we choose a callee-saved
             register for padding.  */
          bool prefer_callee_reg_p = false;

          /* If it is safe to use r3, then do so.  This sometimes
             generates better code on Thumb-2 by avoiding the need to
             use 32-bit push/pop instructions.  */
          if (! any_sibcall_could_use_r3 ()
              && arm_size_return_regs () <= 12
              && (offsets->saved_regs_mask & (1 << 3)) == 0
              && (TARGET_THUMB2
                  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
            {
              reg = 3;
              if (!TARGET_THUMB2)
                prefer_callee_reg_p = true;
            }
          if (reg == -1
              || prefer_callee_reg_p)
            {
              for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
                {
                  /* Avoid fixed registers; they may be changed at
                     arbitrary times so it's unsafe to restore them
                     during the epilogue.  */
                  if (!fixed_regs[i]
                      && (offsets->saved_regs_mask & (1 << i)) == 0)
                    {
                      reg = i;
                      break;
                    }
                }
            }

          if (reg != -1)
            {
              offsets->saved_regs += 4;
              offsets->saved_regs_mask |= (1 << reg);
            }
        }
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
                            + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
        offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }
}
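
/* Worked example (a sketch; assumes CALLER_INTERWORKING_SLOT_SIZE == 0,
   no static chain and no outgoing arguments): an ARM-mode function with
   8 bytes of locals that saves {r4, r5, lr} gets

     saved_args    = 0
     saved_regs    = 12   (three 4-byte core registers)
     soft_frame    = 12, bumped to 16 by the doubleword-alignment fixup
     locals_base   = 16 + 8 = 24
     outgoing_args = 24   (already 8-byte aligned)

   so SP drops by 24 bytes overall and stays doubleword aligned.  */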
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case FRAME_POINTER_REGNUM:
          /* This is the reverse of the soft frame pointer
             to hard frame pointer elimination below.  */
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* This is only non-zero in the case where the static chain register
             is stored above the frame.  */
          return offsets->frame - offsets->saved_args - 4;

        case STACK_POINTER_REGNUM:
          /* If nothing has been pushed on the stack at all
             then this will return -4.  This *is* correct!  */
          return offsets->outgoing_args - (offsets->saved_args + 4);

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* The hard frame pointer points to the top entry in the
             stack frame.  The soft frame pointer to the bottom entry
             in the stack frame.  If there is no stack frame at all,
             then they are identical.  */
          return offsets->frame - offsets->soft_frame;

        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
         In theory you could eliminate from the hard frame
         pointer to the stack pointer, but this will never
         happen, since if a stack frame is not needed the
         hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
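
/* Worked example (a sketch, continuing the frame-layout example above
   with saved_args == 0, soft_frame == 16, outgoing_args == 24):
   eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM yields
   24 - (0 + 4) == 20, while FRAME_POINTER_REGNUM into
   STACK_POINTER_REGNUM yields 24 - 16 == 8.  */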
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */
bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
          (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
          (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && !call_used_or_fixed_reg_p (reg))
      {
        insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
        insn = gen_rtx_MEM (V2SImode, insn);
        insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
        RTX_FRAME_RELATED_P (insn) = 1;
        saved_size += 8;
      }

  if (TARGET_HARD_FLOAT)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
        {
          if ((!df_regs_ever_live_p (reg) || call_used_or_fixed_reg_p (reg))
              && (!df_regs_ever_live_p (reg + 1)
                  || call_used_or_fixed_reg_p (reg + 1)))
            {
              if (start_reg != reg)
                saved_size += vfp_emit_fstmd (start_reg,
                                              (reg - start_reg) / 2);
              start_reg = reg + 2;
            }
        }
      if (start_reg != reg)
        saved_size += vfp_emit_fstmd (start_reg,
                                      (reg - start_reg) / 2);
    }
  return saved_size;
}
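
/* For illustration (a sketch): if d8, d9 and d11 are live and call-saved,
   the scan above finds the contiguous run {d8, d9}, flushes it with one
   FSTMD when the dead d10 pair is reached, and later flushes {d11} with
   a second FSTMD; a gap in the live set always terminates a run.  */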
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
         expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx,
                                        hard_frame_pointer_rtx));
        }
      else
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        stack_pointer_rtx));
        }
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use as a 2nd scratch register on
   function entry after the registers are saved in the prologue.  This register
   must be released by means of release_scratch_register_on_entry.  IP is not
   considered since it is always used as the 1st scratch register if available.

   REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
                               unsigned long live_regs)
{
  int regno = -1;

  sr->saved = false;

  if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
    regno = LR_REGNUM;
  else
    {
      unsigned int i;

      for (i = 4; i < 11; i++)
        if (regno1 != i && (live_regs & (1 << i)) != 0)
          {
            regno = i;
            break;
          }

      if (regno < 0)
        {
          /* If IP is used as the 1st scratch register for a nested function,
             then either r3 wasn't available or is used to preserve IP.  */
          if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
            regno1 = 3;
          regno = (regno1 == 3 ? 2 : 3);
          sr->saved
            = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
                               regno);
        }
    }

  sr->reg = gen_rtx_REG (SImode, regno);
  if (sr->saved)
    {
      rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
      rtx x = gen_rtx_SET (stack_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, -4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  REGNO1
   is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
                            unsigned int regno1, unsigned long live_regs)
{
  rtx reg1 = gen_rtx_REG (Pmode, regno1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
         it exceeds SIZE.  If only two probes are needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
          emit_stack_probe (reg1);
        }

      rem = size - (i - PROBE_INTERVAL);
      if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
        {
          emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
          emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
        }
      else
        emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));


      /* Step 3: the loop

         do
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }
         while (TEST_ADDR != LAST_ADDR)

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        {
          HOST_WIDE_INT rem = size - rounded_size;

          if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
            {
              emit_set_insn (sr.reg,
                             plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
              emit_stack_probe (plus_constant (Pmode, sr.reg,
                                               PROBE_INTERVAL - rem));
            }
          else
            emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
        }

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
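
/* Worked example (a sketch): with PROBE_INTERVAL == 4096, FIRST == 4096
   and SIZE == 10000, the middle branch above probes at SP - 8192 and
   SP - 12288, leaving a residue of 10000 - 5904... more precisely
   rem == 10000 - 8192 == 1808, which is probed at SP - 14096, i.e. at
   FIRST + SIZE, via the extra reg1 adjustment on Thumb-2 (1808 > 255)
   or directly on ARM (1808 <= 4095).  */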
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("str\tr0, [%0, #0]", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Branch.  */
  fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
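
/* The loop emitted above looks like this (a sketch, with %0 and %1
   resolved to, say, r4 and r5 and PROBE_INTERVAL == 4096):

   .LPSRL0:
       sub     r4, r4, #4096
       str     r0, [r4, #0]
       cmp     r4, r5
       bne     .LPSRL0
*/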
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  bool clobber_ip;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
        current_function_static_stack_size = 0;
      return;
    }

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

          mov r0, sp
          bic r1, r0, #7
          mov sp, r1
          <save and restore r0 in normal prologue/epilogue>
          mov sp, r0
          bx lr

         The unwinder doesn't need to know about the stack realignment.
         Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
         has entered a different function.  That said, the unwind info is
         correct, individually, before and after this instruction because
         we've described the save of SP, which will override the default
         handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* Let's compute the static_chain_stack_bytes required and store it.  Right
     now the value must be -1 as stored by arm_init_machine_status ().  */
  cfun->machine->static_chain_stack_bytes
    = arm_compute_static_chain_stack_bytes ();

  /* The static chain register is the same as the IP register.  If it is
     clobbered when creating the frame, we need to save and restore it.  */
  clobber_ip = IS_NESTED (func_type)
               && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
                   || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
                        || flag_stack_clash_protection)
                       && !df_regs_ever_live_p (LR_REGNUM)
                       && arm_r3_live_at_start_p ()));

  /* Find somewhere to store IP whilst the frame is being created.
     We try the following places in order:

       1. The last argument register r3 if it is available.
       2. A slot on the stack above the frame if there are no
          arguments to push onto the stack.
       3. Register r3 again, after pushing the argument registers
          onto the stack, if this is a varargs function.
       4. The last slot on the stack created for the arguments to
          push, if this isn't a varargs function.

     Note - we only need to tell the dwarf2 backend about the SP
     adjustment in the second variant; the static chain register
     doesn't need to be unwound, as it doesn't contain a value
     inherited from the caller.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p ())
        insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
      else if (args_to_push == 0)
        {
          rtx addr, dwarf;

          gcc_assert(arm_compute_static_chain_stack_bytes() == 4);

          saved_regs += 4;

          addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
          insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
          fp_offset = 4;

          /* Just tell the dwarf backend that we adjusted SP.  */
          dwarf = gen_rtx_SET (stack_pointer_rtx,
                               plus_constant (Pmode, stack_pointer_rtx,
                                              -fp_offset));
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
        }
      else
        {
          /* Store the args on the stack.  */
          if (cfun->machine->uses_anonymous_args)
            {
              insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
                                          (0xf0 >> (args_to_push / 4)) & 0xf);
              emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
              saved_pretend_args = 1;
            }
          else
            {
              rtx addr, dwarf;

              if (args_to_push == 4)
                addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
              else
                addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          -args_to_push));

              insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

              /* Just tell the dwarf backend that we adjusted SP.  */
              dwarf = gen_rtx_SET (stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx,
                                                  -args_to_push));
              add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
            }

          RTX_FRAME_RELATED_P (insn) = 1;
          fp_offset = args_to_push;
          args_to_push = 0;
        }
    }

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
        {
          /* Interrupt functions must not corrupt any registers.
             Creating a frame pointer however, corrupts the IP
             register, so we must push it first.  */
          emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

          /* Do not set RTX_FRAME_RELATED_P on this insn.
             The dwarf stack unwinding code only wants to see one
             stack decrement per function, and this is not it.  If
             this instruction is labeled as being part of the frame
             creation sequence then dwarf2out_frame_debug_expr will
             die when it encounters the assignment of IP to FP
             later on, since the use of SP here establishes SP as
             the CFA register and not IP.

             Anyway this instruction is not really part of the stack
             frame creation although it is part of the prologue.  */
        }

      insn = emit_set_insn (ip_rtx,
                            plus_constant (Pmode, stack_pointer_rtx,
                                           fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
        insn = emit_multi_reg_push
          ((0xf0 >> (args_to_push / 4)) & 0xf,
           (0xf0 >> (args_to_push / 4)) & 0xf);
      else
        insn = emit_insn
          (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra push
     of IP (needed when a frame is needed and the frame layout is APCS),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
          && saved_regs == offsets->saved_regs - offsets->saved_args)
        {
          /* If no coprocessor registers are being pushed and we don't have
             to worry about a frame pointer then push extra registers to
             create the stack frame.  This is done in a way that does not
             alter the frame layout, so is independent of the epilogue.  */
          int n;
          int frame;
          n = 0;
          while (n < 8 && (live_regs_mask & (1 << n)) == 0)
            n++;
          frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
          if (frame && n * 4 >= frame)
            {
              n = frame / 4;
              live_regs_mask |= (1 << n) - 1;
              saved_regs += frame;
            }
        }

      if (TARGET_LDRD
          && current_tune->prefer_ldrd_strd
          && !optimize_function_for_size_p (cfun))
        {
          gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
          if (TARGET_THUMB2)
            thumb2_emit_strd_push (live_regs_mask);
          else if (TARGET_ARM
                   && !TARGET_APCS_FRAME
                   && !IS_INTERRUPT (func_type))
            arm_emit_strd_push (live_regs_mask);
          else
            {
              insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }
      else
        {
          insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
        {
          insn = GEN_INT (-(4 + args_to_push + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      else
        {
          insn = GEN_INT (saved_regs - (4 + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If this isn't an interrupt service routine and we have a frame, then do
     stack checking.  We use IP as the first scratch register, except for the
     non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
  if (!IS_INTERRUPT (func_type)
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
          || flag_stack_clash_protection))
    {
      unsigned int regno;

      if (!IS_NESTED (func_type) || clobber_ip)
        regno = IP_REGNUM;
      else if (df_regs_ever_live_p (LR_REGNUM))
        regno = LR_REGNUM;
      else
        regno = 3;

      if (crtl->is_leaf && !cfun->calls_alloca)
        {
          if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
            arm_emit_probe_stack_range (get_stack_check_protect (),
                                        size - get_stack_check_protect (),
                                        regno, live_regs_mask);
        }
      else if (size > 0)
        arm_emit_probe_stack_range (get_stack_check_protect (), size,
                                    regno, live_regs_mask);
    }

  /* Recover the static chain register.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p () || saved_pretend_args)
        insn = gen_rtx_REG (SImode, 3);
      else
        {
          insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
          insn = gen_frame_mem (SImode, insn);
        }
      emit_set_insn (ip_rtx, insn);
      emit_insn (gen_force_register_use (ip_rtx));
    }

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
         need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
                        - offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    amount));
      do
        {
          last = last ? NEXT_INSN (last) : get_insns ();
          RTX_FRAME_RELATED_P (last) = 1;
        }
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
         will prevent the scheduler from moving stores to the frame
         before the stack adjustment.  */
      if (frame_pointer_needed)
        emit_insn (gen_stack_tie (stack_pointer_rtx,
                                  hard_frame_pointer_rtx));
    }


  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
        mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask, NULL_RTX);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }
      if (current_insn_predicate != NULL)
        {
          output_operand_lossage
            ("predicated instruction in conditional sequence");
          return;
        }

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
   Letters previously used, but now deprecated/obsolete: sVWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '.':
      /* The current condition code for a condition code setting instruction.
         Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      fputc('s', stream);
      arm_print_condition (stream);
      return;

    case '!':
      /* If the instruction is conditionally executed then print
         the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2);
      if (current_insn_predicate)
        arm_print_condition (stream);
      else
        fputc('s', stream);
      break;

    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
        REAL_VALUE_TYPE r;
        r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
        fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case SYMBOL_REF:
          output_addr_const (stream, x);
          break;

        case CONST:
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
            {
              output_addr_const (stream, x);
              break;
            }
          /* Fall through.  */

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
          break;

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    case 'B':
      if (CONST_INT_P (x))
        {
          HOST_WIDE_INT val;
          val = ARM_SIGN_EXTEND (~INTVAL (x));
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
        }
      else
        {
          putc ('~', stream);
          output_addr_const (stream, x);
        }
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
        HOST_WIDE_INT val;

        if (!CONST_INT_P (x)
            || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        else
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
        HOST_WIDE_INT val;
        const char *shift;

        shift = shift_op (x, &val);

        if (shift)
          {
            fprintf (stream, ", %s ", shift);
            if (val == -1)
              arm_print_operand (stream, XEXP (x, 1), 0);
            else
              fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
          }
      }
      return;
      /* An explanation of the 'Q', 'R' and 'H' register operands:

         In a pair of registers containing a DI or DF value the 'Q'
         operand returns the register number of the register containing
         the least significant part of the value.  The 'R' operand returns
         the register number of the register containing the most
         significant part of the value.

         The 'H' operand returns the higher of the two register numbers.
         On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
         same as the 'Q' operand, since the most significant part of the
         value is held in the lower number register.  The reverse is true
         on systems where WORDS_BIG_ENDIAN is false.

         The purpose of these operands is to distinguish between cases
         where the endian-ness of the values is important (for example
         when they are added together), and cases where the endian-ness
         is irrelevant, but the order of register operations is important.
         For example when loading a value from memory into a register
         pair, the endian-ness does not matter.  Provided that the value
         from the lower memory address is put into the lower numbered
         register, and the value from the higher address is put into the
         higher numbered register, the load will work regardless of whether
         the value being loaded is big-wordian or little-wordian.  The
         order of the two register loads can matter however, if the address
         of the memory location is actually held in one of the registers
         being overwritten by the load.

         The 'Q' and 'R' constraints are also available for 64-bit
         constants.  */
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          rtx part = gen_lowpart (SImode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          machine_mode mode = GET_MODE (x);
          rtx part;

          if (mode == VOIDmode)
            mode = DImode;
          part = gen_highpart_mode (SImode, mode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
                   REG_P (XEXP (x, 0))
                   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
                   REGNO (x),
                   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
        int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
        int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
        if (numregs == 1)
          asm_fprintf (stream, "{d%d}", regno);
        else
          asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
        return;

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[get_arm_condition_code (x)],
             stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
         want to do that.  */
      if (x == const_true_rtx)
        {
          output_operand_lossage ("instruction never executed");
          return;
        }
      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
                                 (get_arm_condition_code (x))],
             stream);
      return;

    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
          || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
          || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
        /* Bad value for wCG register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

    /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
          || INTVAL (x) < 0
          || INTVAL (x) >= 16)
        /* Bad value for wC register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        {
          static const char * wc_reg_names [16] =
            {
              "wCID",  "wCon",  "wCSSF", "wCASF",
              "wC4",   "wC5",   "wC6",   "wC7",
              "wCGR0", "wCGR1", "wCGR2", "wCGR3",
              "wC12",  "wC13",  "wC14",  "wC15"
            };

          fputs (wc_reg_names [INTVAL (x)], stream);
        }
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
        machine_mode mode = GET_MODE (x);
        int is_quad = (code == 'q');
        int regno;

        if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (!REG_P (x)
            || !IS_VFP_REGNUM (REGNO (x)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
            || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
                 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if ((GET_MODE_SIZE (mode) != 16
             && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!NEON_REGNO_OK_FOR_QUAD (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (GET_MODE_SIZE (mode) == 16)
          fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
                                  + (code == 'f' ? 1 : 0));
        else
          fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
                                  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
        int index = vfp3_const_double_index (x);
        gcc_assert (index != -1);
        fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
        rtx addr;
        bool postinc = FALSE;
        rtx postinc_reg = NULL;
        unsigned align, memsize, align_bits;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        if (GET_CODE (addr) == POST_INC)
          {
            postinc = 1;
            addr = XEXP (addr, 0);
          }
        if (GET_CODE (addr) == POST_MODIFY)
          {
            postinc_reg = XEXP( XEXP (addr, 1), 1);
            addr = XEXP (addr, 0);
          }
        asm_fprintf (stream, "[%r", REGNO (addr));

        /* We know the alignment of this access, so we can emit a hint in the
           instruction (for some alignments) as an aid to the memory subsystem
           of the target.  */
        align = MEM_ALIGN (x) >> 3;
        memsize = MEM_SIZE (x);

        /* Only certain alignment specifiers are supported by the hardware.  */
        if (memsize == 32 && (align % 32) == 0)
          align_bits = 256;
        else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
          align_bits = 128;
        else if (memsize >= 8 && (align % 8) == 0)
          align_bits = 64;
        else
          align_bits = 0;

        if (align_bits != 0)
          asm_fprintf (stream, ":%d", align_bits);

        asm_fprintf (stream, "]");

        if (postinc)
          fputs("!", stream);
        if (postinc_reg)
          asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;

    case 'C':
      {
        rtx addr;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        gcc_assert (REG_P (addr));
        asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      {
        int result;

        gcc_assert (CONST_DOUBLE_P (x));
        result = vfp3_const_double_for_fract_bits (x);
        if (result == 0)
          result = vfp3_const_double_for_bits (x);
        fprintf (stream, "#%d", result);
        return;
      }

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (stream, "%r", REGNO (x));
          break;

        case MEM:
          output_address (GET_MODE (x), XEXP (x, 0));
          break;

        case CONST_DOUBLE:
          {
            char fpstr[20];
            real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
                             sizeof (fpstr), 0, 1);
            fprintf (stream, "#%s", fpstr);
          }
          break;

        default:
          gcc_assert (GET_CODE (x) != NEG);
          fputc ('#', stream);
          if (GET_CODE (x) == HIGH)
            {
              fputs (":lower16:", stream);
              x = XEXP (x, 0);
            }

          output_addr_const (stream, x);
          break;
        }
    }
}
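
/* For illustration (a sketch): with op0 = (reg:SI 0) and
   op1 = (const_int 10), the template "add%?\t%0, %0, %1" prints as
   "addeq r0, r0, #10" when the current condition is EQ inside a
   conditionalized sequence, and as "add r0, r0, #10" otherwise,
   since %? defers to arm_print_condition above.  */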
23244 /* Target hook for printing a memory address. */
23246 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
23250 int is_minus
= GET_CODE (x
) == MINUS
;
23253 asm_fprintf (stream
, "[%r]", REGNO (x
));
23254 else if (GET_CODE (x
) == PLUS
|| is_minus
)
23256 rtx base
= XEXP (x
, 0);
23257 rtx index
= XEXP (x
, 1);
23258 HOST_WIDE_INT offset
= 0;
23260 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
23262 /* Ensure that BASE is a register. */
23263 /* (one of them must be). */
23264 /* Also ensure the SP is not used as in index register. */
23265 std::swap (base
, index
);
23267 switch (GET_CODE (index
))
23270 offset
= INTVAL (index
);
23273 asm_fprintf (stream
, "[%r, #%wd]",
23274 REGNO (base
), offset
);
23278 asm_fprintf (stream
, "[%r, %s%r]",
23279 REGNO (base
), is_minus
? "-" : "",
23289 asm_fprintf (stream
, "[%r, %s%r",
23290 REGNO (base
), is_minus
? "-" : "",
23291 REGNO (XEXP (index
, 0)));
23292 arm_print_operand (stream
, index
, 'S');
23293 fputs ("]", stream
);
23298 gcc_unreachable ();
23301 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
23302 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
23304 gcc_assert (REG_P (XEXP (x, 0)));
23306 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
23307 asm_fprintf (stream, "[%r, #%s%d]!",
23308 REGNO (XEXP (x, 0)),
23309 GET_CODE (x) == PRE_DEC ? "-" : "",
23310 GET_MODE_SIZE (mode));
23312 asm_fprintf (stream, "[%r], #%s%d",
23313 REGNO (XEXP (x, 0)),
23314 GET_CODE (x) == POST_DEC ? "-" : "",
23315 GET_MODE_SIZE (mode));
23317 else if (GET_CODE (x) == PRE_MODIFY)
23319 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
23320 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
23321 asm_fprintf (stream, "#%wd]!",
23322 INTVAL (XEXP (XEXP (x, 1), 1)));
23324 asm_fprintf (stream, "%r]!",
23325 REGNO (XEXP (XEXP (x, 1), 1)));
23327 else if (GET_CODE (x) == POST_MODIFY)
23329 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
23330 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
23331 asm_fprintf (stream, "#%wd",
23332 INTVAL (XEXP (XEXP (x, 1), 1)));
23334 asm_fprintf (stream, "%r",
23335 REGNO (XEXP (XEXP (x, 1), 1)));
23337 else output_addr_const (stream, x);
23342 asm_fprintf (stream, "[%r]", REGNO (x));
23343 else if (GET_CODE (x) == POST_INC)
23344 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
23345 else if (GET_CODE (x) == PLUS)
23347 gcc_assert (REG_P (XEXP (x, 0)));
23348 if (CONST_INT_P (XEXP (x, 1)))
23349 asm_fprintf (stream, "[%r, #%wd]",
23350 REGNO (XEXP (x, 0)),
23351 INTVAL (XEXP (x, 1)));
23353 asm_fprintf (stream, "[%r, %r]",
23354 REGNO (XEXP (x, 0)),
23355 REGNO (XEXP (x, 1)));
23358 output_addr_const (stream, x);
23362 /* Target hook for indicating whether a punctuation character for
23363 TARGET_PRINT_OPERAND is valid.  */
23365 arm_print_operand_punct_valid_p (unsigned char code)
23367 return (code == '@' || code == '|' || code == '.'
23368 || code == '(' || code == ')' || code == '#'
23369 || (TARGET_32BIT && (code == '?'))
23370 || (TARGET_THUMB2 && (code == '!'))
23371 || (TARGET_THUMB && (code == '_')));
23374 /* Target hook for assembling integer objects.  The ARM version needs to
23375 handle word-sized values specially.  */
23377 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
23381 if (size == UNITS_PER_WORD && aligned_p)
23383 fputs ("\t.word\t", asm_out_file);
23384 output_addr_const (asm_out_file, x);
23386 /* Mark symbols as position independent.  We only do this in the
23387 .text segment, not in the .data segment.  */
23388 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
23389 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
23391 /* See legitimize_pic_address for an explanation of the
23392 TARGET_VXWORKS_RTP check.  */
23393 /* References to weak symbols cannot be resolved locally:
23394 they may be overridden by a non-weak definition at link
23396 if (!arm_pic_data_is_text_relative
23397 || (GET_CODE (x) == SYMBOL_REF
23398 && (!SYMBOL_REF_LOCAL_P (x)
23399 || (SYMBOL_REF_DECL (x)
23400 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
23401 || (SYMBOL_REF_FUNCTION_P (x)
23402 && !arm_fdpic_local_funcdesc_p (x)))))
23404 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
23405 fputs ("(GOTFUNCDESC)", asm_out_file);
23407 fputs ("(GOT)", asm_out_file);
23411 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
23412 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
23418 || arm_is_segment_info_known (x, &is_readonly))
23419 fputs ("(GOTOFF)", asm_out_file);
23421 fputs ("(GOT)", asm_out_file);
23426 /* For FDPIC we also have to mark the symbol for the .data section.  */
23428 && !making_const_table
23429 && SYMBOL_REF_P (x)
23430 && SYMBOL_REF_FUNCTION_P (x))
23431 fputs ("(FUNCDESC)", asm_out_file);
23433 fputc ('\n', asm_out_file);
23437 mode = GET_MODE (x);
23439 if (arm_vector_mode_supported_p (mode))
23443 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23445 units = CONST_VECTOR_NUNITS (x);
23446 size = GET_MODE_UNIT_SIZE (mode);
23448 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23449 for (i = 0; i < units; i++)
23451 rtx elt = CONST_VECTOR_ELT (x, i);
23453 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
23456 for (i = 0; i < units; i++)
23458 rtx elt = CONST_VECTOR_ELT (x, i);
23460 (*CONST_DOUBLE_REAL_VALUE (elt),
23461 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
23462 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
23468 return default_assemble_integer (x, size, aligned_p);
23472 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
23476 if (!TARGET_AAPCS_BASED)
23479 default_named_section_asm_out_constructor
23480 : default_named_section_asm_out_destructor) (symbol, priority);
23484 /* Put these in the .init_array section, using a special relocation.  */
23485 if (priority != DEFAULT_INIT_PRIORITY)
23488 sprintf (buf, "%s.%.5u",
23489 is_ctor ? ".init_array" : ".fini_array",
23491 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
23498 switch_to_section (s);
23499 assemble_align (POINTER_SIZE);
23500 fputs ("\t.word\t", asm_out_file);
23501 output_addr_const (asm_out_file, symbol);
23502 fputs ("(target1)\n", asm_out_file);
23505 /* Add a function to the list of static constructors.  */
23508 arm_elf_asm_constructor (rtx symbol, int priority)
23510 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
23513 /* Add a function to the list of static destructors.  */
23516 arm_elf_asm_destructor (rtx symbol, int priority)
23518 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
23521 /* A finite state machine takes care of noticing whether or not instructions
23522 can be conditionally executed, and thus decrease execution time and code
23523 size by deleting branch instructions. The fsm is controlled by
23524 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
23526 /* The states of the fsm controlling condition codes are:
23527 0: normal, do nothing special
23528 1: make ASM_OUTPUT_OPCODE not output this instruction
23529 2: make ASM_OUTPUT_OPCODE not output this instruction
23530 3: make instructions conditional
23531 4: make instructions conditional
23533 State transitions (state->state by whom under condition):
23534 0 -> 1 final_prescan_insn if the `target' is a label
23535 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23536 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23537 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23538 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23539 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23540 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23541 (the target insn is arm_target_insn).
23543 If the jump clobbers the conditions then we use states 2 and 4.
23545 A similar thing can be done with conditional return insns.
23547 XXX In case the `target' is an unconditional branch, this conditionalising
23548 of the instructions always reduces code size, but not always execution
23549 time. But then, I want to reduce the code size to somewhere near what
23550 /bin/cc produces. */
23552 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23553 instructions. When a COND_EXEC instruction is seen the subsequent
23554 instructions are scanned so that multiple conditional instructions can be
23555 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23556 specify the length and true/false mask for the IT block. These will be
23557 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
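/* An illustrative sketch of the transformation (assumed example, not
   taken from the source).  The fsm lets a branch over a single insn

       cmp   r0, #0
       beq   .L1
       add   r1, r1, #1
     .L1:

   be rewritten as a conditionally executed insn with the branch deleted:

       cmp   r0, #0
       addne r1, r1, #1  */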
23559 /* Returns the index of the ARM condition code string in
23560 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23561 COMPARISON should be an rtx like `(eq (...) (...))'. */
23564 maybe_get_arm_condition_code (rtx comparison)
23566 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23567 enum arm_cond_code code;
23568 enum rtx_code comp_code = GET_CODE (comparison);
23570 if (GET_MODE_CLASS (mode) != MODE_CC)
23571 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23572 XEXP (comparison, 1));
23576 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23577 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23578 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23579 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23580 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23581 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23582 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23583 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23584 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23585 case E_CC_DLTUmode: code = ARM_CC;
23588 if (comp_code == EQ)
23589 return ARM_INVERSE_CONDITION_CODE (code);
23590 if (comp_code == NE)
23594 case E_CC_NOOVmode:
23597 case NE: return ARM_NE;
23598 case EQ: return ARM_EQ;
23599 case GE: return ARM_PL;
23600 case LT: return ARM_MI;
23601 default: return ARM_NV;
23607 case NE: return ARM_NE;
23608 case EQ: return ARM_EQ;
23609 default: return ARM_NV;
23615 case NE: return ARM_MI;
23616 case EQ: return ARM_PL;
23617 default: return ARM_NV;
23622 /* We can handle all cases except UNEQ and LTGT.  */
23625 case GE: return ARM_GE;
23626 case GT: return ARM_GT;
23627 case LE: return ARM_LS;
23628 case LT: return ARM_MI;
23629 case NE: return ARM_NE;
23630 case EQ: return ARM_EQ;
23631 case ORDERED: return ARM_VC;
23632 case UNORDERED: return ARM_VS;
23633 case UNLT: return ARM_LT;
23634 case UNLE: return ARM_LE;
23635 case UNGT: return ARM_HI;
23636 case UNGE: return ARM_PL;
23637 /* UNEQ and LTGT do not have a representation.  */
23638 case UNEQ: /* Fall through.  */
23639 case LTGT: /* Fall through.  */
23640 default: return ARM_NV;
23646 case NE: return ARM_NE;
23647 case EQ: return ARM_EQ;
23648 case GE: return ARM_LE;
23649 case GT: return ARM_LT;
23650 case LE: return ARM_GE;
23651 case LT: return ARM_GT;
23652 case GEU: return ARM_LS;
23653 case GTU: return ARM_CC;
23654 case LEU: return ARM_CS;
23655 case LTU: return ARM_HI;
23656 default: return ARM_NV;
23662 case LTU: return ARM_CS;
23663 case GEU: return ARM_CC;
23664 default: return ARM_NV;
23670 case NE: return ARM_NE;
23671 case EQ: return ARM_EQ;
23672 case GEU: return ARM_CS;
23673 case GTU: return ARM_HI;
23674 case LEU: return ARM_LS;
23675 case LTU: return ARM_CC;
23676 default: return ARM_NV;
23682 case GE: return ARM_GE;
23683 case LT: return ARM_LT;
23684 case GEU: return ARM_CS;
23685 case LTU: return ARM_CC;
23686 default: return ARM_NV;
23692 case NE: return ARM_VS;
23693 case EQ: return ARM_VC;
23694 default: return ARM_NV;
23700 case NE: return ARM_NE;
23701 case EQ: return ARM_EQ;
23702 case GEU: return ARM_CS;
23703 case GTU: return ARM_HI;
23704 case LEU: return ARM_LS;
23705 case LTU: return ARM_CC;
23706 default: return ARM_NV;
23712 case NE: return ARM_NE;
23713 case EQ: return ARM_EQ;
23714 case GE: return ARM_GE;
23715 case GT: return ARM_GT;
23716 case LE: return ARM_LE;
23717 case LT: return ARM_LT;
23718 case GEU: return ARM_CS;
23719 case GTU: return ARM_HI;
23720 case LEU: return ARM_LS;
23721 case LTU: return ARM_CC;
23722 default: return ARM_NV;
23725 default: gcc_unreachable ();
23729 /* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
23730 static enum arm_cond_code
23731 get_arm_condition_code (rtx comparison)
23733 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23734 gcc_assert (code != ARM_NV);
23738 /* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
23739 code registers when not targeting Thumb1.  The VFP condition register
23740 only exists when generating hard-float code.  */
23742 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23748 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23752 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23755 thumb2_final_prescan_insn (rtx_insn *insn)
23757 rtx_insn *first_insn = insn;
23758 rtx body = PATTERN (insn);
23760 enum arm_cond_code code;
23765 /* max_insns_skipped in the tune was already taken into account in the
23766 cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
23767 just emit the IT blocks as we can.  It does not make sense to split
23769 max = MAX_INSN_PER_IT_BLOCK;
23771 /* Remove the previous insn from the count of insns to be output.  */
23772 if (arm_condexec_count)
23773 arm_condexec_count--;
23775 /* Nothing to do if we are already inside a conditional block.  */
23776 if (arm_condexec_count)
23779 if (GET_CODE (body) != COND_EXEC)
23782 /* Conditional jumps are implemented directly.  */
23786 predicate = COND_EXEC_TEST (body);
23787 arm_current_cc = get_arm_condition_code (predicate);
23789 n = get_attr_ce_count (insn);
23790 arm_condexec_count = 1;
23791 arm_condexec_mask = (1 << n) - 1;
23792 arm_condexec_masklen = n;
23793 /* See if subsequent instructions can be combined into the same block.  */
23796 insn = next_nonnote_insn (insn);
23798 /* Jumping into the middle of an IT block is illegal, so a label or
23799 barrier terminates the block.  */
23800 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23803 body = PATTERN (insn);
23804 /* USE and CLOBBER aren't really insns, so just skip them.  */
23805 if (GET_CODE (body) == USE
23806 || GET_CODE (body) == CLOBBER)
23809 /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
23810 if (GET_CODE (body) != COND_EXEC)
23812 /* Maximum number of conditionally executed instructions in a block.  */
23813 n = get_attr_ce_count (insn);
23814 if (arm_condexec_masklen + n > max)
23817 predicate = COND_EXEC_TEST (body);
23818 code = get_arm_condition_code (predicate);
23819 mask = (1 << n) - 1;
23820 if (arm_current_cc == code)
23821 arm_condexec_mask |= (mask << arm_condexec_masklen);
23822 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
23825 arm_condexec_count++;
23826 arm_condexec_masklen += n;
23828 /* A jump must be the last instruction in a conditional block.  */
23832 /* Restore recog_data (getting the attributes of other insns can
23833 destroy this array, but final.c assumes that it remains intact
23834 across this call).  */
23835 extract_constrain_insn_cached (first_insn);
23839 arm_final_prescan_insn (rtx_insn *insn)
23841 /* BODY will hold the body of INSN.  */
23842 rtx body = PATTERN (insn);
23844 /* This will be 1 if trying to repeat the trick, and things need to be
23845 reversed if it appears to fail.  */
23848 /* If we start with a return insn, we only succeed if we find another one.  */
23849 int seeking_return = 0;
23850 enum rtx_code return_code = UNKNOWN;
23852 /* START_INSN will hold the insn from where we start looking.  This is the
23853 first insn after the following code_label if REVERSE is true.  */
23854 rtx_insn *start_insn = insn;
23856 /* If in state 4, check if the target branch is reached, in order to
23857 change back to state 0.  */
23858 if (arm_ccfsm_state == 4)
23860 if (insn == arm_target_insn)
23862 arm_target_insn = NULL;
23863 arm_ccfsm_state = 0;
23868 /* If in state 3, it is possible to repeat the trick, if this insn is an
23869 unconditional branch to a label, and immediately following this branch
23870 is the previous target label which is only used once, and the label this
23871 branch jumps to is not too far off.  */
23872 if (arm_ccfsm_state == 3)
23874 if (simplejump_p (insn))
23876 start_insn = next_nonnote_insn (start_insn);
23877 if (BARRIER_P (start_insn))
23879 /* XXX Isn't this always a barrier?  */
23880 start_insn = next_nonnote_insn (start_insn);
23882 if (LABEL_P (start_insn)
23883 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23884 && LABEL_NUSES (start_insn) == 1)
23889 else if (ANY_RETURN_P (body))
23891 start_insn = next_nonnote_insn (start_insn);
23892 if (BARRIER_P (start_insn))
23893 start_insn = next_nonnote_insn (start_insn);
23894 if (LABEL_P (start_insn)
23895 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23896 && LABEL_NUSES (start_insn) == 1)
23899 seeking_return = 1;
23900 return_code = GET_CODE (body);
23909 gcc_assert (!arm_ccfsm_state || reverse);
23910 if (!JUMP_P (insn))
23913 /* This jump might be paralleled with a clobber of the condition codes;
23914 the jump should always come first.  */
23915 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23916 body = XVECEXP (body, 0, 0);
23919 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23920 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23923 int fail = FALSE, succeed = FALSE;
23924 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
23925 int then_not_else = TRUE;
23926 rtx_insn *this_insn = start_insn;
23929 /* Register the insn jumped to.  */
23932 if (!seeking_return)
23933 label = XEXP (SET_SRC (body), 0);
23935 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23936 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23937 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23939 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23940 then_not_else = FALSE;
23942 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23944 seeking_return = 1;
23945 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23947 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23949 seeking_return = 1;
23950 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23951 then_not_else = FALSE;
23954 gcc_unreachable ();
23956 /* See how many insns this branch skips, and what kind of insns.  If all
23957 insns are okay, and the label or unconditional branch to the same
23958 label is not too far away, succeed.  */
23959 for (insns_skipped = 0;
23960 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23964 this_insn = next_nonnote_insn (this_insn);
23968 switch (GET_CODE (this_insn))
23971 /* Succeed if it is the target label, otherwise fail since
23972 control falls in from somewhere else.  */
23973 if (this_insn == label)
23975 arm_ccfsm_state = 1;
23983 /* Succeed if the following insn is the target label.
23985 If return insns are used then the last insn in a function
23986 will be a barrier.  */
23987 this_insn = next_nonnote_insn (this_insn);
23988 if (this_insn && this_insn == label)
23990 arm_ccfsm_state = 1;
23998 /* The AAPCS says that conditional calls should not be
23999 used since they make interworking inefficient (the
24000 linker can't transform BL<cond> into BLX).  That's
24001 only a problem if the machine has BLX.  */
24008 /* Succeed if the following insn is the target label, or
24009 if the following two insns are a barrier and the
24011 this_insn = next_nonnote_insn (this_insn);
24012 if (this_insn && BARRIER_P (this_insn))
24013 this_insn = next_nonnote_insn (this_insn);
24015 if (this_insn && this_insn == label
24016 && insns_skipped < max_insns_skipped)
24018 arm_ccfsm_state = 1;
24026 /* If this is an unconditional branch to the same label, succeed.
24027 If it is to another label, do nothing.  If it is conditional,
24029 /* XXX Probably, the tests for SET and the PC are
24032 scanbody = PATTERN (this_insn);
24033 if (GET_CODE (scanbody) == SET
24034 && GET_CODE (SET_DEST (scanbody)) == PC)
24036 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
24037 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
24039 arm_ccfsm_state = 2;
24042 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
24045 /* Fail if a conditional return is undesirable (e.g. on a
24046 StrongARM), but still allow this if optimizing for size.  */
24047 else if (GET_CODE (scanbody) == return_code
24048 && !use_return_insn (TRUE, NULL)
24051 else if (GET_CODE (scanbody) == return_code)
24053 arm_ccfsm_state = 2;
24056 else if (GET_CODE (scanbody) == PARALLEL)
24058 switch (get_attr_conds (this_insn))
24068 fail = TRUE; /* Unrecognized jump (e.g. epilogue).  */
24073 /* Instructions using or affecting the condition codes make it
24075 scanbody = PATTERN (this_insn);
24076 if (!(GET_CODE (scanbody) == SET
24077 || GET_CODE (scanbody) == PARALLEL)
24078 || get_attr_conds (this_insn) != CONDS_NOCOND)
24088 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
24089 arm_target_label = CODE_LABEL_NUMBER (label);
24092 gcc_assert (seeking_return || arm_ccfsm_state == 2);
24094 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
24096 this_insn = next_nonnote_insn (this_insn);
24097 gcc_assert (!this_insn
24098 || (!BARRIER_P (this_insn)
24099 && !LABEL_P (this_insn)));
24103 /* Oh, dear!  We ran off the end; give up.  */
24104 extract_constrain_insn_cached (insn);
24105 arm_ccfsm_state = 0;
24106 arm_target_insn = NULL;
24109 arm_target_insn = this_insn;
24112 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
24115 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
24117 if (reverse || then_not_else)
24118 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
24121 /* Restore recog_data (getting the attributes of other insns can
24122 destroy this array, but final.c assumes that it remains intact
24123 across this call).  */
24124 extract_constrain_insn_cached (insn);
24128 /* Output IT instructions.  */
24130 thumb2_asm_output_opcode (FILE * stream)
24135 if (arm_condexec_mask)
24137 for (n = 0; n < arm_condexec_masklen; n++)
24138 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
24140 asm_fprintf (stream, "i%s\t%s\n\t", buff,
24141 arm_condition_codes[arm_current_cc]);
24142 arm_condexec_mask = 0;
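/* Worked example (illustrative): with arm_condexec_masklen == 3,
   arm_condexec_mask == 0b011 and arm_current_cc == ARM_EQ, the loop
   builds buff = "tte" and the function emits "itte\teq" -- the first
   two insns of the block execute if EQ holds, the third if it does
   not.  */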
24146 /* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
24147 UNITS_PER_WORD bytes wide.  */
24148 static unsigned int
24149 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
24152 && regno > PC_REGNUM
24153 && regno != FRAME_POINTER_REGNUM
24154 && regno != ARG_POINTER_REGNUM
24155 && !IS_VFP_REGNUM (regno))
24158 return ARM_NUM_REGS (mode);
24161 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
24163 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
24165 if (GET_MODE_CLASS (mode) == MODE_CC)
24166 return (regno == CC_REGNUM
24167 || (TARGET_HARD_FLOAT
24168 && regno == VFPCC_REGNUM));
24170 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
24174 /* For the Thumb we only allow values bigger than SImode in
24175 registers 0 - 6, so that there is always a second low
24176 register available to hold the upper part of the value.
24177 We probably ought to ensure that the register is the
24178 start of an even numbered register pair.  */
24179 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
24181 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
24183 if (mode == SFmode || mode == SImode)
24184 return VFP_REGNO_OK_FOR_SINGLE (regno);
24186 if (mode == DFmode)
24187 return VFP_REGNO_OK_FOR_DOUBLE (regno);
24189 if (mode == HFmode)
24190 return VFP_REGNO_OK_FOR_SINGLE (regno);
24192 /* VFP registers can hold HImode values.  */
24193 if (mode == HImode)
24194 return VFP_REGNO_OK_FOR_SINGLE (regno);
24197 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
24198 || (VALID_NEON_QREG_MODE (mode)
24199 && NEON_REGNO_OK_FOR_QUAD (regno))
24200 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
24201 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
24202 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
24203 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
24204 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
24209 if (TARGET_REALLY_IWMMXT)
24211 if (IS_IWMMXT_GR_REGNUM (regno))
24212 return mode == SImode;
24214 if (IS_IWMMXT_REGNUM (regno))
24215 return VALID_IWMMXT_REG_MODE (mode);
24218 /* We allow almost any value to be stored in the general registers.
24219 Restrict doubleword quantities to even register pairs in ARM state
24220 so that we can use ldrd.  Do not allow very large Neon structure
24221 opaque modes in general registers; they would use too many.  */
24222 if (regno <= LAST_ARM_REGNUM)
24224 if (ARM_NUM_REGS (mode) > 4)
24230 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
24233 if (regno == FRAME_POINTER_REGNUM
24234 || regno == ARG_POINTER_REGNUM)
24235 /* We only allow integers in the fake hard registers.  */
24236 return GET_MODE_CLASS (mode) == MODE_INT;
24241 /* Implement TARGET_MODES_TIEABLE_P.  */
24244 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
24246 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
24249 /* We specifically want to allow elements of "structure" modes to
24250 be tieable to the structure.  This more general condition allows
24251 other rarer situations too.  */
24253 && (VALID_NEON_DREG_MODE (mode1)
24254 || VALID_NEON_QREG_MODE (mode1)
24255 || VALID_NEON_STRUCT_MODE (mode1))
24256 && (VALID_NEON_DREG_MODE (mode2)
24257 || VALID_NEON_QREG_MODE (mode2)
24258 || VALID_NEON_STRUCT_MODE (mode2)))
24264 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
24265 not used in arm mode.  */
24268 arm_regno_class (int regno)
24270 if (regno == PC_REGNUM)
24275 if (regno == STACK_POINTER_REGNUM)
24277 if (regno == CC_REGNUM)
24284 if (TARGET_THUMB2 && regno < 8)
24287 if (regno <= LAST_ARM_REGNUM
24288 || regno == FRAME_POINTER_REGNUM
24289 || regno == ARG_POINTER_REGNUM)
24290 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
24292 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
24293 return TARGET_THUMB2 ? CC_REG : NO_REGS;
24295 if (IS_VFP_REGNUM (regno))
24297 if (regno <= D7_VFP_REGNUM)
24298 return VFP_D0_D7_REGS;
24299 else if (regno <= LAST_LO_VFP_REGNUM)
24300 return VFP_LO_REGS;
24302 return VFP_HI_REGS;
24305 if (IS_IWMMXT_REGNUM (regno))
24306 return IWMMXT_REGS;
24308 if (IS_IWMMXT_GR_REGNUM (regno))
24309 return IWMMXT_GR_REGS;
24314 /* Handle a special case when computing the offset
24315 of an argument from the frame pointer.  */
24317 arm_debugger_arg_offset (int value, rtx addr)
24321 /* We are only interested if dbxout_parms() failed to compute the offset.  */
24325 /* We can only cope with the case where the address is held in a register.  */
24329 /* If we are using the frame pointer to point at the argument, then
24330 an offset of 0 is correct.  */
24331 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
24334 /* If we are using the stack pointer to point at the
24335 argument, then an offset of 0 is correct.  */
24336 /* ??? Check this is consistent with thumb2 frame layout.  */
24337 if ((TARGET_THUMB || !frame_pointer_needed)
24338 && REGNO (addr) == SP_REGNUM)
24341 /* Oh dear.  The argument is pointed to by a register rather
24342 than being held in a register, or being stored at a known
24343 offset from the frame pointer.  Since GDB only understands
24344 those two kinds of argument we must translate the address
24345 held in the register into an offset from the frame pointer.
24346 We do this by searching through the insns for the function
24347 looking to see where this register gets its value.  If the
24348 register is initialized from the frame pointer plus an offset
24349 then we are in luck and we can continue, otherwise we give up.
24351 This code is exercised by producing debugging information
24352 for a function with arguments like this:
24354 double func (double a, double b, int c, double d) {return d;}
24356 Without this code the stab for parameter 'd' will be set to
24357 an offset of 0 from the frame pointer, rather than 8.  */
24359 /* The if() statement says:
24361 If the insn is a normal instruction
24362 and if the insn is setting the value in a register
24363 and if the register being set is the register holding the address of the argument
24364 and if the address is computed by an addition
24365 that involves adding to a register
24366 which is the frame pointer
24371 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24373 if (NONJUMP_INSN_P (insn)
24374 && GET_CODE (PATTERN (insn)) == SET
24375 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
24376 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
24377 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
24378 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
24379 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
24382 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
24391 warning (0, "unable to compute real location of stacked parameter");
24392 value = 8; /* XXX magic hack */
24398 /* Implement TARGET_PROMOTED_TYPE.  */
24401 arm_promoted_type (const_tree t)
24403 if (SCALAR_FLOAT_TYPE_P (t)
24404 && TYPE_PRECISION (t) == 16
24405 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
24406 return float_type_node;
24410 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24411 This simply adds HFmode as a supported mode; even though we don't
24412 implement arithmetic on this type directly, it's supported by
24413 optabs conversions, much the way the double-word arithmetic is
24414 special-cased in the default hook.  */
24417 arm_scalar_mode_supported_p (scalar_mode mode)
24419 if (mode == HFmode)
24420 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24421 else if (ALL_FIXED_POINT_MODE_P (mode))
24424 return default_scalar_mode_supported_p (mode);
24427 /* Set the value of FLT_EVAL_METHOD.
24428 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
24430 0: evaluate all operations and constants, whose semantic type has at
24431 most the range and precision of type float, to the range and
24432 precision of float; evaluate all other operations and constants to
24433 the range and precision of the semantic type;
24435 N, where _FloatN is a supported interchange floating type
24436 evaluate all operations and constants, whose semantic type has at
24437 most the range and precision of _FloatN type, to the range and
24438 precision of the _FloatN type; evaluate all other operations and
24439 constants to the range and precision of the semantic type;
24441 If we have the ARMv8.2-A extensions then we support _Float16 in native
24442 precision, so we should set this to 16.  Otherwise, we support the type,
24443 but want to evaluate expressions in float precision, so set this to 0.  */
24446 static enum flt_eval_method
24447 arm_excess_precision (enum excess_precision_type type)
24451 case EXCESS_PRECISION_TYPE_FAST:
24452 case EXCESS_PRECISION_TYPE_STANDARD:
24453 /* We can calculate either in 16-bit range and precision or
24454 32-bit range and precision.  Make that decision based on whether
24455 we have native support for the ARMv8.2-A 16-bit floating-point
24456 instructions or not.  */
24457 return (TARGET_VFP_FP16INST
24458 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
24459 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
24460 case EXCESS_PRECISION_TYPE_IMPLICIT:
24461 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
24463 gcc_unreachable ();
24465 return FLT_EVAL_METHOD_UNPREDICTABLE;
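/* A minimal C-level illustration (assumed example, not from the source):

     _Float16 a, b, c;
     _Float16 d = a + b + c;

   Under FLT_EVAL_METHOD_PROMOTE_TO_FLOAT both additions are performed in
   32-bit float and the result is rounded to _Float16 once; under
   FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16 (TARGET_VFP_FP16INST) each addition
   is carried out directly in 16-bit precision.  */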
24469 /* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
24470 _Float16 if we are using anything other than ieee format for 16-bit
24471 floating point.  Otherwise, punt to the default implementation.  */
24472 static opt_scalar_float_mode
24473 arm_floatn_mode (int n, bool extended)
24475 if (!extended && n == 16)
24477 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
24479 return opt_scalar_float_mode ();
24482 return default_floatn_mode (n, extended);
24486 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24487 not to early-clobber SRC registers in the process.
24489 We assume that the operands described by SRC and DEST represent a
24490 decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
24491 number of components into which the copy has been decomposed.  */
24493 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
24497 if (!reg_overlap_mentioned_p (operands[0], operands[1])
24498 || REGNO (operands[0]) < REGNO (operands[1]))
24500 for (i = 0; i < count; i++)
24502 operands[2 * i] = dest[i];
24503 operands[2 * i + 1] = src[i];
24508 for (i = 0; i < count; i++)
24510 operands[2 * i] = dest[count - i - 1];
24511 operands[2 * i + 1] = src[count - i - 1];
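/* Worked example (illustrative): for a copy of {d0, d1} into {d1, d2}
   the destination overlaps the source and REGNO (operands[0]) >
   REGNO (operands[1]), so the reversed loop above orders the moves as
   d2 = d1 followed by d1 = d0, avoiding the clobber of d1 before it is
   read.  */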
24516 /* Split operands into moves from op[1] + op[2] into op[0].  */
24519 neon_split_vcombine (rtx operands[3])
24521 unsigned int dest = REGNO (operands[0]);
24522 unsigned int src1 = REGNO (operands[1]);
24523 unsigned int src2 = REGNO (operands[2]);
24524 machine_mode halfmode = GET_MODE (operands[1]);
24525 unsigned int halfregs = REG_NREGS (operands[1]);
24526 rtx destlo, desthi;
24528 if (src1 == dest && src2 == dest + halfregs)
24530 /* No-op move.  Can't split to nothing; emit something.  */
24531 emit_note (NOTE_INSN_DELETED);
24535 /* Preserve register attributes for variable tracking.  */
24536 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
24537 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24538 GET_MODE_SIZE (halfmode));
24540 /* Special case of reversed high/low parts.  Use VSWP.  */
24541 if (src2 == dest && src1 == dest + halfregs)
24543 rtx x = gen_rtx_SET (destlo, operands[1]);
24544 rtx y = gen_rtx_SET (desthi, operands[2]);
24545 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24549 if (!reg_overlap_mentioned_p (operands[2], destlo))
24551 /* Try to avoid unnecessary moves if part of the result
24552 is in the right place already.  */
24554 emit_move_insn (destlo, operands[1]);
24555 if (src2 != dest + halfregs)
24556 emit_move_insn (desthi, operands[2]);
24560 if (src2 != dest + halfregs)
24561 emit_move_insn (desthi, operands[2]);
24563 emit_move_insn (destlo, operands[1]);
24567 /* Return the number (counting from 0) of
24568 the least significant set bit in MASK.  */
24571 number_of_first_bit_set (unsigned mask)
24573 return ctz_hwi (mask);
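/* Usage example (illustrative): number_of_first_bit_set (0x28) is 3,
   since 0x28 is 0b101000 and bit 3 is the lowest bit set.  */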
24576 /* Like emit_multi_reg_push, but allowing for a different set of
24577 registers to be described as saved.  MASK is the set of registers
24578 to be saved; REAL_REGS is the set of registers to be described as
24579 saved.  If REAL_REGS is 0, only describe the stack adjustment.  */
24582 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24584 unsigned long regno;
24585 rtx par[10], tmp, reg;
24589 /* Build the parallel of the registers actually being stored.  */
24590 for (i = 0; mask; ++i, mask &= mask - 1)
24592 regno = ctz_hwi (mask);
24593 reg = gen_rtx_REG (SImode, regno);
24596 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24598 tmp = gen_rtx_USE (VOIDmode, reg);
24603 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24604 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24605 tmp = gen_frame_mem (BLKmode, tmp);
24606 tmp = gen_rtx_SET (tmp, par[0]);
24609 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24610 insn = emit_insn (tmp);
24612 /* Always build the stack adjustment note for unwind info.  */
24613 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24614 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24617 /* Build the parallel of the registers recorded as saved for unwind.  */
24618 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24620 regno = ctz_hwi (real_regs);
24621 reg = gen_rtx_REG (SImode, regno);
24623 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24624 tmp = gen_frame_mem (SImode, tmp);
24625 tmp = gen_rtx_SET (tmp, reg);
24626 RTX_FRAME_RELATED_P (tmp) = 1;
24634 RTX_FRAME_RELATED_P (par[0]) = 1;
24635 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24638 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
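/* Illustrative sketch (assumed example): for mask == 0x4030 (r4, r5 and
   lr) this emits a single push insn equivalent to "push {r4, r5, lr}",
   with a REG_FRAME_RELATED_EXPR note describing the 12-byte stack
   adjustment and the three stores for the unwinder.  */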
24643 /* Emit code to push or pop registers to or from the stack.  F is the
24644 assembly file.  MASK is the registers to pop.  */
24646 thumb_pop (FILE *f, unsigned long mask)
24649 int lo_mask = mask & 0xFF;
24653 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24655 /* Special case.  Do not generate a POP PC statement here, do it in
24657 thumb_exit (f, -1);
24661 fprintf (f, "\tpop\t{");
24663 /* Look at the low registers first.  */
24664 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24668 asm_fprintf (f, "%r", regno);
24670 if ((lo_mask & ~1) != 0)
24675 if (mask & (1 << PC_REGNUM))
24677 /* Catch popping the PC.  */
24678 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24679 || IS_CMSE_ENTRY (arm_current_func_type ()))
24681 /* The PC is never popped directly, instead
24682 it is popped into r3 and then BX is used.  */
24683 fprintf (f, "}\n");
24685 thumb_exit (f, -1);
24694 asm_fprintf (f, "%r", PC_REGNUM);
24698 fprintf (f, "}\n");
24701 /* Generate code to return from a thumb function.
24702 If 'reg_containing_return_addr' is -1, then the return address is
24703 actually on the stack, at the stack pointer.
24705 Note: do not forget to update the length attribute of the corresponding insn pattern
24706 when changing assembly output (eg. length attribute of epilogue_insns when
24707 updating Armv8-M Baseline Security Extensions register clearing
24710 thumb_exit (FILE *f, int reg_containing_return_addr)
24712 unsigned regs_available_for_popping;
24713 unsigned regs_to_pop;
24715 unsigned available;
24719 int restore_a4 = FALSE;
24721 /* Compute the registers we need to pop.  */
24725 if (reg_containing_return_addr == -1)
24727 regs_to_pop |= 1 << LR_REGNUM;
24731 if (TARGET_BACKTRACE)
24733 /* Restore the (ARM) frame pointer and stack pointer.  */
24734 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24738 /* If there is nothing to pop then just emit the BX instruction and
24740 if (pops_needed == 0)
24742 if (crtl->calls_eh_return)
24743 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24745 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24747 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24748 reg_containing_return_addr);
24749 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24752 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24755 /* Otherwise if we are not supporting interworking and we have not created
24756 a backtrace structure and the function was not entered in ARM mode then
24757 just pop the return address straight into the PC.  */
24758 else if (!TARGET_INTERWORK
24759 && !TARGET_BACKTRACE
24760 && !is_called_in_ARM_mode (current_function_decl)
24761 && !crtl->calls_eh_return
24762 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24764 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24768 /* Find out how many of the (return) argument registers we can corrupt.  */
24769 regs_available_for_popping = 0;
24771 /* If returning via __builtin_eh_return, the bottom three registers
24772 all contain information needed for the return.  */
24773 if (crtl->calls_eh_return)
24777 /* If possible, deduce the registers used from the function's
24778 return value.  This is more reliable than examining
24779 df_regs_ever_live_p () because that will be set if the register is
24780 ever used in the function, not just if the register is used
24781 to hold a return value.  */
24783 if (crtl->return_rtx != 0)
24784 mode = GET_MODE (crtl->return_rtx);
24786 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24788 size = GET_MODE_SIZE (mode);
24792 /* In a void function we can use any argument register.
24793 In a function that returns a structure on the stack
24794 we can use the second and third argument registers.  */
24795 if (mode == VOIDmode)
24796 regs_available_for_popping =
24797 (1 << ARG_REGISTER (1))
24798 | (1 << ARG_REGISTER (2))
24799 | (1 << ARG_REGISTER (3));
24801 regs_available_for_popping =
24802 (1 << ARG_REGISTER (2))
24803 | (1 << ARG_REGISTER (3));
24805 else if (size <= 4)
24806 regs_available_for_popping =
24807 (1 << ARG_REGISTER (2))
24808 | (1 << ARG_REGISTER (3));
24809 else if (size <= 8)
24810 regs_available_for_popping =
24811 (1 << ARG_REGISTER (3));
24814 /* Match registers to be popped with registers into which we pop them.  */
24815 for (available = regs_available_for_popping,
24816 required = regs_to_pop;
24817 required != 0 && available != 0;
24818 available &= ~(available & - available),
24819 required &= ~(required & - required))
24822 /* If we have any popping registers left over, remove them.  */
24824 regs_available_for_popping &= ~available;
24826 /* Otherwise if we need another popping register we can use
24827 the fourth argument register.  */
24828 else if (pops_needed)
24830 /* If we have not found any free argument registers and
24831 reg a4 contains the return address, we must move it.  */
24832 if (regs_available_for_popping == 0
24833 && reg_containing_return_addr == LAST_ARG_REGNUM)
24835 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24836 reg_containing_return_addr = LR_REGNUM;
24838 else if (size > 12)
24840 /* Register a4 is being used to hold part of the return value,
24841 but we have dire need of a free, low register.  */
24844 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
24847 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24849 /* The fourth argument register is available.  */
24850 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24856 /* Pop as many registers as we can.  */
24857 thumb_pop (f, regs_available_for_popping);
24859 /* Process the registers we popped.  */
24860 if (reg_containing_return_addr == -1)
24862 /* The return address was popped into the lowest numbered register.  */
24863 regs_to_pop &= ~(1 << LR_REGNUM);
24865 reg_containing_return_addr =
24866 number_of_first_bit_set (regs_available_for_popping);
24868 /* Remove this register from the mask of available registers, so that
24869 the return address will not be corrupted by further pops.  */
24870 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24873 /* If we popped other registers then handle them here.  */
24874 if (regs_available_for_popping)
24878 /* Work out which register currently contains the frame pointer.  */
24879 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24881 /* Move it into the correct place.  */
24882 asm_fprintf (f, "\tmov\t%r, %r\n",
24883 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24885 /* (Temporarily) remove it from the mask of popped registers.  */
24886 regs_available_for_popping &= ~(1 << frame_pointer);
24887 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24889 if (regs_available_for_popping)
24893 /* We popped the stack pointer as well,
24894 find the register that contains it.  */
24895 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24897 /* Move it into the stack register.  */
24898 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24900 /* At this point we have popped all necessary registers, so
24901 do not worry about restoring regs_available_for_popping
24902 to its correct value:
24904 assert (pops_needed == 0)
24905 assert (regs_available_for_popping == (1 << frame_pointer))
24906 assert (regs_to_pop == (1 << STACK_POINTER))  */
24910 /* Since we have just moved the popped value into the frame
24911 pointer, the popping register is available for reuse, and
24912 we know that we still have the stack pointer left to pop.  */
24913 regs_available_for_popping |= (1 << frame_pointer);
24917 /* If we still have registers left on the stack, but we no longer have
24918 any registers into which we can pop them, then we must move the return
24919 address into the link register and make available the register that
24921 if (regs_available_for_popping == 0 && pops_needed > 0)
24923 regs_available_for_popping |= 1 << reg_containing_return_addr;
24925 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24926 reg_containing_return_addr);
24928 reg_containing_return_addr = LR_REGNUM;
24931 /* If we have registers left on the stack then pop some more.
24932 We know that at most we will want to pop FP and SP.  */
24933 if (pops_needed > 0)
24938 thumb_pop (f, regs_available_for_popping);
24940 /* We have popped either FP or SP.
24941 Move whichever one it is into the correct register.  */
24942 popped_into = number_of_first_bit_set (regs_available_for_popping);
24943 move_to = number_of_first_bit_set (regs_to_pop);
24945 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24949 /* If we still have not popped everything then we must have only
24950 had one register available to us and we are now popping the SP.  */
24951 if (pops_needed > 0)
24955 thumb_pop (f, regs_available_for_popping);
24957 popped_into = number_of_first_bit_set (regs_available_for_popping);
24959 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24961 assert (regs_to_pop == (1 << STACK_POINTER))
24962 assert (pops_needed == 1)
24966 /* If necessary restore the a4 register.  */
24969 if (reg_containing_return_addr != LR_REGNUM)
24971 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24972 reg_containing_return_addr = LR_REGNUM;
24975 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24978 if (crtl->calls_eh_return)
24979 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24981 /* Return to caller.  */
24982 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24984 /* This is for the cases where LR is not being used to contain the return
24985 address.  It may therefore contain information that we might not want
24986 to leak, hence it must be cleared.  The value in R0 will never be a
24987 secret at this point, so it is safe to use it, see the clearing code
24988 in 'cmse_nonsecure_entry_clear_before_return'.  */
24989 if (reg_containing_return_addr != LR_REGNUM)
24990 asm_fprintf (f, "\tmov\tlr, r0\n");
24992 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24993 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24996 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24999 /* Scan INSN just before assembler is output for it.
25000 For Thumb-1, we track the status of the condition codes; this
25001 information is used in the cbranchsi4_insn pattern.  */
25003 thumb1_final_prescan_insn (rtx_insn *insn)
25005 if (flag_print_asm_name)
25006 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
25007 INSN_ADDRESSES (INSN_UID (insn)));
25008 /* Don't overwrite the previous setter when we get to a cbranch.  */
25009 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
25011 enum attr_conds conds;
25013 if (cfun->machine->thumb1_cc_insn)
25015 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
25016 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
25019 conds = get_attr_conds (insn);
25020 if (conds == CONDS_SET)
25022 rtx set = single_set (insn);
25023 cfun->machine->thumb1_cc_insn = insn;
25024 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
25025 cfun->machine->thumb1_cc_op1 = const0_rtx;
25026 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
25027 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
25029 rtx src1 = XEXP (SET_SRC (set), 1);
25030 if (src1 == const0_rtx)
25031 cfun->machine->thumb1_cc_mode = CCmode;
25033 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
25035 /* Record the src register operand instead of dest because
25036 cprop_hardreg pass propagates src.  */
25037 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
25040 else if (conds != CONDS_NOCOND)
25041 cfun->machine->thumb1_cc_insn = NULL_RTX;
25044 /* Check if unexpected far jump is used.  */
25045 if (cfun->machine->lr_save_eliminated
25046 && get_attr_far_jump (insn) == FAR_JUMP_YES)
25047 internal_error ("Unexpected thumb1 far jump");
25051 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
25053 unsigned HOST_WIDE_INT mask = 0xff;
25056 val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
25057 if (val == 0) /* XXX */
25060 for (i = 0; i < 25; i++)
25061 if ((val & (mask << i)) == val)
25067 /* Returns nonzero if the current function contains,
25068 or might contain, a far jump.  */
25070 thumb_far_jump_used_p (void)
25073 bool far_jump = false;
25074 unsigned int func_size = 0;
25076 /* If we have already decided that far jumps may be used,
25077 do not bother checking again, and always return true even if
25078 it turns out that they are not being used.  Once we have made
25079 the decision that far jumps are present (and that hence the link
25080 register will be pushed onto the stack) we cannot go back on it.  */
25081 if (cfun->machine->far_jump_used)
25084 /* If this function is not being called from the prologue/epilogue
25085 generation code then it must be being called from the
25086 INITIAL_ELIMINATION_OFFSET macro.  */
25087 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
25089 /* In this case we know that we are being asked about the elimination
25090 of the arg pointer register.  If that register is not being used,
25091 then there are no arguments on the stack, and we do not have to
25092 worry that a far jump might force the prologue to push the link
25093 register, changing the stack offsets.  In this case we can just
25094 return false, since the presence of far jumps in the function will
25095 not affect stack offsets.
25097 If the arg pointer is live (or if it was live, but has now been
25098 eliminated and so set to dead) then we do have to test to see if
25099 the function might contain a far jump.  This test can lead to some
25100 false negatives, since before reload is completed, the length of
25101 branch instructions is not known, so gcc defaults to returning their
25102 longest length, which in turn sets the far jump attribute to true.
25104 A false negative will not result in bad code being generated, but it
25105 will result in a needless push and pop of the link register.  We
25106 hope that this does not occur too often.
25108 If we need doubleword stack alignment this could affect the other
25109 elimination offsets so we can't risk getting it wrong.  */
25110 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
25111 cfun->machine->arg_pointer_live = 1;
25112 else if (!cfun->machine->arg_pointer_live)
25116 /* We should not change far_jump_used during or after reload, as there is
25117 no chance to change the stack frame layout.  */
25118 if (reload_in_progress || reload_completed)
25121 /* Check to see if the function contains a branch
25122 insn with the far jump attribute set.  */
25123 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25125 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
25129 func_size += get_attr_length (insn);
25132 /* The far_jump attribute will always be true for thumb1 before the
25133 shorten_branch pass, so checking it before shorten_branch isn't
25134 very useful.
25136 The following heuristic tries to estimate more accurately whether a far
25137 jump may finally be used.  The heuristic is very conservative as there is
25138 no chance to roll back the decision not to use a far jump.
25140 Thumb1 long branch offset is -2048 to 2046.  The worst case is that each
25141 2-byte insn is associated with a 4-byte constant pool.  Using
25142 function size 2048/3 as the threshold is conservative enough.  */
25145 if ((func_size * 3) >= 2048)
25147 /* Record the fact that we have decided that
25148 the function does use far jumps.  */
25149 cfun->machine->far_jump_used = 1;
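/* Worked arithmetic behind the threshold (illustrative): a function of
   682 bytes is at most 341 2-byte insns; with a worst-case 4-byte
   constant pool entry per insn that spans 682 + 4 * 341 = 2046 bytes,
   exactly the largest offset a Thumb1 long branch can reach, so
   func_size * 3 >= 2048 is the point at which a far jump must be
   assumed.  */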
25157 /* Return nonzero if FUNC must be entered in ARM mode.  */
25159 is_called_in_ARM_mode (tree func)
25161 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
25163 /* Ignore the problem about functions whose address is taken.  */
25164 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
25168 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
25174 /* Given the stack offsets and register mask in OFFSETS, decide how
25175 many additional registers to push instead of subtracting a constant
25176 from SP.  For epilogues the principle is the same except we use pop.
25177 FOR_PROLOGUE indicates which we're generating.  */
25179 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
25181 HOST_WIDE_INT amount;
25182 unsigned long live_regs_mask = offsets->saved_regs_mask;
25183 /* Extract a mask of the ones we can give to the Thumb's push/pop
25185 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
25186 /* Then count how many other high registers will need to be pushed.  */
25187 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25188 int n_free, reg_base, size;
25190 if (!for_prologue && frame_pointer_needed)
25191 amount = offsets->locals_base - offsets->saved_regs;
25193 amount = offsets->outgoing_args - offsets->saved_regs;
25195 /* If the stack frame size is 512 exactly, we can save one load
25196 instruction, which should make this a win even when optimizing
25198 if (!optimize_size && amount != 512)
25201 /* Can't do this if there are high registers to push.  */
25202 if (high_regs_pushed != 0)
25205 /* Shouldn't do it in the prologue if no registers would normally
25206 be pushed at all.  In the epilogue, also allow it if we'll have
25207 a pop insn for the PC.  */
25210 || TARGET_BACKTRACE
25211 || (live_regs_mask & 1 << LR_REGNUM) == 0
25212 || TARGET_INTERWORK
25213 || crtl->args.pretend_args_size != 0))
25216 /* Don't do this if thumb_expand_prologue wants to emit instructions
25217 between the push and the stack frame allocation.  */
25219 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
25220 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
25227 size = arm_size_return_regs ();
25228 reg_base = ARM_NUM_INTS (size);
25229 live_regs_mask >>= reg_base;
25232 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
25233 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
25235 live_regs_mask >>= 1;
25241 gcc_assert (amount / 4 * 4 == amount);
25243 if (amount >= 512 && (amount - n_free * 4) < 512)
25244 return (amount - 508) / 4;
25245 if (amount <= n_free * 4)
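/* Worked example (illustrative): with amount == 516 and n_free == 2,
   516 - 2 * 4 == 508 < 512, so the function returns (516 - 508) / 4 == 2;
   pushing two extra registers shrinks the remaining adjustment to 508,
   which fits a single Thumb1 "sub sp" immediate.  */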
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  if (high_regs_pushed == 0 && regno >= 0)
	    mask &= ~((1 << regno) - 1);

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    if (mask & (1 << regno))
	      {
		asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			     regno);

		for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
		     next_hi_reg--)
		  if (live_regs_mask & (1 << next_hi_reg))
		    break;
	      }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
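/* Illustrative note (not from the original source): for a function that
   had pushed r8 and r9 through low work registers, the restore sequence
   emitted above looks roughly like:
	pop	{r2, r3}
	mov	r9, r3
	mov	r8, r2
   i.e. the high registers are refilled via whatever low registers are
   free, because the Thumb-1 pop encoding cannot target r8-r12.  */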
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  machine->static_chain_stack_bytes = -1;

  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Check that FUNC is called with a different mode.  */

bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}
/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
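/* Illustrative note (not from the original source): each elimination is
   just a difference of the cached frame offsets.  For example, with
   saved_args == 0, saved_regs == 24, soft_frame == 24, locals_base == 24
   and outgoing_args == 40, eliminating ARG_POINTER_REGNUM to
   STACK_POINTER_REGNUM yields 40 - 0 == 40 bytes.  */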
/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;
  bool lr_needs_saving;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, x));
	}
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
	 The code looks like this:

	  0   sub   SP, #16		Reserve space for 4 registers.
	  2   push  {R7}		Push low registers.
	  4   add   R7, SP, #20		Get the stack pointer before the push.
	  6   str   R7, [SP, #8]	Store the stack pointer
					  (before reserving the space).
	  8   mov   R7, PC		Get hold of the start of this code + 12.
	 10   str   R7, [SP, #16]	Store it.
	 12   mov   R7, FP		Get hold of the current frame pointer.
	 14   str   R7, [SP, #4]	Store it.
	 16   mov   R7, LR		Get hold of the current return address.
	 18   str   R7, [SP, #12]	Store it.
	 20   add   R7, SP, #16		Point at the start of the
					  backtrace structure.
	 22   mov   FP, R7		Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
	{
	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  lr_needs_saving = false;

	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
	 prevent the scheduler from doing anything weird.  Failing that
	 we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
	{
	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}
      else
	{
	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization: If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && lr_needs_saving))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
      lr_needs_saving = false;
    }
  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
						 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to
	 stash the high registers.  Such kind of stash may clobber the
	 use of arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask);
      pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();

      /* Normally, LR can be used as a scratch register once it has been
	 saved; but if the function examines its own return address then
	 the value is still live and we need to avoid using it.  */
      bool return_addr_live
	= REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			   LR_REGNUM);
      if (lr_needs_saving || return_addr_live)
	pushable_regs &= ~(1 << LR_REGNUM);

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;
	  unsigned long push_mask = 0;

	  for (regno = LR_REGNUM; regno >= 0; regno--)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed--;
		  real_regs_mask |= (1 << next_hi_reg);
		  push_mask |= (1 << regno);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg--)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    break;
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (lr_needs_saving)
	    {
	      push_mask |= 1 << LR_REGNUM;
	      real_regs_mask |= 1 << LR_REGNUM;
	      lr_needs_saving = false;
	      /* If the return address is not live at this point, we
		 can add LR to the list of registers that we can use
		 for pushes.  */
	      if (!return_addr_live)
		pushable_regs |= 1 << LR_REGNUM;
	    }

	  insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask, NULL_RTX);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
  if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
       || flag_stack_clash_protection)
      && size)
    sorry ("%<-fstack-check=specific%> for Thumb-1");
  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert (regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
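/* Illustrative note (not from the original source): for a 4 KiB frame
   with r4 live, the constant-pool fallback above emits roughly:
	ldr	r4, .Lnnn	@ .Lnnn: .word -4096
	add	sp, sp, r4
   instead of a long chain of small "sub sp" adjustments.  */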
  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
/* Clear caller saved registers not used to pass return values and leaked
   condition flags before exiting a cmse_nonsecure_entry function.  */

void
cmse_nonsecure_entry_clear_before_return (void)
{
  int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
  uint32_t padding_bits_to_clear = 0;
  auto_sbitmap to_clear_bitmap (maxregno + 1);
  rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
  tree result_type;

  bitmap_clear (to_clear_bitmap);
  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
  bitmap_set_bit (to_clear_bitmap, IP_REGNUM);

  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
     registers.  */
  if (TARGET_HARD_FLOAT)
    {
      int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;

      bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);

      /* Make sure we don't clear the two scratch registers used to clear the
	 relevant FPSCR bits in output_return_instruction.  */
      emit_use (gen_rtx_REG (SImode, IP_REGNUM));
      bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
      emit_use (gen_rtx_REG (SImode, 4));
      bitmap_clear_bit (to_clear_bitmap, 4);
    }

  /* If the user has defined registers to be caller saved, these are no longer
     restored by the function before returning and must thus be cleared for
     security purposes.  */
  for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
    {
      /* We do not touch registers that can be used to pass arguments as per
	 the AAPCS, since these should never be made callee-saved by user
	 options.  */
      if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
	continue;
      if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
	continue;
      if (call_used_or_fixed_reg_p (regno))
	bitmap_set_bit (to_clear_bitmap, regno);
    }

  /* Make sure we do not clear the registers used to return the result in.  */
  result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
  if (!VOID_TYPE_P (result_type))
    {
      uint64_t to_clear_return_mask;
      result_rtl = arm_function_value (result_type, current_function_decl, 0);

      /* No need to check that we return in registers, because we don't
	 support returning on stack yet.  */
      gcc_assert (REG_P (result_rtl));
      to_clear_return_mask
	= compute_not_to_clear_mask (result_type, result_rtl, 0,
				     &padding_bits_to_clear);
      if (to_clear_return_mask)
	{
	  gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
	    if (to_clear_return_mask & (1ULL << regno))
	      bitmap_clear_bit (to_clear_bitmap, regno);
	}
    }

  if (padding_bits_to_clear != 0)
    {
      int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
      auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);

      /* Padding_bits_to_clear is not 0 so we know we are dealing with
	 returning a composite type, which only uses r0.  Let's make sure that
	 r1-r3 is cleared too.  */
      bitmap_clear (to_clear_arg_regs_bitmap);
      bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
      gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
    }

  /* Clear full registers that leak before returning.  */
  clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
  r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
  cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
			clearing_reg);
}
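/* Illustrative note (not from the original source): for a
   cmse_nonsecure_entry function returning an int in r0, the bitmap built
   above leaves r0 alone and requests clearing of r1-r3 and ip (plus, on
   a hard-float ABI, the caller-saved VFP registers), so no secure-state
   values leak to the non-secure caller.  */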
/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if single register is to be
   returned, or multiple register return.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
	 functions or adapt code to handle according to ACLE.  This path should
	 not be reachable for cmse_nonsecure_entry functions though we prefer
	 to assert it for now to ensure that future code changes do not silently
	 change this behavior.  */
      gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    {
      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	cmse_nonsecure_entry_clear_before_return ();
      emit_jump_insn (simple_return_rtx);
    }
}
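/* Illustrative note (not from the original source): a function that saved
   {r4, r5, lr} returns through the multi-register path above as
	pop	{r4, r5, pc}
   i.e. LR's saved slot is reloaded straight into the PC.  */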
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    cmse_nonsecure_entry_clear_before_return ();
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  rtx_insn *insn;
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_or_fixed_reg_p (i))
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_or_fixed_reg_p (i + 1)))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode,
						     hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP which contains old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent
     registers, we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));

      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogues.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, frame pointer points to first saved register.
	     Restore stack pointer to last saved register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (insn, amount,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    {
	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					    hard_frame_pointer_rtx,
					    GEN_INT (amount)));
	      arm_add_cfa_adjust_cfa_note (insn, amount,
					   hard_frame_pointer_rtx,
					   hard_frame_pointer_rtx);
	    }

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
	  arm_add_cfa_adjust_cfa_note (insn, 0,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  rtx_insn *tmp;
	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (tmp, amount,
				       stack_pointer_rtx, stack_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }

  if (TARGET_HARD_FLOAT)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_or_fixed_reg_p (i))
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_or_fixed_reg_p (i + 1)))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	{
	  rtx_insn *insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_CMSE_ENTRY (func_type)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
							addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    insn = emit_jump_insn (insn);
		  }
		else
		  {
		    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
						 addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  }
	      }
	}
      else
	{
	  if (TARGET_LDRD
	      && current_tune->prefer_ldrd_strd
	      && !optimize_function_for_size_p (cfun))
	    {
	      if (TARGET_THUMB2)
		thumb2_emit_ldrd_pop (saved_regs_mask);
	      else if (TARGET_ARM && !IS_INTERRUPT (func_type))
		arm_emit_ldrd_pop (saved_regs_mask);
	      else
		arm_emit_multi_reg_pop (saved_regs_mask);
	    }
	  else
	    arm_emit_multi_reg_pop (saved_regs_mask);
	}

      if (return_in_pc)
	return;
    }

  amount
    = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
  if (amount)
    {
      int i, j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp =
	emit_insn (gen_addsi3 (stack_pointer_rtx,
			       stack_pointer_rtx,
			       GEN_INT (amount)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
	{
	  /* Restore pretend args.  Refer arm_expand_prologue on how to save
	     pretend_args in stack.  */
	  int num_regs = crtl->args.pretend_args_size / 4;
	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
	  for (j = 0, i = 0; j < num_regs; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx reg = gen_rtx_REG (SImode, i);
		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
		j++;
	      }
	  REG_NOTES (tmp) = dwarf;
	}
      arm_add_cfa_adjust_cfa_note (tmp, amount,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* CMSE_ENTRY always returns.  */
      gcc_assert (really_return);
      cmse_nonsecure_entry_clear_before_return ();
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char * name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
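/* Illustrative note (not from the original source): loading a DImode
   value at [r0 + r1] into r0/r1 overlaps the address registers, so the
   PLUS path above first forms the address in the high half of the
   destination:
	add	r1, r0, r1
	ldr	r0, [r1, #0]
	ldr	r1, [r1, #4]
   which is why the high destination register doubles as the address
   temporary.  */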
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      /* Sort the three scratch registers into ascending order with a
	 three-element exchange network, since ldmia/stmia register
	 lists must be ascending.  */
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
	std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_cpymemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_cpymem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_cpymem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}

void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
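/* Illustrative note (not from the original source): a symbol encoded as
   "*foo" is printed verbatim as "foo" by arm_asm_output_labelref, while
   a plain "foo" goes through the %U escape and may gain the target's
   user-label prefix (e.g. "_foo").  */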
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (Eg gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
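/* Illustrative note (not from the original source): a call such as
   arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1) in arm_file_start
   below produces
	.eabi_attribute 28, 1	@ Tag_ABI_VFP_args
   when -fverbose-asm is in effect, and just the numeric form otherwise.  */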
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
	       current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_size:\t%d\n",
	       current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_line_size:\t%d\n",
	       current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_constant_pool:\t%d\n",
	       (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
	       current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
	       current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
	       current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
	       current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_ldrd_strd:\t%d\n",
	       (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "logical_op_non_short_circuit:\t[%d,%d]\n",
	       (int) current_tune->logical_op_non_short_circuit_thumb,
	       (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "disparage_flag_setting_t16_encodings:\t%d\n",
	       (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "string_ops_prefer_neon:\t%d\n",
	       (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_inline_memset:\t%d\n",
	       current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
	       current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
	       (int) current_tune->sched_autopref);
}
/* Print .arch and .arch_extension directives corresponding to the
   current architecture configuration.  */
static void
arm_print_asm_arch_directives ()
{
  const arch_option *arch
    = arm_parse_arch_option_name (all_architectures, "-march",
				  arm_active_target.arch_name);
  auto_sbitmap opt_bits (isa_num_bits);

  gcc_assert (arch);

  asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
  arm_last_printed_arch_string = arm_active_target.arch_name;
  if (!arch->common.extensions)
    return;

  for (const struct cpu_arch_extension *opt = arch->common.extensions;
       opt->name != NULL;
       opt++)
    {
      if (!opt->remove)
	{
	  arm_initialize_isa (opt_bits, opt->isa_bits);

	  /* If every feature bit of this option is set in the target
	     ISA specification, print out the option name.  However,
	     don't print anything if all the bits are part of the
	     FPU specification.  */
	  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
	      && !bitmap_subset_p (opt_bits, isa_all_fpubits))
	    asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
	}
    }
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_BPABI)
    {
      /* We don't have a specified CPU.  Use the architecture to
	 generate the tags.

	 Note: it might be better to do this unconditionally, then the
	 assembler would not need to know about all new CPU names as
	 they are added.  */
      if (!arm_active_target.core_name)
	{
	  /* armv7ve doesn't support any extensions.  */
	  if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
	    {
	      /* Keep backward compatability for assemblers
		 which don't support armv7ve.  */
	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
	      arm_last_printed_arch_string = "armv7ve";
	    }
	  else
	    arm_print_asm_arch_directives ();
	}
      else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
	{
	  asm_fprintf (asm_out_file, "\t.arch %s\n",
		       arm_active_target.core_name + 8);
	  arm_last_printed_arch_string = arm_active_target.core_name + 8;
	}
      else
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_active_target.core_name);
	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
	}

      if (print_tune_info)
	arm_print_tune_info ();

      if (! TARGET_SOFT_FLOAT)
	{
	  if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
	    arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

	  if (TARGET_HARD_FLOAT_ABI)
	    arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	}

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}
      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		     HOST_WIDE_INT, tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
		    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	fputs ("\tldr\tr12, ", file);
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  /* Thumb1 unified syntax requires s suffix in instruction name when
	     one of the operands is immediate.  */
	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
	     pipeline offset is four rather than eight.  Adjust the offset
	     accordingly.  */
	  tem = plus_constant (GET_MODE (tem), tem,
			       TARGET_THUMB1_ONLY ? -3 : -7);
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
/* MI thunk handling for TARGET_32BIT.  */

static void
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		       HOST_WIDE_INT vcall_offset, tree function)
{
  const bool long_call_p = arm_is_long_call_p (function);

  /* On ARM, this_regno is R0 or R1 depending on
     whether the function returns an aggregate or not.  */
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? R1_REGNUM : R0_REGNUM);

  rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
  rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Add DELTA to THIS_RTX.  */
  if (delta != 0)
    arm_split_constant (PLUS, Pmode, NULL_RTX,
			delta, this_rtx, this_rtx, false);

  /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      /* Load *THIS_RTX.  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
      /* Compute *THIS_RTX + VCALL_OFFSET.  */
      arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
			  false);
      /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  rtx funexp = XEXP (DECL_RTL (function), 0);
  if (long_call_p)
    {
      emit_move_insn (temp, funexp);
      funexp = temp;
    }
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;
  emit_barrier ();

  /* Indirect calls require a bit of fixup in PIC mode.  */
  if (long_call_p)
    {
      split_all_insns_noflow ();
      arm_reorg ();
    }

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending this is a post-reload pass.  */
  reload_completed = 0;
}
27349 /* Output code to add DELTA to the first argument, and then jump
27350 to FUNCTION. Used for C++ multiple inheritance. */
27353 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
27354 HOST_WIDE_INT vcall_offset
, tree function
)
27356 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk
));
27358 assemble_start_function (thunk
, fnname
);
27360 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
27362 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
27363 assemble_end_function (thunk
, fnname
);
static bool
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case E_V2SImode: pattern = "%08x"; break;
    case E_V4HImode: pattern = "%04x"; break;
    case E_V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}

/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}

const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands[1])
      || GET_CODE (sum = XEXP (operands[1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    const function_arg_info &arg,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if (nregs & 1)
	{
	  int res = arm_needs_doubleword_align (arg.mode, arg.type);
	  if (res < 0 && warn_psabi)
	    inform (input_location, "parameter passing for argument of "
		    "type %qT changed in GCC 7.1", arg.type);
	  else if (res > 0)
	    {
	      nregs++;
	      if (res > 1 && warn_psabi)
		inform (input_location,
			"parameter passing for argument of type "
			"%qT changed in GCC 9.1", arg.type);
	    }
	}
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}

/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

/* Implement TARGET_DEFAULT_SHORT_ENUMS.  */

static bool
arm_default_short_enums (void)
{
  return ARM_DEFAULT_SHORT_ENUMS;
}
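/* Example of the effect of arm_promote_function_mode: a QImode or HImode
   value (e.g. a 'short' argument or return value) is widened to SImode,
   so a function such as

     short add1 (short x) { return x + 1; }

   passes and returns its value in a full 32-bit register.  Whether the
   *caller* may rely on this promotion having been done is what
   arm_promote_prototypes controls: only for APCS/ATPCS, not for AAPCS.  */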
/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr, mem;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
}

void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr, mem;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode || mode == V8HFmode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}

/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
     for now, as the lane-swapping logic needs to be extended in the expanders.
     See PR target/82518.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case E_SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case E_SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case E_HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case E_QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case E_DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case E_SImode:
	return V2SImode;
      case E_HImode:
	return V4HImode;
      case E_QImode:
	return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
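/* Worked example: with the 255 mask above, the middle-end may fold an
   SImode shift count of 257 down to 1 (257 & 255 == 1), matching what
   the hardware does with a shift amount taken from a register, whereas
   the 0 mask returned for DImode promises nothing, so no such folding
   is done for out-of-range DImode shift counts.  */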
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  return DWARF_FRAME_REGISTERS;
}
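/* A few sample mappings produced by the function above (assuming the
   usual FIRST_VFP_REGNUM/FIRST_IWMMXT_* register layout):

     r0..r15	->  0..15	(identity)
     s0..s31	->  64..95	(legacy VFP single encoding)
     d16..d31	->  272..287	(256 + D-register number)
     wcgr0..3	->  104..107
     wr0..wr15	->  112..127  */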
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
	      && REG_P (SET_DEST (e))
	      && REGNO (SET_DEST (e)) == SP_REGNUM
	      && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16)
    {
      /* For -Os dummy registers can be pushed at the beginning to
	 avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
	padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
	fprintf (asm_out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
		  && MEM_P (SET_DEST (e))
		  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

      if (flag_checking)
	{
	  /* Check that the addresses are consecutive.  */
	  e = XEXP (SET_DEST (e), 0);
	  if (GET_CODE (e) == PLUS)
	    gcc_assert (REG_P (XEXP (e, 0))
			&& REGNO (XEXP (e, 0)) == SP_REGNUM
			&& CONST_INT_P (XEXP (e, 1))
			&& offset == INTVAL (XEXP (e, 1)));
	  else
	    gcc_assert (i == 1
			&& REG_P (e)
			&& REGNO (e) == SP_REGNUM);
	  offset += reg_size;
	}
    }
  fprintf (asm_out_file, "}\n");
  if (padfirst)
    fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
}
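/* Example (illustrative): a prologue push such as

     push	{r4, r5, lr}

   arrives here as a PARALLEL of one sp adjustment plus three stores and
   produces the unwind directive

     .save	{r4, r5, lr}

   If -Os pushed a dummy register first to fold the stack adjustment in,
   the dummy slot is described separately with a trailing ".pad #4".  */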
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || !REG_P (XEXP (XEXP (e0, 0), 0))
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(asm_out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || !REG_P (XEXP (e1, 0))
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e1, 1)))
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (!REG_P (XEXP (e1, 0))
		  || !CONST_INT_P (XEXP (e1, 1)))
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (REG_P (e1))
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && REG_P (XEXP (e1, 0))
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && CONST_INT_P (XEXP (e1, 1)))
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  pat = XEXP (note, 0);
	  goto found;

	case REG_CFA_REGISTER:
	  pat = XEXP (note, 0);
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx);
	    reg = REGNO (dest);
	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			 reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	/* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
	   to get correct dwarf information for shrink-wrap.  We should not
	   emit unwind information for it because these are used either for
	   pretend arguments or notes to adjust sp and restore registers from
	   stack.  */
	case REG_CFA_DEF_CFA:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_RESTORE:
	  return;

	case REG_CFA_EXPRESSION:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
#endif /* ARM_UNWIND_INFO */

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}

/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
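/* Taken together with arm_unwind_emit, a typical unwound function is
   bracketed like this in the assembly output (sketch):

     func:
	.fnstart
	.save	{r4, lr}
	push	{r4, lr}
	...
	.fnend

   while a function known never to unwind carries ".cantunwind" between
   .fnstart and .fnend and no frame annotations at all.  */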
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_GD32_FDPIC:
      fputs ("(tlsgd_fdpic)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDM32_FDPIC:
      fputs ("(tlsldm_fdpic)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_IE32_FDPIC:
      fputs ("(gottpoff_fdpic)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}

/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}

/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  shift = shift_op(operands[3], &val);
  if (shift)
    {
      if (val != -1)
	operands[2] = GEN_INT(val);
      sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1", c);

  output_asm_insn (pattern, operands);
  return "";
}

/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
	|| ((opmode == V2SImode) && (shift > 31))
	|| ((opmode == DImode) && (shift > 63)))
  {
    if (wror_or_wsra)
      {
	sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	output_asm_insn (templ, operands);
	if (opmode == DImode)
	  {
	    sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	    output_asm_insn (templ, operands);
	  }
      }
    else
      {
	/* The destination register will contain all zeros.  */
	sprintf (templ, "wzero\t%%0");
	output_asm_insn (templ, operands);
      }
    return "";
  }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	{
	  break;
	}
      mask >>= 1;
    }
  gcc_assert (i < units);
  {
    switch (GET_MODE (operands[0]))
      {
      case E_V8QImode:
	sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
	break;
      case E_V4HImode:
	sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
	break;
      case E_V2SImode:
	sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
	break;
      default:
	gcc_unreachable ();
	break;
      }
    output_asm_insn (templ, operands);
  }
  return "";
}

/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case E_QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case E_HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case E_SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}

/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case E_QImode:
      return "tbb\t[%|pc, %0]";
    case E_HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case E_SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
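/* Illustrative QImode dispatch emitted by the function above:

	cmp	r0, #N		@ N = number of cases - 1
	bhi	.Ldefault
	tbb	[pc, r0]

   with the ADDR_DIFF_VEC that follows supplying the byte offsets; the
   HImode and SImode variants differ only in how the table is accessed.  */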
/* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Implement TARGET_SCHED_VARIABLE_ISSUE.  */
static int
arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
{
  if (DEBUG_INSN_P (insn))
    return more;

  rtx_code code = GET_CODE (PATTERN (insn));
  if (code == USE || code == CLOBBER)
    return more;

  if (get_attr_type (insn) == TYPE_NO_INSN)
    return more;

  return more - 1;
}

/* Return how many instructions the scheduler should look ahead to choose
   the best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}

const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}

/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* The frame pointer is required for non-leaf APCS frames.  */
  if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
    return true;

  /* If we are probing the stack in the prologue, we will have a faulting
     instruction prior to the stack adjustment and this requires a frame
     pointer if we want to catch the exception using the EABI unwinder.  */
  if (!IS_INTERRUPT (arm_current_func_type ())
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	  || flag_stack_clash_protection)
      && arm_except_unwind_info (&global_options) == UI_TARGET
      && cfun->can_throw_non_call_exceptions)
    {
      HOST_WIDE_INT size = get_frame_size ();

      /* That's irrelevant if there is no stack adjustment.  */
      if (size <= 0)
	return false;

      /* That's relevant only if there is a stack probe.  */
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  /* We don't have the final size of the frame so adjust.  */
	  size += 32 * UNITS_PER_WORD;
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    return true;
	}
      else
	return true;
    }

  return false;
}

/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static void
arm_autovectorize_vector_sizes (vector_sizes *sizes, bool)
{
  if (!TARGET_NEON_VECTORIZE_DOUBLE)
    {
      sizes->safe_push (16);
      sizes->safe_push (8);
    }
}

static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
    }

  if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10]     = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}

static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
     list is 8-bit.  Normally this means all registers in the list must be
     LO_REGS, that is (R0 - R7).  If any HI_REGS are used, then we must use
     32-bit encodings.  There is one exception for PUSH: LR in HI_REGS can be
     used with the 16-bit encoding.  */
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}

/* Compute the attribute "length" of insn.  Currently, this function is used
   for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
   "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
   rtx, RETURN_PC is true if OPERANDS contains return insn.  WRITE_BACK_P is
   true if OPERANDS contains an insn which explicitly updates the base
   register.  */

int
arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
{
  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  rtx parallel_op = operands[0];
  /* Initialize to elements number of PARALLEL.  */
  unsigned indx = XVECLEN (parallel_op, 0) - 1;
  /* Initialize the value to base register.  */
  unsigned regno = REGNO (operands[1]);
  /* Skip return and write back pattern.
     We only need register pop pattern for later analysis.  */
  unsigned first_indx = 0;
  first_indx += return_pc ? 1 : 0;
  first_indx += write_back_p ? 1 : 0;

  /* A pop operation can be done through LDM or POP.  If the base register is SP
     and if it's with write back, then a LDM will be alias of POP.  */
  bool pop_p = (regno == SP_REGNUM && write_back_p);
  bool ldm_p = !pop_p;

  /* Check base register for LDM.  */
  if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
    return 4;

  /* Check each register in the list.  */
  for (; indx >= first_indx; indx--)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
      /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
	 comment in arm_attr_length_push_multi.  */
      if (REGNO_REG_CLASS (regno) == HI_REGS
	  && (regno != PC_REGNUM || ldm_p))
	return 4;
    }

  return 2;
}
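/* Encoding-size examples for the two length computations above (Thumb-2):

     push  {r0, r7}		-> 2 bytes (all LO_REGS)
     push  {r4, lr}		-> 2 bytes (LR is the allowed HI_REGS case)
     push  {r4, r8}		-> 4 bytes (r8 forces the 32-bit encoding)
     pop   {r4, pc}		-> 2 bytes (PC is allowed for POP)
     ldmia r8!, {r0, r1}	-> 4 bytes (high base register for LDM)  */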
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}

/* Same as above, but operands are a register/memory pair in SImode.
   Assumes operands has the base register in position 0 and memory in position
   2 (which is the order provided by the arm_{ldrd,strd} patterns).  */
int
arm_count_ldrdstrd_insns (rtx *operands, bool load)
{
  int count;
  rtx ops[2];
  int regnum, memnum;
  if (load)
    regnum = 0, memnum = 1;
  else
    regnum = 1, memnum = 0;
  ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
  ops[memnum] = adjust_address (operands[2], DImode, 0);
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  r0 = *CONST_DOUBLE_REAL_VALUE (operand);
  if (exact_real_inverse (DFmode, &r0)
      && !REAL_VALUE_NEGATIVE (r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ( (value & (value - 1)) == 0))
	    {
	      int ret = exact_log2 (value);
	      gcc_assert (IN_RANGE (ret, 0, 31));
	      return ret;
	    }
	}
    }
  return 0;
}

/* If X is a CONST_DOUBLE with a value that is a power of 2 whose
   log2 is in [1, 32], return that log2.  Otherwise return -1.
   This is used in the patterns for vcvt.s32.f32 floating-point to
   fixed-point conversions.  */

int
vfp3_const_double_for_bits (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, SFmode))
    return -1;

  HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));

  /* The exact_log2 above will have returned -1 if this is
     not an exact log2.  */
  if (!IN_RANGE (hwint, 1, 32))
    return -1;

  return hwint;
}
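/* Worked examples: vfp3_const_double_for_fract_bits (0.25) yields 2,
   since 1/0.25 == 4 == 2^2 and 2 is within [0, 31]; 0.3 yields 0
   because its inverse is not an exact power of two.  Similarly,
   vfp3_const_double_for_bits on the constant 256.0 yields 8, while a
   negative, NaN, infinite or non-integer value yields -1.  */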
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}

/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}

/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  rtx_insn *jump = emit_jump_insn (insn);
  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
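/* For orientation, the SImode loop ultimately produced from the helpers
   above for a strong compare-and-swap looks roughly like (sketch):

     1:	ldrex	r0, [r2]
	cmp	r0, r3		@ expected value
	bne	2f
	strex	r1, r4, [r2]	@ attempt the store; r1 = failure flag
	cmp	r1, #0
	bne	1b		@ monitor lost, retry
     2:

   with barriers or acquire/release variants substituted according to
   the memory model.  */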
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode, cmp_mode;

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case E_SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case E_DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_THUMB1)
    cmp_mode = E_SImode;
  else
    cmp_mode = CC_Zmode;

  bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
  emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
					    oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  For Thumb-1 targets, the
     boolean negation of the result is also stored in bval because Thumb-1
     backend lacks dependency tracking for CC flag due to flag-setting not
     being represented at RTL level.  */
  if (TARGET_THUMB1)
    emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
  else
    {
      x = gen_rtx_EQ (SImode, bdst, const0_rtx);
      emit_insn (gen_rtx_SET (bval, x));
    }
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  Success of the compare and swap is
   indicated by the Z flag set for 32-bit targets and by neg_bval being zero
   for Thumb-1 targets (i.e. negation of the boolean value returned by
   atomic_compare_and_swapmode standard pattern in operand 0).  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (operands[5] != const0_rtx);
  mod_s_rtx = operands[6];
  mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
  mod_f = memmodel_from_int (INTVAL (operands[7]));
  neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
  mode = GET_MODE (mem);

  bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);

  bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
  bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);

  /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
     as required to communicate with arm_expand_compare_and_swap.  */
  if (TARGET_32BIT)
    {
      cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      emit_move_insn (neg_bval, const1_rtx);
      cond = gen_rtx_NE (VOIDmode, rval, oldval);
      if (thumb1_cmpneg_operand (oldval, SImode))
	emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
						    label2, cond));
      else
	emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
    }

  arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  if (TARGET_32BIT)
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  if (!is_weak)
    {
      /* Z is set to boolean value of !neg_bval, as required to communicate
	 with arm_expand_compare_and_swap.  */
      x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
      emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
/* Split an atomic operation pattern.  Operation is given by CODE and is one
   of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
   operation).  Operation is performed on the content at MEM and on VALUE
   following the memory model MODEL_RTX.  The content at MEM before and after
   the operation is returned in OLD_OUT and NEW_OUT respectively while the
   success of the operation is returned in COND.  Using a scratch register or
   an operand register for these determines what result is returned for that
   pattern.  */

void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  bool all_low_regs, bind_old_new;
  rtx x;

  bool is_armv8_sync = arm_arch8 && is_mm_sync (model);

  bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
  bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);

  /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  /* Does the operation require destination and first operand to use the same
     register?  This is decided by register constraints of relevant insn
     patterns in thumb1.md.  */
  gcc_assert (!new_out || REG_P (new_out));
  all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
		 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
		 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
  bind_old_new =
    (TARGET_THUMB1
     && code != SET
     && code != MINUS
     && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));

  /* We want to return the old value while putting the result of the operation
     in the same register as the old value so copy the old value over to the
     destination register and use that register for the operation.  */
  if (old_out && bind_old_new)
    {
      emit_move_insn (new_out, old_out);
      old_out = new_out;
    }

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
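/* Example: for '__atomic_fetch_add (&x, 1, __ATOMIC_SEQ_CST)' on an
   SImode object, the expander calls the splitter above with CODE == PLUS,
   OLD_OUT bound to the result register and NEW_OUT to a scratch,
   producing a loop along these lines (sketch; exact barriers depend on
   the architecture and memory model):

	dmb	ish
     1:	ldrex	r0, [r2]
	add	r1, r0, #1
	strex	r3, r1, [r2]
	cmp	r3, #0
	bne	1b
	dmb	ish  */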
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  vec_perm_indices perm;
  machine_mode vmode;
  bool one_vector_p;
  bool testing_p;
};

/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}

void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  mask = gen_const_vec_duplicate (vmode, mask);
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
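/* Example of the modulo handling above: for a V8QI two-operand permute,
   NELT is 8, so the mask is 15 and a selector element of 17 is reduced
   to 1 (17 & 15), i.e. it picks the second byte of OP0; elements 8..15
   select from OP1, matching what VTBL does once the operands have been
   combined into a register pair.  */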
/* Map lane ordering between architectural lane order, and GCC lane order,
   taking into account ABI.  See comment above output_move_neon for details.  */

static int
neon_endian_lane_map (machine_mode mode, int lane)
{
  if (BYTES_BIG_ENDIAN)
    {
      int nelems = GET_MODE_NUNITS (mode);
      /* Reverse lane order.  */
      lane = (nelems - 1 - lane);
      /* Reverse D register order, to match ABI.  */
      if (GET_MODE_SIZE (mode) == 16)
	lane = lane ^ (nelems / 2);
    }
  return lane;
}

/* Some permutations index into pairs of vectors, this is a helper function
   to map indexes into those pairs of vectors.  */

static int
neon_pair_endian_lane_map (machine_mode mode, int lane)
{
  int nelem = GET_MODE_NUNITS (mode);
  if (BYTES_BIG_ENDIAN)
    lane =
      neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
  return lane;
}
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  int first_elem;
  int swap_nelt;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
     big endian pattern on 64 bit vectors, so we correct for that.  */
  swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
	      && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;

  if (first_elem == neon_endian_lane_map (d->vmode, 0))
    odd = 0;
  else if (first_elem == neon_endian_lane_map (d->vmode, 1))
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt =
	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
      if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (swap_nelt != 0)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
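/* Example (illustrative): a little-endian two-operand V4SI unzip of even
   lanes has selector { 0, 2, 4, 6 }, and of odd lanes { 1, 3, 5, 7 };
   both are matched here and expand to a single VUZP.32, with the unused
   half of the result going to a scratch register.  */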
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  int first_elem;
  bool is_swapped;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  is_swapped = BYTES_BIG_ENDIAN;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];

  high = nelt / 2;
  if (first_elem == neon_endian_lane_map (d->vmode, high))
    ;
  else if (first_elem == neon_endian_lane_map (d->vmode, 0))
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt =
	neon_pair_endian_lane_map (d->vmode, i + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
	  != elt)
	return false;
      elt =
	neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
	  != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (is_swapped)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    std::swap (out0, out1);

  emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
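/* Example (illustrative): a little-endian two-operand V4SI zip of the low
   halves has selector { 0, 4, 1, 5 } and of the high halves { 2, 6, 3, 7 };
   both expand to a single VZIP.32.  */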
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->perm.length ();
  rtx (*gen) (machine_mode, rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case E_V16QImode:
	case E_V8QImode:
	  gen = gen_neon_vrev64;
	  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case E_V16QImode:
	case E_V8QImode:
	  gen = gen_neon_vrev32;
	  break;
	case E_V8HImode:
	case E_V4HImode:
	case E_V8HFmode:
	case E_V4HFmode:
	  gen = gen_neon_vrev64;
	  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case E_V16QImode:
	case E_V8QImode:
	  gen = gen_neon_vrev16;
	  break;
	case E_V8HImode:
	case E_V4HImode:
	case E_V8HFmode:
	case E_V4HFmode:
	  gen = gen_neon_vrev32;
	  break;
	case E_V4SImode:
	case E_V2SImode:
	case E_V4SFmode:
	case E_V2SFmode:
	  gen = gen_neon_vrev64;
	  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt ; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->vmode, d->target, d->op0));
  return true;
}
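/* Example (illustrative): for V8QI with diff == 3 the matched selector is
   { 3, 2, 1, 0, 7, 6, 5, 4 }, i.e. a byte reversal within each 32-bit
   chunk, which is exactly VREV32.8.  */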
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      std::swap (in0, in1);
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->perm.length ();
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	return false;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;

	  next = 0;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);

  if (d->vmode == E_DImode)
    return false;

  emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
  return true;
}
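/* Example (illustrative): the two-operand V8QI selector
   { 3, 4, 5, 6, 7, 8, 9, 10 } is a strictly increasing run starting at 3,
   so it becomes VEXT.8 with #3; the one-operand selector
   { 3, 4, 5, 6, 7, 0, 1, 2 } is the rotation handled by the
   next == nelt wrap-around above.  */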
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->perm.length ();

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  unsigned int nelt = d->perm.length ();
  if (d->perm[0] >= nelt)
    {
      d->perm.rotate_inputs (1);
      std::swap (d->op0, d->op1);
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */

static bool
arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
			      const vec_perm_indices &sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
    return false;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = vmode;
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.testing_p = !target;

  nelt = GET_MODE_NUNITS (d.vmode);
  for (i = which = 0; i < nelt; ++i)
    {
      int ei = sel[i] & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (d.testing_p || !rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */
    case 2:
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);

  if (!d.testing_p)
    return arm_expand_vec_perm_const_1 (&d);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  bool ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
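/* Worked example (illustrative): for V4SI and sel = { 0, 4, 1, 5 } the
   loop above computes which == 3 (lanes drawn from both inputs), so
   one_vector_p stays false unless op0 and op1 are the same value, in
   which case case 3 falls through to case 2 and the permutation is
   folded onto a single input vector.  */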
static bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we have ldrd (or the access needs only a
     single word), then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
	 instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	{
	  if (code != ARM_PRE_DEC)
	    return true;
	  else
	    return false;
	}
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;

      return true;

    default:
      return false;
    }

  return false;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal,
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
	    gen_addsi3_compare0 ((DEST), (SRC), \
				 GEN_INT (-32))
  #define SET(DEST,SRC) \
	    gen_rtx_SET ((DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			    SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			    SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
	    gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
	    gen_arm_cond_branch ((LABEL), \
				 gen_rtx_ ## COND (CCmode, cc_reg, \
						   const0_rtx), \
				 cc_reg)

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result that an ARM instruction
	 in a shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behavior", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  /* Clearing the out register in DImode first avoids lots
	     of spilling and results in less stack usage.
	     Later this redundant insn is completely removed.
	     Do that only if "in" and "out" are different registers.  */
	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Emit code like this:

	 arithmetic-left:
	    out_down = in_down << amount;
	    out_down = (in_up << (amount - 32)) | out_down;
	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	    out_up = in_up << amount;

	 arithmetic-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount < 32)
	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 logical-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount < 32)
	      out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
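/* Illustrative sequence (register choices hypothetical) for an ARM-mode
   shift-by-register LSHIFTRT with out = r0:r1, in = r2:r3, amount = r4:

	rsb	ip, r4, #32	@ scratch1 = 32 - amount
	sub	lr, r4, #32	@ scratch2 = amount - 32
	lsr	r0, r2, r4	@ out_down = in_down >> amount
	orr	r0, r0, r3, lsl ip
	orr	r0, r0, r3, lsr lr
	lsr	r1, r3, r4	@ out_up = in_up >> amount

   Register-controlled LSL/LSR produce zero for amounts of 32 or more, so
   each ORR term simply vanishes when its amount is out of range; ASR
   instead fills with the sign bit, which is why the ASHIFTRT variant
   above needs the conditional branch.  */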
/* Returns true if the pattern is a valid symbolic address, which is either a
   symbol_ref or (symbol_ref + addend).

   According to the ARM ELF ABI, the initial addend of REL-type relocations
   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
   literal field of the instruction as a 16-bit signed value in the range
   -32768 <= A < 32768.  */

bool
arm_valid_symbolic_address_p (rtx addr)
{
  rtx xop0, xop1 = NULL_RTX;
  rtx tmp = addr;

  if (target_word_relocations)
    return false;

  if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
    return true;

  /* (const (plus: symbol_ref const_int))  */
  if (GET_CODE (addr) == CONST)
    tmp = XEXP (addr, 0);

  if (GET_CODE (tmp) == PLUS)
    {
      xop0 = XEXP (tmp, 0);
      xop1 = XEXP (tmp, 1);

      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
	return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
    }

  return false;
}
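/* Examples (illustrative): (symbol_ref "x") and
   (const (plus (symbol_ref "x") (const_int 0x7fff))) are accepted, while
   (const (plus (symbol_ref "x") (const_int 0x8000))) is rejected because
   the addend no longer fits the signed 16-bit REL field.  */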
/* Return TRUE if this is a valid comparison operation, and put the
   operands into a form that is valid.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case E_SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case E_DImode:
      if (!cmpdi_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case E_HFmode:
      if (!TARGET_VFP_FP16INST)
	break;
      /* FP16 comparisons are done in SF mode.  */
      mode = SFmode;
      *op1 = convert_to_mode (mode, *op1, 1);
      *op2 = convert_to_mode (mode, *op2, 1);
      /* Fall through.  */
    case E_SFmode:
    case E_DFmode:
      if (!vfp_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!vfp_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;
    default:
      break;
    }

  return false;
}
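/* Note (illustrative): even with the ARMv8.2-A FP16 extension there is no
   HFmode compare instruction, so both operands are widened to SFmode here
   and the comparison proceeds as an SFmode VCMP; without
   TARGET_VFP_FP16INST the HFmode case fails validation instead.  */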
/* Maximum number of instructions to set block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
				 unsigned HOST_WIDE_INT length,
				 unsigned HOST_WIDE_INT align,
				 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
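/* Worked example (illustrative): for length == 15, align == 4, without
   strd, num = cost-of-constant + (15 >> 2) + leftover[15 & 3]
	     = cost + 3 + 2;
   with unaligned_access the trailing STRH/STRB pair (length & 3 == 3) is
   merged into one STR, saving one instruction before the comparison
   against arm_block_set_max_insns.  */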
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
			     unsigned HOST_WIDE_INT align,
			     machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num--;

  return (num <= arm_block_set_max_insns ());
}
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
			      unsigned HOST_WIDE_INT length,
			      unsigned HOST_WIDE_INT value,
			      unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_vec, reg;
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;
  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, offset);

  v = sext_hwi (v, BITS_PER_WORD);

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
	  offset += nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	}
    }

  /* If at least nelt_v8 bytes are left over, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      offset += length - i;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
	reg = gen_lowpart (V8QImode, reg);

      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
					      + (nelt_mode - nelt_v8))));
      offset += (length - i) + (nelt_mode - nelt_v8);
      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
			    unsigned HOST_WIDE_INT length,
			    unsigned HOST_WIDE_INT value,
			    unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_vec, reg;
  machine_mode mode;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
	  offset += length - nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	  /* We are shifting bytes back, set the alignment accordingly.  */
	  if ((length & 0x3) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 4);
	  else if ((length & 0x1) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 2);
	  else
	    set_mem_align (mem, BITS_PER_UNIT);

	  emit_insn (gen_movmisalignv16qi (mem, reg));
	  return true;
	}
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
      if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storev8qi (mem, reg));
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      offset += i - UNITS_PER_WORD;
      mem = adjust_automodify_address (dstbase, mode, addr, offset);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_insn (gen_unaligned_storev8qi (mem, reg));
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      offset += length - nelt_mode;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT * 2);
      else
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
				  unsigned HOST_WIDE_INT length,
				  unsigned HOST_WIDE_INT value,
				  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
				unsigned HOST_WIDE_INT length,
				unsigned HOST_WIDE_INT value,
				unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, false, use_strd_p))
    {
      if (!use_strd_p)
	return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
					    align, false, use_strd_p))
	return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
	{
	  addr = plus_constant (Pmode, dst, i);
	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
	  if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
	    emit_move_insn (mem, reg);
	  else
	    emit_insn (gen_unaligned_storedi (mem, reg));
	}
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
	 compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
		    unsigned HOST_WIDE_INT length,
		    unsigned HOST_WIDE_INT value,
		    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
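/* Example (illustrative): for memset (p, 0xAB, 15) with 4-byte alignment
   on a NEON tune that prefers it, the vectorized path stores one V8QI of
   0xAB bytes and then shifts back to issue a second, overlapping 8-byte
   store covering bytes 7-14; otherwise the core-register path emits three
   word stores plus an overlapping word store for the 3-byte tail.  */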
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}
/* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
   for MOVW / MOVT macro fusion.  */

static bool
arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
{
  /* We are trying to fuse
     movw imm / movt imm
     instructions as a group that gets scheduled together.  */

  rtx set_dest = SET_DEST (curr_set);

  if (GET_MODE (set_dest) != SImode)
    return false;

  /* We are trying to match:
     prev (movw)  == (set (reg r0) (const_int imm16))
     curr (movt) == (set (zero_extract (reg r0)
			  (const_int 16)
			  (const_int 16))
		     (const_int imm16_1))
     or
     prev (movw) == (set (reg r1)
			 (high (symbol_ref ("SYM"))))
     curr (movt) == (set (reg r0)
			 (lo_sum (reg r1)
				 (symbol_ref ("SYM"))))  */

  if (GET_CODE (set_dest) == ZERO_EXTRACT)
    {
      if (CONST_INT_P (SET_SRC (curr_set))
	  && CONST_INT_P (SET_SRC (prev_set))
	  && REG_P (XEXP (set_dest, 0))
	  && REG_P (SET_DEST (prev_set))
	  && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	return true;
    }
  else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	   && REG_P (SET_DEST (curr_set))
	   && REG_P (SET_DEST (prev_set))
	   && GET_CODE (SET_SRC (prev_set)) == HIGH
	   && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
    return true;

  return false;
}
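/* Example of a fusible pair (illustrative):

	movw	r0, #:lower16:SYM
	movt	r0, #:upper16:SYM

   Both sets target the same SImode register, so on cores that fuse
   MOVW/MOVT the scheduler keeps them adjacent.  */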
static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set
      || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
      && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
    return true;

  return false;
}
/* Return true iff the instruction fusion described by OP is enabled.  */
bool
arm_fusion_enabled_p (tune_params::fuse_ops op)
{
  return current_tune->fusible_ops & op;
}
/* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
   scheduled for speculative execution.  Reject the long-running division
   and square-root instructions.  */

static bool
arm_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
      case TYPE_SDIV:
      case TYPE_UDIV:
      case TYPE_FDIVS:
      case TYPE_FDIVD:
      case TYPE_FSQRTS:
      case TYPE_FSQRTD:
      case TYPE_NEON_FP_SQRT_S:
      case TYPE_NEON_FP_SQRT_D:
      case TYPE_NEON_FP_SQRT_S_Q:
      case TYPE_NEON_FP_SQRT_D_Q:
      case TYPE_NEON_FP_DIV_S:
      case TYPE_NEON_FP_DIV_D:
      case TYPE_NEON_FP_DIV_S_Q:
      case TYPE_NEON_FP_DIV_D_Q:
	return false;
      default:
	return true;
    }
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return HOST_WIDE_INT_1U << 29;
}
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == UNSPEC)
    return true;
  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((VAR_P (decl_op1)
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (VAR_P (decl_op0)
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }

	  return true;
	}
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */
bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */

void
save_restore_target_globals (tree new_tree)
{
  /* If we have a previous state, use it.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    {
      /* Call target_reinit and save the state for TARGET_GLOBALS.  */
      TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}
/* Invalidate arm_previous_fndecl.  */

void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but previous one did,
     use the default node.  */
  if (! new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by save_restore_target_globals from
     arm_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  arm_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));

  save_restore_target_globals (new_tree);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;
  const char *fpu_name;

  fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
	      ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);

  fprintf (file, "%*sselected isa %s\n", indent, "",
	   TARGET_THUMB2_P (flags) ? "thumb2" :
	   TARGET_THUMB_P (flags) ? "thumb1" :
	   "arm");

  if (ptr->x_arm_arch_string)
    fprintf (file, "%*sselected architecture %s\n", indent, "",
	     ptr->x_arm_arch_string);

  if (ptr->x_arm_cpu_string)
    fprintf (file, "%*sselected CPU %s\n", indent, "",
	     ptr->x_arm_cpu_string);

  if (ptr->x_arm_tune_string)
    fprintf (file, "%*sselected tune %s\n", indent, "",
	     ptr->x_arm_tune_string);

  fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
  bool can_inline = true;

  struct cl_target_option *caller_opts
	= TREE_TARGET_OPTION (caller_tree ? caller_tree
					  : target_option_default_node);

  struct cl_target_option *callee_opts
	= TREE_TARGET_OPTION (callee_tree ? callee_tree
					  : target_option_default_node);

  if (callee_opts == caller_opts)
    return true;

  /* Callee's ISA features should be a subset of the caller's.  */
  struct arm_build_target caller_target;
  struct arm_build_target callee_target;
  caller_target.isa = sbitmap_alloc (isa_num_bits);
  callee_target.isa = sbitmap_alloc (isa_num_bits);

  arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
			      false);
  arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
			      false);
  if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
    can_inline = false;

  sbitmap_free (caller_target.isa);
  sbitmap_free (callee_target.isa);

  /* OK to inline between different modes.
     Function with mode specific instructions, e.g using asm,
     must be explicitly protected with noinline.  */
  return can_inline;
}
/* Hook to fix function's alignment affected by target attribute.  */

static void
arm_relayout_function (tree fndecl)
{
  if (DECL_USER_ALIGN (fndecl))
    return;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
  SET_DECL_ALIGN
    (fndecl,
     FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
	  ret = false;
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
  char *q;

  while ((q = strtok (argstr, ",")) != NULL)
    {
      argstr = NULL;
      if (!strcmp (q, "thumb"))
	{
	  opts->x_target_flags |= MASK_THUMB;
	  if (TARGET_FDPIC && !arm_arch_thumb2)
	    sorry ("FDPIC mode is not supported in Thumb-1 mode");
	}

      else if (!strcmp (q, "arm"))
	opts->x_target_flags &= ~MASK_THUMB;

      else if (!strcmp (q, "general-regs-only"))
	opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;

      else if (!strncmp (q, "fpu=", 4))
	{
	  int fpu_index;
	  if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
				       &fpu_index, CL_TARGET))
	    {
	      error ("invalid fpu for target attribute or pragma %qs", q);
	      return false;
	    }
	  if (fpu_index == TARGET_FPU_auto)
	    {
	      /* This doesn't really make sense until we support
		 general dynamic selection of the architecture and all
		 sub-features.  */
	      sorry ("auto fpu selection not currently permitted here");
	      return false;
	    }
	  opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
	}
      else if (!strncmp (q, "arch=", 5))
	{
	  char *arch = q + 5;
	  const arch_option *arm_selected_arch
	     = arm_parse_arch_option_name (all_architectures, "arch", arch);

	  if (!arm_selected_arch)
	    {
	      error ("invalid architecture for target attribute or pragma %qs",
		     q);
	      return false;
	    }

	  opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
	}
      else if (q[0] == '+')
	{
	  opts->x_arm_arch_string
	    = xasprintf ("%s%s", opts->x_arm_arch_string, q);
	}
      else
	{
	  error ("unknown target attribute or pragma %qs", q);
	  return false;
	}
    }

  return true;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
				 struct gcc_options *opts_set)
{
  struct cl_target_option cl_opts;

  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  cl_target_option_save (&cl_opts, opts);
  arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
  arm_option_check_internal (opts);
  /* Do any overrides, such as global options arch=xxx.
     We do this since arm_active_target was overridden.  */
  arm_option_reconfigure_globals ();
  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (opts, opts_set);

  return build_target_option_node (opts);
}
static void
add_attribute (const char * mode, tree *attributes)
{
  size_t len = strlen (mode);
  tree value = build_string (len, mode);

  TREE_TYPE (value) = build_array_type (char_type_node,
					build_index_type (size_int (len)));

  *attributes = tree_cons (get_identifier ("target"),
			   build_tree_list (NULL_TREE, value),
			   *attributes);
}
/* For testing.  Insert thumb or arm modes alternatively on functions.  */

static void
arm_insert_attributes (tree fndecl, tree * attributes)
{
  const char *mode;

  if (! TARGET_FLIP_THUMB)
    return;

  if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
      || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
    return;

  /* Nested definitions must inherit mode.  */
  if (current_function_decl)
    {
      mode = TARGET_THUMB ? "thumb" : "arm";
      add_attribute (mode, attributes);
      return;
    }

  /* If there is already a setting don't change it.  */
  if (lookup_attribute ("target", *attributes) != NULL)
    return;

  mode = thumb_flipper ? "thumb" : "arm";
  add_attribute (mode, attributes);

  thumb_flipper = !thumb_flipper;
}
/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
			      tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options,
			   TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options,
			    TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
					      &global_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  finalize_options_struct (&func_options);

  return ret;
}
/* Match an ISA feature bitmap to a named FPU.  We always use the
   first entry that exactly matches the feature set, so that we
   effectively canonicalize the FPU name for the assembler.  */
static const char *
arm_identify_fpu_from_isa (sbitmap isa)
{
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap cand_fpubits (isa_num_bits);

  bitmap_and (fpubits, isa, isa_all_fpubits);

  /* If there are no ISA feature bits relating to the FPU, we must be
     doing soft-float.  */
  if (bitmap_empty_p (fpubits))
    return "softvfp";

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
      if (bitmap_equal_p (fpubits, cand_fpubits))
	return all_fpus[i].name;
    }
  /* We must find an entry, or things have gone wrong.  */
  gcc_unreachable ();
}
/* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
   by the function fndecl.  */
void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);

  struct cl_target_option *targ_options;
  if (target_parts)
    targ_options = TREE_TARGET_OPTION (target_parts);
  else
    targ_options = TREE_TARGET_OPTION (target_option_current_node);
  gcc_assert (targ_options);

  /* Only update the assembler .arch string if it is distinct from the last
     such string we printed.  arch_to_print is set conditionally in case
     targ_options->x_arm_arch_string is NULL which can be the case
     when cc1 is invoked directly without passing -march option.  */
  std::string arch_to_print;
  if (targ_options->x_arm_arch_string)
    arch_to_print = targ_options->x_arm_arch_string;

  if (arch_to_print != arm_last_printed_arch_string)
    {
      std::string arch_name
	= arch_to_print.substr (0, arch_to_print.find ("+"));
      asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
      const arch_option *arch
	= arm_parse_arch_option_name (all_architectures, "-march",
				      targ_options->x_arm_arch_string);
      auto_sbitmap opt_bits (isa_num_bits);

      if (arch->common.extensions)
	{
	  for (const struct cpu_arch_extension *opt = arch->common.extensions;
	       opt->name != NULL;
	       opt++)
	    {
	      if (!opt->remove)
		{
		  arm_initialize_isa (opt_bits, opt->isa_bits);
		  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
		      && !bitmap_subset_p (opt_bits, isa_all_fpubits))
		    asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
				 opt->name);
		}
	    }
	}

      arm_last_printed_arch_string = arch_to_print;
    }

  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
	      && cfun->is_thunk))
	fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  std::string fpu_to_print
    = TARGET_SOFT_FLOAT
	? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);

  if (fpu_to_print != arm_last_printed_fpu_string)
    {
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
      arm_last_printed_fpu_string = fpu_to_print;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
			   int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
31698 /* Construct and return a PARALLEL RTX vector with elements numbering the
31699 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31700 the vector - from the perspective of the architecture. This does not
31701 line up with GCC's perspective on lane numbers, so we end up with
31702 different masks depending on our target endian-ness. The diagram
31703 below may help. We must draw the distinction when building masks
31704 which select one half of the vector. An instruction selecting
31705 architectural low-lanes for a big-endian target, must be described using
31706 a mask selecting GCC high-lanes.
31708 Big-Endian Little-Endian
31710 GCC 0 1 2 3 3 2 1 0
31711 | x | x | x | x | | x | x | x | x |
31712 Architecture 3 2 1 0 3 2 1 0
31714 Low Mask: { 2, 3 } { 0, 1 }
31715 High Mask: { 0, 1 } { 2, 3 }
rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
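/* For example, for V4SImode (four lanes) a call
   arm_simd_vect_par_cnst_half (V4SImode, true) yields
   (parallel [(const_int 2) (const_int 3)]) on a little-endian target
   and (parallel [(const_int 0) (const_int 1)]) on a big-endian one,
   matching the High Mask rows of the diagram above.  */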
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half for more details.  */
bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
                                     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
          || INTVAL (elt_ideal) != INTVAL (elt_op))
        return false;
    }
  return true;
}
/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */
static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
                         const_tree)
{
  /* For now, we punt and do not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */
void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
                          rtx label_ref)
{
  rtx x;

  x = gen_rtx_fmt_ee (code, VOIDmode,
                      gen_rtx_REG (cc_mode, CC_REGNUM),
                      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (VOIDmode, label_ref),
                            pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
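/* For instance (a sketch, not from a dump), arm_gen_unlikely_cbranch
   (NE, CCmode, label) emits the unlikely jump

     (set (pc) (if_then_else (ne (reg:CC CC_REGNUM) (const_int 0))
                             (label_ref label)
                             (pc)))  */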
/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */
static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;

      if (!(flags & SECTION_DEBUG))
        *num |= 0x2;
      if (flags & SECTION_EXCLUDE)
        *num |= 0x80000000;
      if (flags & SECTION_WRITE)
        *num |= 0x1;
      if (flags & SECTION_CODE)
        *num |= 0x4;
      if (flags & SECTION_MERGE)
        *num |= 0x10;
      if (flags & SECTION_STRINGS)
        *num |= 0x20;
      if (flags & SECTION_TLS)
        *num |= 0x400;
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
        *num |= 0x200;

      return true;
    }

  return false;
}
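/* For example, a pure-code executable section (SECTION_CODE set,
   SECTION_DEBUG clear, nothing else) yields
   *num = 0x20000000 | 0x2 | 0x4 = 0x20000006, i.e.
   SHF_ARM_PURECODE | SHF_ALLOC | SHF_EXECINSTR.  */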
/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */
static section *
arm_function_section (tree decl, enum node_frequency freq,
                      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior
     as the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
                                                       exit);

      /* If default_sec is not null, then it must be a special section like
         for example .text.startup.  We set the pure-code attribute and
         return the same section to preserve existing behavior.  */
      if (default_sec)
        default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise look whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}
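/* For example, under -mpure-code a function given
   __attribute__((section (".foo"))) still lands in ".foo", but the
   section object returned above carries SECTION_ARM_PURECODE, so the
   assembler output marks ".foo" with the SHF_ARM_PURECODE flag.  */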
/* Implement the TARGET_SECTION_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is
   the section's name and RELOC indicates whether the declaration's
   initializer may contain runtime relocations.  */
static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}
/* Generate a call to __aeabi_[mode]divmod (op0, op1).  */
static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
                           rtx op0, rtx op1,
                           rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  scalar_int_mode libval_mode
    = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
                                        libval_mode,
                                        op0, GET_MODE (op0),
                                        op1, GET_MODE (op1));

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
                                       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}
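/* For SImode, LIBFUNC is e.g. __aeabi_idivmod, which under the AAPCS
   returns the quotient in r0 and the remainder in r1; viewed as one
   DImode value those are the subwords at byte offsets 0 and
   GET_MODE_SIZE (SImode) == 4, which the two simplify_gen_subreg calls
   above extract.  */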
/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented it will cause an exception.  */
bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
      case VUNSPEC_CDP:
      case VUNSPEC_LDC:
      case VUNSPEC_LDCL:
      case VUNSPEC_STC:
      case VUNSPEC_STCL:
      case VUNSPEC_MCR:
      case VUNSPEC_MRC:
        if (arm_arch4)
          return true;
        break;
      case VUNSPEC_CDP2:
      case VUNSPEC_LDC2:
      case VUNSPEC_LDC2L:
      case VUNSPEC_STC2:
      case VUNSPEC_STC2L:
      case VUNSPEC_MCR2:
      case VUNSPEC_MRC2:
        /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
           ARMv8-{A,M}.  */
        if (arm_arch5t)
          return true;
        break;
      case VUNSPEC_MCRR:
      case VUNSPEC_MRRC:
        /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
           ARMv8-{A,M}.  */
        if (arm_arch6 || arm_arch5te)
          return true;
        break;
      case VUNSPEC_MCRR2:
      case VUNSPEC_MRRC2:
        if (arm_arch6)
          return true;
        break;
      default:
        gcc_unreachable ();
    }
  return false;
}
/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */
bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;

  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch (GET_CODE (op))
    {
      case PLUS:
        {
          /* Or registers with an offset.  */
          if (!REG_P (XEXP (op, 0)))
            return false;

          op = XEXP (op, 1);

          /* The offset must be an immediate though.  */
          if (!CONST_INT_P (op))
            return false;

          range = INTVAL (op);

          /* Within the range of [-1020,1020].  */
          if (!IN_RANGE (range, -1020, 1020))
            return false;

          /* And a multiple of 4.  */
          return (range % 4) == 0;
        }
      case PRE_INC:
      case POST_INC:
      case PRE_DEC:
      case POST_DEC:
        return REG_P (XEXP (op, 0));
      default:
        gcc_unreachable ();
    }
  return false;
}
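/* For example (illustrative operands): [r2], [r2, #1020] and [r2, #-508]
   are accepted, as are auto-increment forms such as (post_inc (reg r2));
   [r2, #1024] (out of range) and [r2, #2] (not a multiple of 4) are
   rejected.  */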
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In VFPv1, VFP registers could only be accessed in the mode they were
   set, so subregs would be invalid there.  However, we don't support
   VFPv1 at the moment, and the restriction was lifted in VFPv2.

   In big-endian mode, modes greater than word size (i.e. DFmode) are stored
   in VFP registers in little-endian order.  We can't describe that
   accurately to GCC, so avoid taking subregs of such values.

   The only exception is going from a 128-bit to a 64-bit type.  In that
   case the data layout happens to be consistent for big-endian, so we
   explicitly allow that case.  */
static bool
arm_can_change_mode_class (machine_mode from, machine_mode to,
                           reg_class_t rclass)
{
  if (TARGET_BIG_END
      && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
      && (GET_MODE_SIZE (from) > UNITS_PER_WORD
          || GET_MODE_SIZE (to) > UNITS_PER_WORD)
      && reg_classes_intersect_p (VFP_REGS, rclass))
    return false;
  return true;
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   strcpy from constants will be faster.  */
static HOST_WIDE_INT
arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);

  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD * factor);
  return align;
}
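/* For example, with BITS_PER_WORD == 32 a string constant is aligned to at
   least 32 bits, and to 64 bits when tuning for XScale in ARM mode
   (FACTOR == 2), except when optimizing for size.  */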
/* Emit a speculation barrier on target architectures that do not have
   DSB/ISB directly.  Such systems probably don't need a barrier
   themselves, but if the code is ever run on a later architecture, it
   might become a problem.  */
void
arm_emit_speculation_barrier_function ()
{
  emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
}
#if CHECKING_P
namespace selftest {
/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  We primarily check for
   inconsistencies in the option extensions at present (extensions
   that duplicate others but aren't marked as aliases).  Furthermore,
   for correct canonicalization later options must never be a subset
   of an earlier option.  Any extension should also only specify other
   feature bits and never an architecture bit.  The architecture is inferred
   from the declaration of the extension.  */
static void
arm_test_cpu_arch_data (void)
{
  const arch_option *arch;
  const cpu_option *cpu;
  auto_sbitmap target_isa (isa_num_bits);
  auto_sbitmap isa1 (isa_num_bits);
  auto_sbitmap isa2 (isa_num_bits);

  for (arch = all_architectures; arch->common.name != NULL; ++arch)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (arch->common.extensions == NULL)
        continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
        {
          if (ext1->alias)
            continue;

          arm_initialize_isa (isa1, ext1->isa_bits);
          for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
            {
              if (ext2->alias || ext1->remove != ext2->remove)
                continue;

              arm_initialize_isa (isa2, ext2->isa_bits);
              /* If the option is a subset of the parent option, it doesn't
                 add anything and so isn't useful.  */
              ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

              /* If the extension specifies any architectural bits then
                 disallow it.  Extensions should only specify feature
                 bits.  */
              ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
            }
        }
    }

  for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (cpu->common.extensions == NULL)
        continue;

      arm_initialize_isa (target_isa, cpu->common.isa_bits);

      for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
        {
          if (ext1->alias)
            continue;

          arm_initialize_isa (isa1, ext1->isa_bits);
          for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
            {
              if (ext2->alias || ext1->remove != ext2->remove)
                continue;

              arm_initialize_isa (isa2, ext2->isa_bits);
              /* If the option is a subset of the parent option, it doesn't
                 add anything and so isn't useful.  */
              ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

              /* If the extension specifies any architectural bits then
                 disallow it.  Extensions should only specify feature
                 bits.  */
              ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
            }
        }
    }
}
/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  Here we check for consistency between the
   fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
   a feature bit that is not defined by any FPU flag.  */
static void
arm_test_fpu_data (void)
{
  auto_sbitmap isa_all_fpubits (isa_num_bits);
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap tmpset (isa_num_bits);

  static const enum isa_feature fpu_bitlist[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  arm_initialize_isa (isa_all_fpubits, fpu_bitlist);

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
      bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
      bitmap_clear (isa_all_fpubits);
      bitmap_copy (isa_all_fpubits, tmpset);
    }

  if (!bitmap_empty_p (isa_all_fpubits))
    {
      fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
               " group that are not defined by any FPU.\n"
               "       Check your arm-cpus.in.\n");
      ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
    }
}
static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
  arm_test_fpu_data ();
}
} /* Namespace selftest.  */

#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests

#endif /* CHECKING_P */

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-arm.h"